xref: /titanic_52/usr/src/uts/sun4v/io/vsw.c (revision b54157c1b1bf9673e4da8b526477d59202cd08a6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/debug.h>
32 #include <sys/time.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/user.h>
36 #include <sys/stropts.h>
37 #include <sys/stream.h>
38 #include <sys/strlog.h>
39 #include <sys/strsubr.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpu.h>
42 #include <sys/kmem.h>
43 #include <sys/conf.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47 #include <sys/stat.h>
48 #include <sys/kstat.h>
49 #include <sys/vtrace.h>
50 #include <sys/strsun.h>
51 #include <sys/dlpi.h>
52 #include <sys/ethernet.h>
53 #include <net/if.h>
54 #include <sys/varargs.h>
55 #include <sys/machsystm.h>
56 #include <sys/modctl.h>
57 #include <sys/modhash.h>
58 #include <sys/mac.h>
59 #include <sys/mac_ether.h>
60 #include <sys/taskq.h>
61 #include <sys/note.h>
62 #include <sys/mach_descrip.h>
63 #include <sys/mac.h>
64 #include <sys/mdeg.h>
65 #include <sys/ldc.h>
66 #include <sys/vsw_fdb.h>
67 #include <sys/vsw.h>
68 #include <sys/vio_mailbox.h>
69 #include <sys/vnet_mailbox.h>
70 #include <sys/vnet_common.h>
71 #include <sys/vio_util.h>
72 #include <sys/sdt.h>
73 #include <sys/atomic.h>
74 #include <sys/callb.h>
75 #include <sys/vlan.h>
76 
77 /*
78  * Function prototypes.
79  */
/* DDI entry points (wired into vsw_ops) and MD property readers. */
static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);

/* MDEG routines */
static	int vsw_mdeg_register(vsw_t *vswp);
static	void vsw_mdeg_unregister(vsw_t *vswp);
static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
static	int vsw_read_mdprops(vsw_t *vswp);
static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
	uint16_t *nvidsp, uint16_t *default_idp);
static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
	md_t *mdp, mde_cookie_t *node);
static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
	mde_cookie_t node);
static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);

/*
 * Mac driver related routines.
 * The vsw_m_*() functions are the entries of vsw_m_callbacks.
 */
static int vsw_mac_register(vsw_t *);
static int vsw_mac_unregister(vsw_t *);
static int vsw_m_stat(void *, uint_t, uint64_t *);
static void vsw_m_stop(void *arg);
static int vsw_m_start(void *arg);
static int vsw_m_unicst(void *arg, const uint8_t *);
static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
static int vsw_m_promisc(void *arg, boolean_t);
static mblk_t *vsw_m_tx(void *arg, mblk_t *);
/* Not static: has external linkage, unlike the routines above. */
void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
    mblk_t *mp, vsw_macrx_flags_t flags);
115 
116 /*
117  * Functions imported from other files.
118  */
/* Switching-mode setup and its retry timeout (used by attach/detach). */
extern void vsw_setup_switching_timeout(void *arg);
extern void vsw_stop_switching_timeout(vsw_t *vswp);
extern int vsw_setup_switching(vsw_t *);
/* Multicast address management. */
extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
extern void vsw_del_mcst_vsw(vsw_t *);
extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
/* Port management. */
extern int vsw_detach_ports(vsw_t *vswp);
extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
extern int vsw_port_detach(vsw_t *vswp, int p_instance);
/*
 * NOTE: declared static, so despite sitting in this list it is a
 * file-local routine rather than an import.
 */
static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
	md_t *prev_mdp, mde_cookie_t prev_mdex);
extern	int vsw_port_attach(vsw_port_t *port);
extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
/* Physical device (MAC client) handling. */
extern int vsw_mac_attach(vsw_t *vswp);
extern void vsw_mac_detach(vsw_t *vswp);
extern int vsw_mac_open(vsw_t *vswp);
extern void vsw_mac_close(vsw_t *vswp);
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_reconfig_hw(vsw_t *);
extern void vsw_unset_addrs(vsw_t *vswp);
extern void vsw_set_addrs(vsw_t *vswp);
/* VLAN support. */
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
	mblk_t **npt);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
/* HybridIO support. */
extern void vsw_hio_cleanup(vsw_t *vswp);
extern void vsw_hio_start_ports(vsw_t *vswp);
/* Declared without an explicit 'extern'; linkage is external either way. */
void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
153 
154 /*
155  * Internal tunables.
156  */
int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
int	vsw_wretries = 100;		/* # of write attempts */
int	vsw_desc_delay = 0;		/* delay in us */
int	vsw_read_attempts = 5;		/* # of reads of descriptor */
int	vsw_mac_open_retries = 20;	/* max # of mac_open() retries */
/* vsw_attach() uses this as the retry interval when setup returns EAGAIN */
int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */

/* vsw_attach() sizes both the unicast and the multicast table with this */
uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
/* vsw_mac_register() reports this as the max SDU */
uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vsw_fdbe_refcnt_delay = 10;

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in all vnets connected to this vsw.
 */
uint16_t	vsw_default_vlan_id = 1;

/*
 * Workaround for a version handshake bug in obp's vnet.
 * If vsw initiates version negotiation starting from the highest version,
 * obp sends a nack and terminates version handshake. To workaround
 * this, we do not initiate version handshake when the channel comes up.
 * Instead, we wait for the peer to send its version info msg and go through
 * the version protocol exchange. If we successfully negotiate a version,
 * before sending the ack, we send our version info msg to the peer
 * using the <major,minor> version that we are about to ack.
 */
boolean_t vsw_obp_ver_proto_workaround = B_TRUE;

/*
 * In the absence of "priority-ether-types" property in MD, the following
 * internal tunable can be set to specify a single priority ethertype.
 */
uint64_t vsw_pri_eth_type = 0;

/*
 * Number of transmit priority buffers that are preallocated per device.
 * This number is chosen to be a small value to throttle transmission
 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
 */
uint32_t vsw_pri_tx_nmblks = 64;

boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanup */
int vsw_hio_cleanup_delay = 10000;	/* 10ms */

/*
 * External tunables.
 */
/*
 * Enable/disable thread per ring. This is a mode selection
 * that is done at vsw driver attach time.
 */
boolean_t vsw_multi_ring_enable = B_FALSE;
int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;

/* Number of transmit descriptors -  must be power of 2 */
uint32_t vsw_ntxds = VSW_RING_NUM_EL;

/*
 * Max number of mblks received in one receive operation.
 * The initializer is a floating-point constant expression; its value is
 * truncated when converted to uint32_t.
 */
uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);

/*
 * Tunables for three different pools, that is, the size and
 * number of mblks for each pool.
 */
uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128;	/* size=128 for pool1 */
uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256;	/* size=256 for pool2 */
uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048;	/* size=2048 for pool3 */
uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */

/*
 * vsw_max_tx_qcount is the maximum # of packets that can be queued
 * before the tx worker thread begins processing the queue. Its value
 * is chosen to be 4x the default length of tx descriptor ring.
 */
uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
247 
248 /*
249  * MAC callbacks
250  */
/*
 * Handed to the MAC layer through macp->m_callbacks in vsw_mac_register().
 * The initializer is positional, so the order below must match the field
 * order of mac_callbacks_t.
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	/* NOTE(review): presumably the optional-callback flags word - confirm */
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	/* remaining three slots are left unset */
	NULL,
	NULL,
	NULL
};
264 
/*
 * Character/block entry points. Open and close are nulldev; every
 * other entry point is nodev (or nochpoll for cb_chpoll).
 */
static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};

/*
 * Device operations. _init()/_fini() pass this structure to
 * mac_init_ops()/mac_fini_ops().
 */
static	struct	dev_ops	vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	vsw_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};

extern	struct	mod_ops	mod_driverops;
/* Driver linkage: ties the module description string to vsw_ops. */
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch",
	&vsw_ops,
};
306 
/*
 * Acquire/release all three per-channel locks.
 * Acquisition order is ldc_cblock -> ldc_rxlock -> ldc_txlock and
 * release is in the reverse order.
 *
 * NOTE: each macro expands to three separate statements, the last of
 * which already ends in ';'. They are therefore not safe as the
 * unbraced body of an if/else/loop; always use them inside braces.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->ldc_cblock));\
				mutex_enter(&((ldcp)->ldc_rxlock));\
				mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
				mutex_exit(&((ldcp)->ldc_txlock));\
				mutex_exit(&((ldcp)->ldc_rxlock));\
				mutex_exit(&((ldcp)->ldc_cblock));
315 
316 /* Driver soft state ptr  */
/*
 * Driver soft state ptr.
 * Set up by ddi_soft_state_init() in _init() with sizeof (vsw_t) per
 * instance; entries are allocated in vsw_attach() and freed in
 * vsw_detach().
 */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 * vsw_attach() pushes new instances on the head and vsw_detach() unlinks
 * them; both hold vsw_rw as writer while modifying the list.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;
324 
325 /*
326  * Property names
327  */
static char vdev_propname[] = "virtual-device";
/* also used as the "name" value in vsw_prop_template */
static char vsw_propname[] = "virtual-network-switch";
/* read by vsw_get_md_physname() */
static char physdev_propname[] = "vsw-phys-dev";
/* read by vsw_get_md_smodes() */
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";
static char pri_types_propname[] = "priority-ether-types";
static char vsw_pvid_propname[] = "port-vlan-id";
static char vsw_vid_propname[] = "vlan-id";
static char vsw_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
static char hybrid_propname[] = "hybrid";
345 
346 /*
347  * Matching criteria passed to the MDEG to register interest
348  * in changes to 'virtual-device-port' nodes identified by their
349  * 'id' property.
350  */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,    "id"   },
	{ MDET_LIST_END,    NULL    }
};

/* Pairs the node name with the property list that identifies a node. */
static mdeg_node_match_t vport_match = { "virtual-device-port",
						vport_prop_match };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
 * by their 'name' and 'cfg-handle' properties.
 */
static md_prop_match_t vdev_prop_match[] = {
	{ MDET_PROP_STR,    "name"   },
	{ MDET_PROP_VAL,    "cfg-handle" },
	{ MDET_LIST_END,    NULL    }
};

static mdeg_node_match_t vdev_match = { "virtual-device",
						vdev_prop_match };


/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vsw instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 */
static mdeg_prop_spec_t vsw_prop_template[] = {
	{ MDET_PROP_STR,    "name",		vsw_propname },
	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
	{ MDET_LIST_END,    NULL,		NULL	}
};

/*
 * Store 'val' in entry [1] of a copy of vsw_prop_template, i.e. the
 * "cfg-handle" slot; the hard-coded index must track the template layout.
 * The definition carries its own trailing ';', so an invocation written
 * with a ';' produces an extra empty statement.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);
387 
#ifdef	DEBUG
/*
 * Print debug messages - set to 0x1f to enable all msgs
 * or 0x0 to turn all off.
 */
int vswdbg = 0x0;

/*
 * debug levels:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */

/*
 * Format a debug message and emit it with cmn_err(CE_CONT).
 *
 * vswp - instance the message relates to, or NULL. When non-NULL the
 *	  message is prefixed with "vsw<instance>: ".
 * fmt  - printf-style format string, followed by its arguments.
 *
 * The text is built in a fixed-size stack buffer. vsnprintf() bounds the
 * write to that buffer, so an over-long message is truncated rather than
 * overrunning the kernel stack.
 */
void
vswdebug(vsw_t *vswp, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	if (vswp == NULL)
		cmn_err(CE_CONT, "%s\n", buf);
	else
		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
}

#endif	/* DEBUG */
421 
/*
 * Module linkage; passed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info() respectively.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};
427 
428 int
429 _init(void)
430 {
431 	int status;
432 
433 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
434 
435 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
436 	if (status != 0) {
437 		return (status);
438 	}
439 
440 	mac_init_ops(&vsw_ops, DRV_NAME);
441 	status = mod_install(&modlinkage);
442 	if (status != 0) {
443 		ddi_soft_state_fini(&vsw_state);
444 	}
445 	return (status);
446 }
447 
448 int
449 _fini(void)
450 {
451 	int status;
452 
453 	status = mod_remove(&modlinkage);
454 	if (status != 0)
455 		return (status);
456 	mac_fini_ops(&vsw_ops);
457 	ddi_soft_state_fini(&vsw_state);
458 
459 	rw_destroy(&vsw_rw);
460 
461 	return (status);
462 }
463 
464 int
465 _info(struct modinfo *modinfop)
466 {
467 	return (mod_info(&modlinkage, modinfop));
468 }
469 
470 static int
471 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
472 {
473 	vsw_t		*vswp;
474 	int		instance;
475 	char		hashname[MAXNAMELEN];
476 	char		qname[TASKQ_NAMELEN];
477 	enum		{ PROG_init = 0x00,
478 				PROG_locks = 0x01,
479 				PROG_readmd = 0x02,
480 				PROG_fdb = 0x04,
481 				PROG_mfdb = 0x08,
482 				PROG_taskq = 0x10,
483 				PROG_swmode = 0x20,
484 				PROG_macreg = 0x40,
485 				PROG_mdreg = 0x80}
486 			progress;
487 
488 	progress = PROG_init;
489 	int		rv;
490 
491 	switch (cmd) {
492 	case DDI_ATTACH:
493 		break;
494 	case DDI_RESUME:
495 		/* nothing to do for this non-device */
496 		return (DDI_SUCCESS);
497 	case DDI_PM_RESUME:
498 	default:
499 		return (DDI_FAILURE);
500 	}
501 
502 	instance = ddi_get_instance(dip);
503 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
504 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
505 		return (DDI_FAILURE);
506 	}
507 	vswp = ddi_get_soft_state(vsw_state, instance);
508 
509 	if (vswp == NULL) {
510 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
511 		goto vsw_attach_fail;
512 	}
513 
514 	vswp->dip = dip;
515 	vswp->instance = instance;
516 	ddi_set_driver_private(dip, (caddr_t)vswp);
517 
518 	mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL);
519 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
520 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
521 	mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL);
522 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
523 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
524 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
525 
526 	progress |= PROG_locks;
527 
528 	rv = vsw_read_mdprops(vswp);
529 	if (rv != 0)
530 		goto vsw_attach_fail;
531 
532 	progress |= PROG_readmd;
533 
534 	/* setup the unicast forwarding database  */
535 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
536 	    vswp->instance);
537 	D2(vswp, "creating unicast hash table (%s)...", hashname);
538 	vswp->fdb_nchains = vsw_fdb_nchains;
539 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
540 	    mod_hash_null_valdtor, sizeof (void *));
541 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
542 	progress |= PROG_fdb;
543 
544 	/* setup the multicast fowarding database */
545 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
546 	    vswp->instance);
547 	D2(vswp, "creating multicast hash table %s)...", hashname);
548 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
549 	    mod_hash_null_valdtor, sizeof (void *));
550 
551 	progress |= PROG_mfdb;
552 
553 	/*
554 	 * Create the taskq which will process all the VIO
555 	 * control messages.
556 	 */
557 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
558 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
559 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
560 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
561 		    vswp->instance);
562 		goto vsw_attach_fail;
563 	}
564 
565 	progress |= PROG_taskq;
566 
567 	/* prevent auto-detaching */
568 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
569 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
570 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
571 		    "instance %u", DDI_NO_AUTODETACH, instance);
572 	}
573 
574 	/*
575 	 * Setup the required switching mode,
576 	 * based on the mdprops that we read earlier.
577 	 */
578 	rv = vsw_setup_switching(vswp);
579 	if (rv == EAGAIN) {
580 		/*
581 		 * Unable to setup switching mode;
582 		 * as the error is EAGAIN, schedule a timeout to retry.
583 		 */
584 		mutex_enter(&vswp->swtmout_lock);
585 
586 		vswp->swtmout_enabled = B_TRUE;
587 		vswp->swtmout_id =
588 		    timeout(vsw_setup_switching_timeout, vswp,
589 		    (vsw_setup_switching_delay * drv_usectohz(MICROSEC)));
590 
591 		mutex_exit(&vswp->swtmout_lock);
592 	} else if (rv != 0) {
593 		goto vsw_attach_fail;
594 	}
595 
596 	progress |= PROG_swmode;
597 
598 	/* Register with mac layer as a provider */
599 	rv = vsw_mac_register(vswp);
600 	if (rv != 0)
601 		goto vsw_attach_fail;
602 
603 	progress |= PROG_macreg;
604 
605 	/*
606 	 * Now we have everything setup, register an interest in
607 	 * specific MD nodes.
608 	 *
609 	 * The callback is invoked in 2 cases, firstly if upon mdeg
610 	 * registration there are existing nodes which match our specified
611 	 * criteria, and secondly if the MD is changed (and again, there
612 	 * are nodes which we are interested in present within it. Note
613 	 * that our callback will be invoked even if our specified nodes
614 	 * have not actually changed).
615 	 *
616 	 */
617 	rv = vsw_mdeg_register(vswp);
618 	if (rv != 0)
619 		goto vsw_attach_fail;
620 
621 	progress |= PROG_mdreg;
622 
623 	WRITE_ENTER(&vsw_rw);
624 	vswp->next = vsw_head;
625 	vsw_head = vswp;
626 	RW_EXIT(&vsw_rw);
627 
628 	ddi_report_dev(vswp->dip);
629 	return (DDI_SUCCESS);
630 
631 vsw_attach_fail:
632 	DERR(NULL, "vsw_attach: failed");
633 
634 	if (progress & PROG_mdreg) {
635 		vsw_mdeg_unregister(vswp);
636 		(void) vsw_detach_ports(vswp);
637 	}
638 
639 	if (progress & PROG_macreg)
640 		(void) vsw_mac_unregister(vswp);
641 
642 	if (progress & PROG_swmode) {
643 		vsw_stop_switching_timeout(vswp);
644 		vsw_hio_cleanup(vswp);
645 		mutex_enter(&vswp->mac_lock);
646 		vsw_mac_detach(vswp);
647 		vsw_mac_close(vswp);
648 		mutex_exit(&vswp->mac_lock);
649 	}
650 
651 	if (progress & PROG_taskq)
652 		ddi_taskq_destroy(vswp->taskq_p);
653 
654 	if (progress & PROG_mfdb)
655 		mod_hash_destroy_hash(vswp->mfdb);
656 
657 	if (progress & PROG_fdb) {
658 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
659 		mod_hash_destroy_hash(vswp->fdb_hashp);
660 	}
661 
662 	if (progress & PROG_readmd) {
663 		if (VSW_PRI_ETH_DEFINED(vswp)) {
664 			kmem_free(vswp->pri_types,
665 			    sizeof (uint16_t) * vswp->pri_num_types);
666 		}
667 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
668 	}
669 
670 	if (progress & PROG_locks) {
671 		rw_destroy(&vswp->plist.lockrw);
672 		rw_destroy(&vswp->mfdbrw);
673 		rw_destroy(&vswp->if_lockrw);
674 		mutex_destroy(&vswp->swtmout_lock);
675 		mutex_destroy(&vswp->mca_lock);
676 		mutex_destroy(&vswp->mac_lock);
677 		mutex_destroy(&vswp->hw_lock);
678 	}
679 
680 	ddi_soft_state_free(vsw_state, instance);
681 	return (DDI_FAILURE);
682 }
683 
684 static int
685 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
686 {
687 	vio_mblk_pool_t		*poolp, *npoolp;
688 	vsw_t			**vswpp, *vswp;
689 	int 			instance;
690 
691 	instance = ddi_get_instance(dip);
692 	vswp = ddi_get_soft_state(vsw_state, instance);
693 
694 	if (vswp == NULL) {
695 		return (DDI_FAILURE);
696 	}
697 
698 	switch (cmd) {
699 	case DDI_DETACH:
700 		break;
701 	case DDI_SUSPEND:
702 	case DDI_PM_SUSPEND:
703 	default:
704 		return (DDI_FAILURE);
705 	}
706 
707 	D2(vswp, "detaching instance %d", instance);
708 
709 	/* Stop any pending timeout to setup switching mode. */
710 	vsw_stop_switching_timeout(vswp);
711 
712 	if (vswp->if_state & VSW_IF_REG) {
713 		if (vsw_mac_unregister(vswp) != 0) {
714 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
715 			    "MAC layer", vswp->instance);
716 			return (DDI_FAILURE);
717 		}
718 	}
719 
720 	vsw_mdeg_unregister(vswp);
721 
722 	/* remove mac layer callback */
723 	mutex_enter(&vswp->mac_lock);
724 	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
725 		mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
726 		vswp->mrh = NULL;
727 	}
728 	mutex_exit(&vswp->mac_lock);
729 
730 	if (vsw_detach_ports(vswp) != 0) {
731 		cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports",
732 		    vswp->instance);
733 		return (DDI_FAILURE);
734 	}
735 
736 	rw_destroy(&vswp->if_lockrw);
737 
738 	/* cleanup HybridIO */
739 	vsw_hio_cleanup(vswp);
740 
741 	mutex_destroy(&vswp->hw_lock);
742 
743 	/*
744 	 * Now that the ports have been deleted, stop and close
745 	 * the physical device.
746 	 */
747 	mutex_enter(&vswp->mac_lock);
748 
749 	vsw_mac_detach(vswp);
750 	vsw_mac_close(vswp);
751 
752 	mutex_exit(&vswp->mac_lock);
753 
754 	mutex_destroy(&vswp->mac_lock);
755 	mutex_destroy(&vswp->swtmout_lock);
756 
757 	/*
758 	 * Destroy any free pools that may still exist.
759 	 */
760 	poolp = vswp->rxh;
761 	while (poolp != NULL) {
762 		npoolp = vswp->rxh = poolp->nextp;
763 		if (vio_destroy_mblks(poolp) != 0) {
764 			vswp->rxh = poolp;
765 			return (DDI_FAILURE);
766 		}
767 		poolp = npoolp;
768 	}
769 
770 	/*
771 	 * Remove this instance from any entries it may be on in
772 	 * the hash table by using the list of addresses maintained
773 	 * in the vsw_t structure.
774 	 */
775 	vsw_del_mcst_vsw(vswp);
776 
777 	vswp->mcap = NULL;
778 	mutex_destroy(&vswp->mca_lock);
779 
780 	/*
781 	 * By now any pending tasks have finished and the underlying
782 	 * ldc's have been destroyed, so its safe to delete the control
783 	 * message taskq.
784 	 */
785 	if (vswp->taskq_p != NULL)
786 		ddi_taskq_destroy(vswp->taskq_p);
787 
788 	/*
789 	 * At this stage all the data pointers in the hash table
790 	 * should be NULL, as all the ports have been removed and will
791 	 * have deleted themselves from the port lists which the data
792 	 * pointers point to. Hence we can destroy the table using the
793 	 * default destructors.
794 	 */
795 	D2(vswp, "vsw_detach: destroying hash tables..");
796 	vsw_destroy_vlans(vswp, VSW_LOCALDEV);
797 	mod_hash_destroy_hash(vswp->fdb_hashp);
798 	vswp->fdb_hashp = NULL;
799 
800 	WRITE_ENTER(&vswp->mfdbrw);
801 	mod_hash_destroy_hash(vswp->mfdb);
802 	vswp->mfdb = NULL;
803 	RW_EXIT(&vswp->mfdbrw);
804 	rw_destroy(&vswp->mfdbrw);
805 
806 	/* free pri_types table */
807 	if (VSW_PRI_ETH_DEFINED(vswp)) {
808 		kmem_free(vswp->pri_types,
809 		    sizeof (uint16_t) * vswp->pri_num_types);
810 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
811 	}
812 
813 	ddi_remove_minor_node(dip, NULL);
814 
815 	rw_destroy(&vswp->plist.lockrw);
816 	WRITE_ENTER(&vsw_rw);
817 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
818 		if (*vswpp == vswp) {
819 			*vswpp = vswp->next;
820 			break;
821 		}
822 	}
823 	RW_EXIT(&vsw_rw);
824 	ddi_soft_state_free(vsw_state, instance);
825 
826 	return (DDI_SUCCESS);
827 }
828 
829 static int
830 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
831 {
832 	_NOTE(ARGUNUSED(dip))
833 
834 	vsw_t	*vswp = NULL;
835 	dev_t	dev = (dev_t)arg;
836 	int	instance;
837 
838 	instance = getminor(dev);
839 
840 	switch (infocmd) {
841 	case DDI_INFO_DEVT2DEVINFO:
842 		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
843 			*result = NULL;
844 			return (DDI_FAILURE);
845 		}
846 		*result = vswp->dip;
847 		return (DDI_SUCCESS);
848 
849 	case DDI_INFO_DEVT2INSTANCE:
850 		*result = (void *)(uintptr_t)instance;
851 		return (DDI_SUCCESS);
852 
853 	default:
854 		*result = NULL;
855 		return (DDI_FAILURE);
856 	}
857 }
858 
859 /*
860  * Get the value of the "vsw-phys-dev" property in the specified
861  * node. This property is the name of the physical device that
862  * the virtual switch will use to talk to the outside world.
863  *
864  * Note it is valid for this property to be NULL (but the property
865  * itself must exist). Callers of this routine should verify that
866  * the value returned is what they expected (i.e. either NULL or non NULL).
867  *
868  * On success returns value of the property in region pointed to by
869  * the 'name' argument, and with return value of 0. Otherwise returns 1.
870  */
871 static int
872 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
873 {
874 	int		len = 0;
875 	int		instance;
876 	char		*physname = NULL;
877 	char		*dev;
878 	const char	*dev_name;
879 	char		myname[MAXNAMELEN];
880 
881 	dev_name = ddi_driver_name(vswp->dip);
882 	instance = ddi_get_instance(vswp->dip);
883 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
884 
885 	if (md_get_prop_data(mdp, node, physdev_propname,
886 	    (uint8_t **)(&physname), &len) != 0) {
887 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
888 		    "device(s) from MD", vswp->instance);
889 		return (1);
890 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
891 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
892 		    vswp->instance, physname);
893 		return (1);
894 	} else if (strcmp(myname, physname) == 0) {
895 		/*
896 		 * Prevent the vswitch from opening itself as the
897 		 * network device.
898 		 */
899 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
900 		    vswp->instance, physname);
901 		return (1);
902 	} else {
903 		(void) strncpy(name, physname, strlen(physname) + 1);
904 		D2(vswp, "%s: using first device specified (%s)",
905 		    __func__, physname);
906 	}
907 
908 #ifdef DEBUG
909 	/*
910 	 * As a temporary measure to aid testing we check to see if there
911 	 * is a vsw.conf file present. If there is we use the value of the
912 	 * vsw_physname property in the file as the name of the physical
913 	 * device, overriding the value from the MD.
914 	 *
915 	 * There may be multiple devices listed, but for the moment
916 	 * we just use the first one.
917 	 */
918 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
919 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
920 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
921 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
922 			    vswp->instance, dev);
923 			ddi_prop_free(dev);
924 			return (1);
925 		} else {
926 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
927 			    "config file", vswp->instance, dev);
928 
929 			(void) strncpy(name, dev, strlen(dev) + 1);
930 		}
931 
932 		ddi_prop_free(dev);
933 	}
934 #endif
935 
936 	return (0);
937 }
938 
939 /*
940  * Read the 'vsw-switch-mode' property from the specified MD node.
941  *
942  * Returns 0 on success and the number of modes found in 'found',
943  * otherwise returns 1.
944  */
945 static int
946 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
947 						uint8_t *modes, int *found)
948 {
949 	int		len = 0;
950 	int		smode_num = 0;
951 	char		*smode = NULL;
952 	char		*curr_mode = NULL;
953 
954 	D1(vswp, "%s: enter", __func__);
955 
956 	/*
957 	 * Get the switch-mode property. The modes are listed in
958 	 * decreasing order of preference, i.e. prefered mode is
959 	 * first item in list.
960 	 */
961 	len = 0;
962 	smode_num = 0;
963 	if (md_get_prop_data(mdp, node, smode_propname,
964 	    (uint8_t **)(&smode), &len) != 0) {
965 		/*
966 		 * Unable to get switch-mode property from MD, nothing
967 		 * more we can do.
968 		 */
969 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
970 		    " from the MD", vswp->instance);
971 		*found = 0;
972 		return (1);
973 	}
974 
975 	curr_mode = smode;
976 	/*
977 	 * Modes of operation:
978 	 * 'switched'	 - layer 2 switching, underlying HW in
979 	 *			programmed mode.
980 	 * 'promiscuous' - layer 2 switching, underlying HW in
981 	 *			promiscuous mode.
982 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
983 	 *			in non-promiscuous mode.
984 	 */
985 	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
986 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
987 		if (strcmp(curr_mode, "switched") == 0) {
988 			modes[smode_num++] = VSW_LAYER2;
989 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
990 			modes[smode_num++] = VSW_LAYER2_PROMISC;
991 		} else if (strcmp(curr_mode, "routed") == 0) {
992 			modes[smode_num++] = VSW_LAYER3;
993 		} else {
994 			DWARN(vswp, "%s: Unknown switch mode %s, "
995 			    "setting to default 'switched' mode",
996 			    __func__, curr_mode);
997 			modes[smode_num++] = VSW_LAYER2;
998 		}
999 		curr_mode += strlen(curr_mode) + 1;
1000 	}
1001 	*found = smode_num;
1002 
1003 	D2(vswp, "%s: %d modes found", __func__, smode_num);
1004 
1005 	D1(vswp, "%s: exit", __func__);
1006 
1007 	return (0);
1008 }
1009 
1010 /*
1011  * Register with the MAC layer as a network device, so we
1012  * can be plumbed if necessary.
1013  */
1014 static int
1015 vsw_mac_register(vsw_t *vswp)
1016 {
1017 	mac_register_t	*macp;
1018 	int		rv;
1019 
1020 	D1(vswp, "%s: enter", __func__);
1021 
1022 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1023 		return (EINVAL);
1024 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1025 	macp->m_driver = vswp;
1026 	macp->m_dip = vswp->dip;
1027 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1028 	macp->m_callbacks = &vsw_m_callbacks;
1029 	macp->m_min_sdu = 0;
1030 	macp->m_max_sdu = vsw_ethermtu;
1031 	macp->m_margin = VLAN_TAGSZ;
1032 	rv = mac_register(macp, &vswp->if_mh);
1033 	mac_free(macp);
1034 	if (rv != 0) {
1035 		/*
1036 		 * Treat this as a non-fatal error as we may be
1037 		 * able to operate in some other mode.
1038 		 */
1039 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1040 		    "a provider with MAC layer", vswp->instance);
1041 		return (rv);
1042 	}
1043 
1044 	vswp->if_state |= VSW_IF_REG;
1045 
1046 	vswp->max_frame_size = vsw_ethermtu + sizeof (struct ether_header)
1047 	    + VLAN_TAGSZ;
1048 
1049 	D1(vswp, "%s: exit", __func__);
1050 
1051 	return (rv);
1052 }
1053 
1054 static int
1055 vsw_mac_unregister(vsw_t *vswp)
1056 {
1057 	int		rv = 0;
1058 
1059 	D1(vswp, "%s: enter", __func__);
1060 
1061 	WRITE_ENTER(&vswp->if_lockrw);
1062 
1063 	if (vswp->if_state & VSW_IF_REG) {
1064 		rv = mac_unregister(vswp->if_mh);
1065 		if (rv != 0) {
1066 			DWARN(vswp, "%s: unable to unregister from MAC "
1067 			    "framework", __func__);
1068 
1069 			RW_EXIT(&vswp->if_lockrw);
1070 			D1(vswp, "%s: fail exit", __func__);
1071 			return (rv);
1072 		}
1073 
1074 		/* mark i/f as down and unregistered */
1075 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1076 	}
1077 	RW_EXIT(&vswp->if_lockrw);
1078 
1079 	D1(vswp, "%s: exit", __func__);
1080 
1081 	return (rv);
1082 }
1083 
1084 static int
1085 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1086 {
1087 	vsw_t			*vswp = (vsw_t *)arg;
1088 
1089 	D1(vswp, "%s: enter", __func__);
1090 
1091 	mutex_enter(&vswp->mac_lock);
1092 	if (vswp->mh == NULL) {
1093 		mutex_exit(&vswp->mac_lock);
1094 		return (EINVAL);
1095 	}
1096 
1097 	/* return stats from underlying device */
1098 	*val = mac_stat_get(vswp->mh, stat);
1099 
1100 	mutex_exit(&vswp->mac_lock);
1101 
1102 	return (0);
1103 }
1104 
1105 static void
1106 vsw_m_stop(void *arg)
1107 {
1108 	vsw_t		*vswp = (vsw_t *)arg;
1109 
1110 	D1(vswp, "%s: enter", __func__);
1111 
1112 	WRITE_ENTER(&vswp->if_lockrw);
1113 	vswp->if_state &= ~VSW_IF_UP;
1114 	RW_EXIT(&vswp->if_lockrw);
1115 
1116 	mutex_enter(&vswp->hw_lock);
1117 
1118 	(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1119 
1120 	if (vswp->recfg_reqd)
1121 		vsw_reconfig_hw(vswp);
1122 
1123 	mutex_exit(&vswp->hw_lock);
1124 
1125 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1126 }
1127 
1128 static int
1129 vsw_m_start(void *arg)
1130 {
1131 	vsw_t		*vswp = (vsw_t *)arg;
1132 
1133 	D1(vswp, "%s: enter", __func__);
1134 
1135 	WRITE_ENTER(&vswp->if_lockrw);
1136 
1137 	vswp->if_state |= VSW_IF_UP;
1138 
1139 	if (vswp->switching_setup_done == B_FALSE) {
1140 		/*
1141 		 * If the switching mode has not been setup yet, just
1142 		 * return. The unicast address will be programmed
1143 		 * after the physical device is successfully setup by the
1144 		 * timeout handler.
1145 		 */
1146 		RW_EXIT(&vswp->if_lockrw);
1147 		return (0);
1148 	}
1149 
1150 	/* if in layer2 mode, program unicast address. */
1151 	if (vswp->mh != NULL) {
1152 		mutex_enter(&vswp->hw_lock);
1153 		(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1154 		mutex_exit(&vswp->hw_lock);
1155 	}
1156 
1157 	RW_EXIT(&vswp->if_lockrw);
1158 
1159 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1160 	return (0);
1161 }
1162 
1163 /*
1164  * Change the local interface address.
1165  *
1166  * Note: we don't support this entry point. The local
1167  * mac address of the switch can only be changed via its
1168  * MD node properties.
1169  */
1170 static int
1171 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1172 {
1173 	_NOTE(ARGUNUSED(arg, macaddr))
1174 
1175 	return (DDI_FAILURE);
1176 }
1177 
1178 static int
1179 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1180 {
1181 	vsw_t		*vswp = (vsw_t *)arg;
1182 	mcst_addr_t	*mcst_p = NULL;
1183 	uint64_t	addr = 0x0;
1184 	int		i, ret = 0;
1185 
1186 	D1(vswp, "%s: enter", __func__);
1187 
1188 	/*
1189 	 * Convert address into form that can be used
1190 	 * as hash table key.
1191 	 */
1192 	for (i = 0; i < ETHERADDRL; i++) {
1193 		addr = (addr << 8) | mca[i];
1194 	}
1195 
1196 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1197 
1198 	if (add) {
1199 		D2(vswp, "%s: adding multicast", __func__);
1200 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1201 			/*
1202 			 * Update the list of multicast addresses
1203 			 * contained within the vsw_t structure to
1204 			 * include this new one.
1205 			 */
1206 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1207 			if (mcst_p == NULL) {
1208 				DERR(vswp, "%s unable to alloc mem", __func__);
1209 				(void) vsw_del_mcst(vswp,
1210 				    VSW_LOCALDEV, addr, NULL);
1211 				return (1);
1212 			}
1213 			mcst_p->addr = addr;
1214 			ether_copy(mca, &mcst_p->mca);
1215 
1216 			/*
1217 			 * Call into the underlying driver to program the
1218 			 * address into HW.
1219 			 */
1220 			mutex_enter(&vswp->mac_lock);
1221 			if (vswp->mh != NULL) {
1222 				ret = mac_multicst_add(vswp->mh, mca);
1223 				if (ret != 0) {
1224 					cmn_err(CE_NOTE, "!vsw%d: unable to "
1225 					    "add multicast address",
1226 					    vswp->instance);
1227 					mutex_exit(&vswp->mac_lock);
1228 					(void) vsw_del_mcst(vswp,
1229 					    VSW_LOCALDEV, addr, NULL);
1230 					kmem_free(mcst_p, sizeof (*mcst_p));
1231 					return (ret);
1232 				}
1233 				mcst_p->mac_added = B_TRUE;
1234 			}
1235 			mutex_exit(&vswp->mac_lock);
1236 
1237 			mutex_enter(&vswp->mca_lock);
1238 			mcst_p->nextp = vswp->mcap;
1239 			vswp->mcap = mcst_p;
1240 			mutex_exit(&vswp->mca_lock);
1241 		} else {
1242 			cmn_err(CE_NOTE, "!vsw%d: unable to add multicast "
1243 			    "address", vswp->instance);
1244 		}
1245 		return (ret);
1246 	}
1247 
1248 	D2(vswp, "%s: removing multicast", __func__);
1249 	/*
1250 	 * Remove the address from the hash table..
1251 	 */
1252 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1253 
1254 		/*
1255 		 * ..and then from the list maintained in the
1256 		 * vsw_t structure.
1257 		 */
1258 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1259 		ASSERT(mcst_p != NULL);
1260 
1261 		mutex_enter(&vswp->mac_lock);
1262 		if (vswp->mh != NULL && mcst_p->mac_added) {
1263 			(void) mac_multicst_remove(vswp->mh, mca);
1264 			mcst_p->mac_added = B_FALSE;
1265 		}
1266 		mutex_exit(&vswp->mac_lock);
1267 		kmem_free(mcst_p, sizeof (*mcst_p));
1268 	}
1269 
1270 	D1(vswp, "%s: exit", __func__);
1271 
1272 	return (0);
1273 }
1274 
1275 static int
1276 vsw_m_promisc(void *arg, boolean_t on)
1277 {
1278 	vsw_t		*vswp = (vsw_t *)arg;
1279 
1280 	D1(vswp, "%s: enter", __func__);
1281 
1282 	WRITE_ENTER(&vswp->if_lockrw);
1283 	if (on)
1284 		vswp->if_state |= VSW_IF_PROMISC;
1285 	else
1286 		vswp->if_state &= ~VSW_IF_PROMISC;
1287 	RW_EXIT(&vswp->if_lockrw);
1288 
1289 	D1(vswp, "%s: exit", __func__);
1290 
1291 	return (0);
1292 }
1293 
1294 static mblk_t *
1295 vsw_m_tx(void *arg, mblk_t *mp)
1296 {
1297 	vsw_t		*vswp = (vsw_t *)arg;
1298 
1299 	D1(vswp, "%s: enter", __func__);
1300 
1301 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1302 
1303 	if (mp == NULL) {
1304 		return (NULL);
1305 	}
1306 
1307 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1308 
1309 	D1(vswp, "%s: exit", __func__);
1310 
1311 	return (NULL);
1312 }
1313 
1314 /*
1315  * Register for machine description (MD) updates.
1316  *
1317  * Returns 0 on success, 1 on failure.
1318  */
1319 static int
1320 vsw_mdeg_register(vsw_t *vswp)
1321 {
1322 	mdeg_prop_spec_t	*pspecp;
1323 	mdeg_node_spec_t	*inst_specp;
1324 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1325 	size_t			templatesz;
1326 	int			rv;
1327 
1328 	D1(vswp, "%s: enter", __func__);
1329 
1330 	/*
1331 	 * Allocate and initialize a per-instance copy
1332 	 * of the global property spec array that will
1333 	 * uniquely identify this vsw instance.
1334 	 */
1335 	templatesz = sizeof (vsw_prop_template);
1336 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1337 
1338 	bcopy(vsw_prop_template, pspecp, templatesz);
1339 
1340 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1341 
1342 	/* initialize the complete prop spec structure */
1343 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1344 	inst_specp->namep = "virtual-device";
1345 	inst_specp->specp = pspecp;
1346 
1347 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1348 	    vswp->regprop);
1349 	/*
1350 	 * Register an interest in 'virtual-device' nodes with a
1351 	 * 'name' property of 'virtual-network-switch'
1352 	 */
1353 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1354 	    (void *)vswp, &mdeg_hdl);
1355 	if (rv != MDEG_SUCCESS) {
1356 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1357 		    __func__, rv);
1358 		goto mdeg_reg_fail;
1359 	}
1360 
1361 	/*
1362 	 * Register an interest in 'vsw-port' nodes.
1363 	 */
1364 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1365 	    (void *)vswp, &mdeg_port_hdl);
1366 	if (rv != MDEG_SUCCESS) {
1367 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1368 		(void) mdeg_unregister(mdeg_hdl);
1369 		goto mdeg_reg_fail;
1370 	}
1371 
1372 	/* save off data that will be needed later */
1373 	vswp->inst_spec = inst_specp;
1374 	vswp->mdeg_hdl = mdeg_hdl;
1375 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1376 
1377 	D1(vswp, "%s: exit", __func__);
1378 	return (0);
1379 
1380 mdeg_reg_fail:
1381 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1382 	    vswp->instance);
1383 	kmem_free(pspecp, templatesz);
1384 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1385 
1386 	vswp->mdeg_hdl = NULL;
1387 	vswp->mdeg_port_hdl = NULL;
1388 
1389 	return (1);
1390 }
1391 
1392 static void
1393 vsw_mdeg_unregister(vsw_t *vswp)
1394 {
1395 	D1(vswp, "vsw_mdeg_unregister: enter");
1396 
1397 	if (vswp->mdeg_hdl != NULL)
1398 		(void) mdeg_unregister(vswp->mdeg_hdl);
1399 
1400 	if (vswp->mdeg_port_hdl != NULL)
1401 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1402 
1403 	if (vswp->inst_spec != NULL) {
1404 		if (vswp->inst_spec->specp != NULL) {
1405 			(void) kmem_free(vswp->inst_spec->specp,
1406 			    sizeof (vsw_prop_template));
1407 			vswp->inst_spec->specp = NULL;
1408 		}
1409 
1410 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1411 		vswp->inst_spec = NULL;
1412 	}
1413 
1414 	D1(vswp, "vsw_mdeg_unregister: exit");
1415 }
1416 
1417 /*
1418  * Mdeg callback invoked for the vsw node itself.
1419  */
1420 static int
1421 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1422 {
1423 	vsw_t		*vswp;
1424 	md_t		*mdp;
1425 	mde_cookie_t	node;
1426 	uint64_t	inst;
1427 	char		*node_name = NULL;
1428 
1429 	if (resp == NULL)
1430 		return (MDEG_FAILURE);
1431 
1432 	vswp = (vsw_t *)cb_argp;
1433 
1434 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1435 	    " : prev matched %d", __func__, resp->added.nelem,
1436 	    resp->removed.nelem, resp->match_curr.nelem,
1437 	    resp->match_prev.nelem);
1438 
1439 	/*
1440 	 * We get an initial callback for this node as 'added'
1441 	 * after registering with mdeg. Note that we would have
1442 	 * already gathered information about this vsw node by
1443 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1444 	 * So, there is a window where the properties of this
1445 	 * node might have changed when we get this initial 'added'
1446 	 * callback. We handle this as if an update occured
1447 	 * and invoke the same function which handles updates to
1448 	 * the properties of this vsw-node if any.
1449 	 *
1450 	 * A non-zero 'match' value indicates that the MD has been
1451 	 * updated and that a virtual-network-switch node is
1452 	 * present which may or may not have been updated. It is
1453 	 * up to the clients to examine their own nodes and
1454 	 * determine if they have changed.
1455 	 */
1456 	if (resp->added.nelem != 0) {
1457 
1458 		if (resp->added.nelem != 1) {
1459 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1460 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1461 			return (MDEG_FAILURE);
1462 		}
1463 
1464 		mdp = resp->added.mdp;
1465 		node = resp->added.mdep[0];
1466 
1467 	} else if (resp->match_curr.nelem != 0) {
1468 
1469 		if (resp->match_curr.nelem != 1) {
1470 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1471 			    "invalid: %d\n", vswp->instance,
1472 			    resp->match_curr.nelem);
1473 			return (MDEG_FAILURE);
1474 		}
1475 
1476 		mdp = resp->match_curr.mdp;
1477 		node = resp->match_curr.mdep[0];
1478 
1479 	} else {
1480 		return (MDEG_FAILURE);
1481 	}
1482 
1483 	/* Validate name and instance */
1484 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1485 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1486 		return (MDEG_FAILURE);
1487 	}
1488 
1489 	/* is this a virtual-network-switch? */
1490 	if (strcmp(node_name, vsw_propname) != 0) {
1491 		DERR(vswp, "%s: Invalid node name: %s\n",
1492 		    __func__, node_name);
1493 		return (MDEG_FAILURE);
1494 	}
1495 
1496 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1497 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1498 		    __func__);
1499 		return (MDEG_FAILURE);
1500 	}
1501 
1502 	/* is this the right instance of vsw? */
1503 	if (inst != vswp->regprop) {
1504 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1505 		    __func__, inst);
1506 		return (MDEG_FAILURE);
1507 	}
1508 
1509 	vsw_update_md_prop(vswp, mdp, node);
1510 
1511 	return (MDEG_SUCCESS);
1512 }
1513 
1514 /*
1515  * Mdeg callback invoked for changes to the vsw-port nodes
1516  * under the vsw node.
1517  */
1518 static int
1519 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1520 {
1521 	vsw_t		*vswp;
1522 	int		idx;
1523 	md_t		*mdp;
1524 	mde_cookie_t	node;
1525 	uint64_t	inst;
1526 	int		rv;
1527 
1528 	if ((resp == NULL) || (cb_argp == NULL))
1529 		return (MDEG_FAILURE);
1530 
1531 	vswp = (vsw_t *)cb_argp;
1532 
1533 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1534 	    " : prev matched %d", __func__, resp->added.nelem,
1535 	    resp->removed.nelem, resp->match_curr.nelem,
1536 	    resp->match_prev.nelem);
1537 
1538 	/* process added ports */
1539 	for (idx = 0; idx < resp->added.nelem; idx++) {
1540 		mdp = resp->added.mdp;
1541 		node = resp->added.mdep[idx];
1542 
1543 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1544 
1545 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1546 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1547 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1548 		}
1549 	}
1550 
1551 	/* process removed ports */
1552 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1553 		mdp = resp->removed.mdp;
1554 		node = resp->removed.mdep[idx];
1555 
1556 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1557 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1558 			    __func__, id_propname, idx);
1559 			continue;
1560 		}
1561 
1562 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1563 
1564 		if (vsw_port_detach(vswp, inst) != 0) {
1565 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1566 			    vswp->instance, inst);
1567 		}
1568 	}
1569 
1570 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1571 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1572 		    resp->match_curr.mdep[idx],
1573 		    resp->match_prev.mdp,
1574 		    resp->match_prev.mdep[idx]);
1575 	}
1576 
1577 	D1(vswp, "%s: exit", __func__);
1578 
1579 	return (MDEG_SUCCESS);
1580 }
1581 
1582 /*
1583  * Scan the machine description for this instance of vsw
1584  * and read its properties. Called only from vsw_attach().
1585  * Returns: 0 on success, 1 on failure.
1586  */
1587 static int
1588 vsw_read_mdprops(vsw_t *vswp)
1589 {
1590 	md_t		*mdp = NULL;
1591 	mde_cookie_t	rootnode;
1592 	mde_cookie_t	*listp = NULL;
1593 	uint64_t	inst;
1594 	uint64_t	cfgh;
1595 	char		*name;
1596 	int		rv = 1;
1597 	int		num_nodes = 0;
1598 	int		num_devs = 0;
1599 	int		listsz = 0;
1600 	int		i;
1601 
1602 	/*
1603 	 * In each 'virtual-device' node in the MD there is a
1604 	 * 'cfg-handle' property which is the MD's concept of
1605 	 * an instance number (this may be completely different from
1606 	 * the device drivers instance #). OBP reads that value and
1607 	 * stores it in the 'reg' property of the appropriate node in
1608 	 * the device tree. We first read this reg property and use this
1609 	 * to compare against the 'cfg-handle' property of vsw nodes
1610 	 * in MD to get to this specific vsw instance and then read
1611 	 * other properties that we are interested in.
1612 	 * We also cache the value of 'reg' property and use it later
1613 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1614 	 */
1615 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1616 	    DDI_PROP_DONTPASS, reg_propname, -1);
1617 	if (inst == -1) {
1618 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1619 		    "OBP device tree", vswp->instance, reg_propname);
1620 		return (rv);
1621 	}
1622 
1623 	vswp->regprop = inst;
1624 
1625 	if ((mdp = md_get_handle()) == NULL) {
1626 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1627 		return (rv);
1628 	}
1629 
1630 	num_nodes = md_node_count(mdp);
1631 	ASSERT(num_nodes > 0);
1632 
1633 	listsz = num_nodes * sizeof (mde_cookie_t);
1634 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1635 
1636 	rootnode = md_root_node(mdp);
1637 
1638 	/* search for all "virtual_device" nodes */
1639 	num_devs = md_scan_dag(mdp, rootnode,
1640 	    md_find_name(mdp, vdev_propname),
1641 	    md_find_name(mdp, "fwd"), listp);
1642 	if (num_devs <= 0) {
1643 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1644 		goto vsw_readmd_exit;
1645 	}
1646 
1647 	/*
1648 	 * Now loop through the list of virtual-devices looking for
1649 	 * devices with name "virtual-network-switch" and for each
1650 	 * such device compare its instance with what we have from
1651 	 * the 'reg' property to find the right node in MD and then
1652 	 * read all its properties.
1653 	 */
1654 	for (i = 0; i < num_devs; i++) {
1655 
1656 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1657 			DWARN(vswp, "%s: name property not found\n",
1658 			    __func__);
1659 			goto vsw_readmd_exit;
1660 		}
1661 
1662 		/* is this a virtual-network-switch? */
1663 		if (strcmp(name, vsw_propname) != 0)
1664 			continue;
1665 
1666 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1667 			DWARN(vswp, "%s: cfg-handle property not found\n",
1668 			    __func__);
1669 			goto vsw_readmd_exit;
1670 		}
1671 
1672 		/* is this the required instance of vsw? */
1673 		if (inst != cfgh)
1674 			continue;
1675 
1676 		/* now read all properties of this vsw instance */
1677 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1678 		break;
1679 	}
1680 
1681 vsw_readmd_exit:
1682 
1683 	kmem_free(listp, listsz);
1684 	(void) md_fini_handle(mdp);
1685 	return (rv);
1686 }
1687 
1688 /*
1689  * Read the initial start-of-day values from the specified MD node.
1690  */
1691 static int
1692 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1693 {
1694 	int		i;
1695 	uint64_t 	macaddr = 0;
1696 
1697 	D1(vswp, "%s: enter", __func__);
1698 
1699 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1700 		return (1);
1701 	}
1702 
1703 	/* mac address for vswitch device itself */
1704 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1705 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1706 		    vswp->instance);
1707 		return (1);
1708 	}
1709 
1710 	vsw_save_lmacaddr(vswp, macaddr);
1711 
1712 	if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) {
1713 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1714 		    "defaulting to 'switched' mode",
1715 		    __func__, smode_propname);
1716 
1717 		for (i = 0; i < NUM_SMODES; i++)
1718 			vswp->smode[i] = VSW_LAYER2;
1719 
1720 		vswp->smode_num = NUM_SMODES;
1721 	} else {
1722 		ASSERT(vswp->smode_num != 0);
1723 	}
1724 
1725 	/* read vlan id properties of this vsw instance */
1726 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1727 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1728 
1729 	/* read priority-ether-types */
1730 	vsw_read_pri_eth_types(vswp, mdp, node);
1731 
1732 	D1(vswp, "%s: exit", __func__);
1733 	return (0);
1734 }
1735 
1736 /*
1737  * Read vlan id properties of the given MD node.
1738  * Arguments:
1739  *   arg:          device argument(vsw device or a port)
1740  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1741  *   mdp:          machine description
1742  *   node:         md node cookie
1743  *
1744  * Returns:
1745  *   pvidp:        port-vlan-id of the node
1746  *   vidspp:       list of vlan-ids of the node
1747  *   nvidsp:       # of vlan-ids in the list
1748  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1749  */
1750 static void
1751 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1752 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1753 	uint16_t *default_idp)
1754 {
1755 	vsw_t		*vswp;
1756 	vsw_port_t	*portp;
1757 	char		*pvid_propname;
1758 	char		*vid_propname;
1759 	uint_t		nvids = 0;
1760 	uint32_t	vids_size;
1761 	int		rv;
1762 	int		i;
1763 	uint64_t	*data;
1764 	uint64_t	val;
1765 	int		size;
1766 	int		inst;
1767 
1768 	if (type == VSW_LOCALDEV) {
1769 
1770 		vswp = (vsw_t *)arg;
1771 		pvid_propname = vsw_pvid_propname;
1772 		vid_propname = vsw_vid_propname;
1773 		inst = vswp->instance;
1774 
1775 	} else if (type == VSW_VNETPORT) {
1776 
1777 		portp = (vsw_port_t *)arg;
1778 		vswp = portp->p_vswp;
1779 		pvid_propname = port_pvid_propname;
1780 		vid_propname = port_vid_propname;
1781 		inst = portp->p_instance;
1782 
1783 	} else {
1784 		return;
1785 	}
1786 
1787 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1788 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1789 		if (rv != 0) {
1790 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1791 			    vsw_dvid_propname);
1792 
1793 			*default_idp = vsw_default_vlan_id;
1794 		} else {
1795 			*default_idp = val & 0xFFF;
1796 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1797 			    vsw_dvid_propname, inst, *default_idp);
1798 		}
1799 	}
1800 
1801 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1802 	if (rv != 0) {
1803 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1804 		*pvidp = vsw_default_vlan_id;
1805 	} else {
1806 
1807 		*pvidp = val & 0xFFF;
1808 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1809 		    pvid_propname, inst, *pvidp);
1810 	}
1811 
1812 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1813 	    &size);
1814 	if (rv != 0) {
1815 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1816 		size = 0;
1817 	} else {
1818 		size /= sizeof (uint64_t);
1819 	}
1820 	nvids = size;
1821 
1822 	if (nvids != 0) {
1823 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1824 		vids_size = sizeof (uint16_t) * nvids;
1825 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1826 		for (i = 0; i < nvids; i++) {
1827 			(*vidspp)[i] = data[i] & 0xFFFF;
1828 			D2(vswp, " %d ", (*vidspp)[i]);
1829 		}
1830 		D2(vswp, "\n");
1831 	}
1832 
1833 	*nvidsp = nvids;
1834 }
1835 
1836 /*
1837  * This function reads "priority-ether-types" property from md. This property
1838  * is used to enable support for priority frames. Applications which need
1839  * guaranteed and timely delivery of certain high priority frames to/from
1840  * a vnet or vsw within ldoms, should configure this property by providing
1841  * the ether type(s) for which the priority facility is needed.
1842  * Normal data frames are delivered over a ldc channel using the descriptor
1843  * ring mechanism which is constrained by factors such as descriptor ring size,
1844  * the rate at which the ring is processed at the peer ldc end point, etc.
1845  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1846  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1847  * descriptor ring path and enables a more reliable and timely delivery of
1848  * frames to the peer.
1849  */
1850 static void
1851 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1852 {
1853 	int		rv;
1854 	uint16_t	*types;
1855 	uint64_t	*data;
1856 	int		size;
1857 	int		i;
1858 	size_t		mblk_sz;
1859 
1860 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1861 	    (uint8_t **)&data, &size);
1862 	if (rv != 0) {
1863 		/*
1864 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1865 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1866 		 */
1867 		if (vsw_pri_eth_type != 0) {
1868 			size = sizeof (vsw_pri_eth_type);
1869 			data = &vsw_pri_eth_type;
1870 		} else {
1871 			D3(vswp, "%s: prop(%s) not found", __func__,
1872 			    pri_types_propname);
1873 			size = 0;
1874 		}
1875 	}
1876 
1877 	if (size == 0) {
1878 		vswp->pri_num_types = 0;
1879 		return;
1880 	}
1881 
1882 	/*
1883 	 * we have some priority-ether-types defined;
1884 	 * allocate a table of these types and also
1885 	 * allocate a pool of mblks to transmit these
1886 	 * priority packets.
1887 	 */
1888 	size /= sizeof (uint64_t);
1889 	vswp->pri_num_types = size;
1890 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1891 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1892 		types[i] = data[i] & 0xFFFF;
1893 	}
1894 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1895 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1896 }
1897 
1898 /*
1899  * Check to see if the relevant properties in the specified node have
1900  * changed, and if so take the appropriate action.
1901  *
1902  * If any of the properties are missing or invalid we don't take
1903  * any action, as this function should only be invoked when modifications
1904  * have been made to what we assume is a working configuration, which
1905  * we leave active.
1906  *
1907  * Note it is legal for this routine to be invoked even if none of the
1908  * properties in the port node within the MD have actually changed.
1909  */
1910 static void
1911 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1912 {
1913 	char		physname[LIFNAMSIZ];
1914 	char		drv[LIFNAMSIZ];
1915 	uint_t		ddi_instance;
1916 	uint8_t		new_smode[NUM_SMODES];
1917 	int		i, smode_num = 0;
1918 	uint64_t 	macaddr = 0;
1919 	enum		{MD_init = 0x1,
1920 				MD_physname = 0x2,
1921 				MD_macaddr = 0x4,
1922 				MD_smode = 0x8,
1923 				MD_vlans = 0x10} updated;
1924 	int		rv;
1925 	uint16_t	pvid;
1926 	uint16_t	*vids;
1927 	uint16_t	nvids;
1928 
1929 	updated = MD_init;
1930 
1931 	D1(vswp, "%s: enter", __func__);
1932 
1933 	/*
1934 	 * Check if name of physical device in MD has changed.
1935 	 */
1936 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
1937 		/*
1938 		 * Do basic sanity check on new device name/instance,
1939 		 * if its non NULL. It is valid for the device name to
1940 		 * have changed from a non NULL to a NULL value, i.e.
1941 		 * the vsw is being changed to 'routed' mode.
1942 		 */
1943 		if ((strlen(physname) != 0) &&
1944 		    (ddi_parse(physname, drv,
1945 		    &ddi_instance) != DDI_SUCCESS)) {
1946 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
1947 			    " a valid device name/instance",
1948 			    vswp->instance, physname);
1949 			goto fail_reconf;
1950 		}
1951 
1952 		if (strcmp(physname, vswp->physname)) {
1953 			D2(vswp, "%s: device name changed from %s to %s",
1954 			    __func__, vswp->physname, physname);
1955 
1956 			updated |= MD_physname;
1957 		} else {
1958 			D2(vswp, "%s: device name unchanged at %s",
1959 			    __func__, vswp->physname);
1960 		}
1961 	} else {
1962 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
1963 		    "device from updated MD.", vswp->instance);
1964 		goto fail_reconf;
1965 	}
1966 
1967 	/*
1968 	 * Check if MAC address has changed.
1969 	 */
1970 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1971 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1972 		    vswp->instance);
1973 		goto fail_reconf;
1974 	} else {
1975 		uint64_t maddr = macaddr;
1976 		READ_ENTER(&vswp->if_lockrw);
1977 		for (i = ETHERADDRL - 1; i >= 0; i--) {
1978 			if (vswp->if_addr.ether_addr_octet[i]
1979 			    != (macaddr & 0xFF)) {
1980 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
1981 				    __func__, i,
1982 				    vswp->if_addr.ether_addr_octet[i],
1983 				    (macaddr & 0xFF));
1984 				updated |= MD_macaddr;
1985 				macaddr = maddr;
1986 				break;
1987 			}
1988 			macaddr >>= 8;
1989 		}
1990 		RW_EXIT(&vswp->if_lockrw);
1991 		if (updated & MD_macaddr) {
1992 			vsw_save_lmacaddr(vswp, macaddr);
1993 		}
1994 	}
1995 
1996 	/*
1997 	 * Check if switching modes have changed.
1998 	 */
1999 	if (vsw_get_md_smodes(vswp, mdp, node,
2000 	    new_smode, &smode_num)) {
2001 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2002 		    vswp->instance, smode_propname);
2003 		goto fail_reconf;
2004 	} else {
2005 		ASSERT(smode_num != 0);
2006 		if (smode_num != vswp->smode_num) {
2007 			D2(vswp, "%s: number of modes changed from %d to %d",
2008 			    __func__, vswp->smode_num, smode_num);
2009 		}
2010 
2011 		for (i = 0; i < smode_num; i++) {
2012 			if (new_smode[i] != vswp->smode[i]) {
2013 				D2(vswp, "%s: mode changed from %d to %d",
2014 				    __func__, vswp->smode[i], new_smode[i]);
2015 				updated |= MD_smode;
2016 				break;
2017 			}
2018 		}
2019 	}
2020 
2021 	/* Read the vlan ids */
2022 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2023 	    &nvids, NULL);
2024 
2025 	/* Determine if there are any vlan id updates */
2026 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2027 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2028 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2029 	    bcmp(vids, vswp->vids, sizeof (uint16_t) * nvids))) {
2030 		updated |= MD_vlans;
2031 	}
2032 
2033 	/*
2034 	 * Now make any changes which are needed...
2035 	 */
2036 
2037 	if (updated & (MD_physname | MD_smode)) {
2038 
2039 		/*
2040 		 * Stop any pending timeout to setup switching mode.
2041 		 */
2042 		vsw_stop_switching_timeout(vswp);
2043 
2044 		/* Cleanup HybridIO */
2045 		vsw_hio_cleanup(vswp);
2046 
2047 		/*
2048 		 * Remove unicst, mcst addrs of vsw interface
2049 		 * and ports from the physdev.
2050 		 */
2051 		vsw_unset_addrs(vswp);
2052 
2053 		/*
2054 		 * Stop, detach and close the old device..
2055 		 */
2056 		mutex_enter(&vswp->mac_lock);
2057 
2058 		vsw_mac_detach(vswp);
2059 		vsw_mac_close(vswp);
2060 
2061 		mutex_exit(&vswp->mac_lock);
2062 
2063 		/*
2064 		 * Update phys name.
2065 		 */
2066 		if (updated & MD_physname) {
2067 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2068 			    vswp->instance, vswp->physname, physname);
2069 			(void) strncpy(vswp->physname,
2070 			    physname, strlen(physname) + 1);
2071 		}
2072 
2073 		/*
2074 		 * Update array with the new switch mode values.
2075 		 */
2076 		if (updated & MD_smode) {
2077 			for (i = 0; i < smode_num; i++)
2078 				vswp->smode[i] = new_smode[i];
2079 
2080 			vswp->smode_num = smode_num;
2081 			vswp->smode_idx = 0;
2082 		}
2083 
2084 		/*
2085 		 * ..and attach, start the new device.
2086 		 */
2087 		rv = vsw_setup_switching(vswp);
2088 		if (rv == EAGAIN) {
2089 			/*
2090 			 * Unable to setup switching mode.
2091 			 * As the error is EAGAIN, schedule a timeout to retry
2092 			 * and return. Programming addresses of ports and
2093 			 * vsw interface will be done when the timeout handler
2094 			 * completes successfully.
2095 			 */
2096 			mutex_enter(&vswp->swtmout_lock);
2097 
2098 			vswp->swtmout_enabled = B_TRUE;
2099 			vswp->swtmout_id =
2100 			    timeout(vsw_setup_switching_timeout, vswp,
2101 			    (vsw_setup_switching_delay *
2102 			    drv_usectohz(MICROSEC)));
2103 
2104 			mutex_exit(&vswp->swtmout_lock);
2105 
2106 			return;
2107 
2108 		} else if (rv) {
2109 			goto fail_update;
2110 		}
2111 
2112 		/*
2113 		 * program unicst, mcst addrs of vsw interface
2114 		 * and ports in the physdev.
2115 		 */
2116 		vsw_set_addrs(vswp);
2117 
2118 		/* Start HIO for ports that have already connected */
2119 		vsw_hio_start_ports(vswp);
2120 
2121 	} else if (updated & MD_macaddr) {
2122 		/*
2123 		 * We enter here if only MD_macaddr is exclusively updated.
2124 		 * If MD_physname and/or MD_smode are also updated, then
2125 		 * as part of that, we would have implicitly processed
2126 		 * MD_macaddr update (above).
2127 		 */
2128 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2129 		    vswp->instance, macaddr);
2130 
2131 		READ_ENTER(&vswp->if_lockrw);
2132 		if (vswp->if_state & VSW_IF_UP) {
2133 
2134 			mutex_enter(&vswp->hw_lock);
2135 			/*
2136 			 * Remove old mac address of vsw interface
2137 			 * from the physdev
2138 			 */
2139 			(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
2140 			/*
2141 			 * Program new mac address of vsw interface
2142 			 * in the physdev
2143 			 */
2144 			rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
2145 			mutex_exit(&vswp->hw_lock);
2146 			if (rv != 0) {
2147 				cmn_err(CE_NOTE,
2148 				    "!vsw%d: failed to program interface "
2149 				    "unicast address\n", vswp->instance);
2150 			}
2151 			/*
2152 			 * Notify the MAC layer of the changed address.
2153 			 */
2154 			mac_unicst_update(vswp->if_mh,
2155 			    (uint8_t *)&vswp->if_addr);
2156 
2157 		}
2158 		RW_EXIT(&vswp->if_lockrw);
2159 
2160 	}
2161 
2162 	if (updated & MD_vlans) {
2163 		/* Remove existing vlan ids from the hash table. */
2164 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2165 
2166 		/* save the new vlan ids */
2167 		vswp->pvid = pvid;
2168 		if (vswp->nvids != 0) {
2169 			kmem_free(vswp->vids, sizeof (uint16_t) * vswp->nvids);
2170 			vswp->nvids = 0;
2171 		}
2172 		if (nvids != 0) {
2173 			vswp->nvids = nvids;
2174 			vswp->vids = vids;
2175 		}
2176 
2177 		/* add these new vlan ids into hash table */
2178 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2179 	} else {
2180 		if (nvids != 0) {
2181 			kmem_free(vids, sizeof (uint16_t) * nvids);
2182 		}
2183 	}
2184 
2185 	return;
2186 
2187 fail_reconf:
2188 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2189 	return;
2190 
2191 fail_update:
2192 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2193 	    vswp->instance);
2194 }
2195 
2196 /*
2197  * Read the port's md properties.
2198  */
2199 static int
2200 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2201 	md_t *mdp, mde_cookie_t *node)
2202 {
2203 	uint64_t		ldc_id;
2204 	uint8_t			*addrp;
2205 	int			i, addrsz;
2206 	int			num_nodes = 0, nchan = 0;
2207 	int			listsz = 0;
2208 	mde_cookie_t		*listp = NULL;
2209 	struct ether_addr	ea;
2210 	uint64_t		macaddr;
2211 	uint64_t		inst = 0;
2212 	uint64_t		val;
2213 
2214 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2215 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2216 		    id_propname);
2217 		return (1);
2218 	}
2219 
2220 	/*
2221 	 * Find the channel endpoint node(s) (which should be under this
2222 	 * port node) which contain the channel id(s).
2223 	 */
2224 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2225 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2226 		    __func__, num_nodes);
2227 		return (1);
2228 	}
2229 
2230 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2231 
2232 	/* allocate enough space for node list */
2233 	listsz = num_nodes * sizeof (mde_cookie_t);
2234 	listp = kmem_zalloc(listsz, KM_SLEEP);
2235 
2236 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2237 	    md_find_name(mdp, "fwd"), listp);
2238 
2239 	if (nchan <= 0) {
2240 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2241 		kmem_free(listp, listsz);
2242 		return (1);
2243 	}
2244 
2245 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2246 
2247 	/* use property from first node found */
2248 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2249 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2250 		    id_propname);
2251 		kmem_free(listp, listsz);
2252 		return (1);
2253 	}
2254 
2255 	/* don't need list any more */
2256 	kmem_free(listp, listsz);
2257 
2258 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2259 
2260 	/* read mac-address property */
2261 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2262 	    &addrp, &addrsz)) {
2263 		DWARN(vswp, "%s: prop(%s) not found",
2264 		    __func__, remaddr_propname);
2265 		return (1);
2266 	}
2267 
2268 	if (addrsz < ETHERADDRL) {
2269 		DWARN(vswp, "%s: invalid address size", __func__);
2270 		return (1);
2271 	}
2272 
2273 	macaddr = *((uint64_t *)addrp);
2274 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2275 
2276 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2277 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2278 		macaddr >>= 8;
2279 	}
2280 
2281 	/* now update all properties into the port */
2282 	portp->p_vswp = vswp;
2283 	portp->p_instance = inst;
2284 	portp->addr_set = VSW_ADDR_UNSET;
2285 	ether_copy(&ea, &portp->p_macaddr);
2286 	if (nchan > VSW_PORT_MAX_LDCS) {
2287 		D2(vswp, "%s: using first of %d ldc ids",
2288 		    __func__, nchan);
2289 		nchan = VSW_PORT_MAX_LDCS;
2290 	}
2291 	portp->num_ldcs = nchan;
2292 	portp->ldc_ids =
2293 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2294 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2295 
2296 	/* read vlan id properties of this port node */
2297 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2298 	    &portp->vids, &portp->nvids, NULL);
2299 
2300 	/* Check if hybrid property is present */
2301 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2302 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2303 		portp->p_hio_enabled = B_TRUE;
2304 	} else {
2305 		portp->p_hio_enabled = B_FALSE;
2306 	}
2307 	/*
2308 	 * Port hio capability determined after version
2309 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2310 	 */
2311 	portp->p_hio_capable = B_FALSE;
2312 	return (0);
2313 }
2314 
2315 /*
2316  * Add a new port to the system.
2317  *
2318  * Returns 0 on success, 1 on failure.
2319  */
2320 int
2321 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2322 {
2323 	vsw_port_t	*portp;
2324 	int		rv;
2325 
2326 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2327 
2328 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2329 	if (rv != 0) {
2330 		kmem_free(portp, sizeof (*portp));
2331 		return (1);
2332 	}
2333 
2334 	rv = vsw_port_attach(portp);
2335 	if (rv != 0) {
2336 		DERR(vswp, "%s: failed to attach port", __func__);
2337 		return (1);
2338 	}
2339 
2340 	return (0);
2341 }
2342 
2343 static int
2344 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2345 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2346 {
2347 	uint64_t	cport_num;
2348 	uint64_t	pport_num;
2349 	vsw_port_list_t	*plistp;
2350 	vsw_port_t	*portp;
2351 	boolean_t	updated_vlans = B_FALSE;
2352 	uint16_t	pvid;
2353 	uint16_t	*vids;
2354 	uint16_t	nvids;
2355 	uint64_t	val;
2356 	boolean_t	hio_enabled = B_FALSE;
2357 
2358 	/*
2359 	 * For now, we get port updates only if vlan ids changed.
2360 	 * We read the port num and do some sanity check.
2361 	 */
2362 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2363 		return (1);
2364 	}
2365 
2366 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2367 		return (1);
2368 	}
2369 	if (cport_num != pport_num)
2370 		return (1);
2371 
2372 	plistp = &(vswp->plist);
2373 
2374 	READ_ENTER(&plistp->lockrw);
2375 
2376 	portp = vsw_lookup_port(vswp, cport_num);
2377 	if (portp == NULL) {
2378 		RW_EXIT(&plistp->lockrw);
2379 		return (1);
2380 	}
2381 
2382 	/* Read the vlan ids */
2383 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2384 	    &vids, &nvids, NULL);
2385 
2386 	/* Determine if there are any vlan id updates */
2387 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2388 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2389 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2390 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2391 		updated_vlans = B_TRUE;
2392 	}
2393 
2394 	if (updated_vlans == B_TRUE) {
2395 
2396 		/* Remove existing vlan ids from the hash table. */
2397 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2398 
2399 		/* save the new vlan ids */
2400 		portp->pvid = pvid;
2401 		if (portp->nvids != 0) {
2402 			kmem_free(portp->vids,
2403 			    sizeof (uint16_t) * portp->nvids);
2404 			portp->nvids = 0;
2405 		}
2406 		if (nvids != 0) {
2407 			portp->vids = kmem_zalloc(sizeof (uint16_t) *
2408 			    nvids, KM_SLEEP);
2409 			bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2410 			portp->nvids = nvids;
2411 			kmem_free(vids, sizeof (uint16_t) * nvids);
2412 		}
2413 
2414 		/* add these new vlan ids into hash table */
2415 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2416 
2417 		/* reset the port if it is vlan unaware (ver < 1.3) */
2418 		vsw_vlan_unaware_port_reset(portp);
2419 	}
2420 
2421 	/* Check if hybrid property is present */
2422 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2423 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2424 		hio_enabled = B_TRUE;
2425 	}
2426 
2427 	if (portp->p_hio_enabled != hio_enabled) {
2428 		vsw_hio_port_update(portp, hio_enabled);
2429 	}
2430 
2431 	RW_EXIT(&plistp->lockrw);
2432 
2433 	return (0);
2434 }
2435 
2436 /*
2437  * vsw_mac_rx -- A common function to send packets to the interface.
2438  * By default this function check if the interface is UP or not, the
2439  * rest of the behaviour depends on the flags as below:
2440  *
2441  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2442  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2443  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2444  */
2445 void
2446 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2447     mblk_t *mp, vsw_macrx_flags_t flags)
2448 {
2449 	mblk_t		*mpt;
2450 
2451 	D1(vswp, "%s:enter\n", __func__);
2452 	READ_ENTER(&vswp->if_lockrw);
2453 	/* Check if the interface is up */
2454 	if (!(vswp->if_state & VSW_IF_UP)) {
2455 		RW_EXIT(&vswp->if_lockrw);
2456 		/* Free messages only if FREEMSG flag specified */
2457 		if (flags & VSW_MACRX_FREEMSG) {
2458 			freemsgchain(mp);
2459 		}
2460 		D1(vswp, "%s:exit\n", __func__);
2461 		return;
2462 	}
2463 	/*
2464 	 * If PROMISC flag is passed, then check if
2465 	 * the interface is in the PROMISC mode.
2466 	 * If not, drop the messages.
2467 	 */
2468 	if (flags & VSW_MACRX_PROMISC) {
2469 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2470 			RW_EXIT(&vswp->if_lockrw);
2471 			/* Free messages only if FREEMSG flag specified */
2472 			if (flags & VSW_MACRX_FREEMSG) {
2473 				freemsgchain(mp);
2474 			}
2475 			D1(vswp, "%s:exit\n", __func__);
2476 			return;
2477 		}
2478 	}
2479 	RW_EXIT(&vswp->if_lockrw);
2480 	/*
2481 	 * If COPYMSG flag is passed, then make a copy
2482 	 * of the message chain and send up the copy.
2483 	 */
2484 	if (flags & VSW_MACRX_COPYMSG) {
2485 		mp = copymsgchain(mp);
2486 		if (mp == NULL) {
2487 			D1(vswp, "%s:exit\n", __func__);
2488 			return;
2489 		}
2490 	}
2491 
2492 	D2(vswp, "%s: sending up stack", __func__);
2493 
2494 	mpt = NULL;
2495 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2496 	if (mp != NULL) {
2497 		mac_rx(vswp->if_mh, mrh, mp);
2498 	}
2499 	D1(vswp, "%s:exit\n", __func__);
2500 }
2501 
2502 /* copy mac address of vsw into soft state structure */
2503 static void
2504 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2505 {
2506 	int	i;
2507 
2508 	WRITE_ENTER(&vswp->if_lockrw);
2509 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2510 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2511 		macaddr >>= 8;
2512 	}
2513 	RW_EXIT(&vswp->if_lockrw);
2514 }
2515