xref: /titanic_41/usr/src/uts/sun4v/io/vsw.c (revision c94146ce6d2a316eca6450a8cd77d8dee993f233)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/debug.h>
32 #include <sys/time.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/user.h>
36 #include <sys/stropts.h>
37 #include <sys/stream.h>
38 #include <sys/strlog.h>
39 #include <sys/strsubr.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpu.h>
42 #include <sys/kmem.h>
43 #include <sys/conf.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47 #include <sys/stat.h>
48 #include <sys/kstat.h>
49 #include <sys/vtrace.h>
50 #include <sys/strsun.h>
51 #include <sys/dlpi.h>
52 #include <sys/ethernet.h>
53 #include <net/if.h>
54 #include <sys/varargs.h>
55 #include <sys/machsystm.h>
56 #include <sys/modctl.h>
57 #include <sys/modhash.h>
58 #include <sys/mac.h>
59 #include <sys/mac_ether.h>
60 #include <sys/taskq.h>
61 #include <sys/note.h>
62 #include <sys/mach_descrip.h>
63 #include <sys/mac.h>
64 #include <sys/mdeg.h>
65 #include <sys/ldc.h>
66 #include <sys/vsw_fdb.h>
67 #include <sys/vsw.h>
68 #include <sys/vio_mailbox.h>
69 #include <sys/vnet_mailbox.h>
70 #include <sys/vnet_common.h>
71 #include <sys/vio_util.h>
72 #include <sys/sdt.h>
73 #include <sys/atomic.h>
74 #include <sys/callb.h>
75 #include <sys/vlan.h>
76 
77 /*
78  * Function prototypes.
79  */
80 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
81 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
82 static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
83 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
84 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);
85 
86 /* MDEG routines */
87 static	int vsw_mdeg_register(vsw_t *vswp);
88 static	void vsw_mdeg_unregister(vsw_t *vswp);
89 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
90 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
91 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
92 static	int vsw_read_mdprops(vsw_t *vswp);
93 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
94 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
95 	uint16_t *nvidsp, uint16_t *default_idp);
96 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
97 	md_t *mdp, mde_cookie_t *node);
98 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
99 	mde_cookie_t node);
100 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
101 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
102 
103 /* Mac driver related routines */
104 static int vsw_mac_register(vsw_t *);
105 static int vsw_mac_unregister(vsw_t *);
106 static int vsw_m_stat(void *, uint_t, uint64_t *);
107 static void vsw_m_stop(void *arg);
108 static int vsw_m_start(void *arg);
109 static int vsw_m_unicst(void *arg, const uint8_t *);
110 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
111 static int vsw_m_promisc(void *arg, boolean_t);
112 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
113 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
114     mblk_t *mp, vsw_macrx_flags_t flags);
115 
116 /*
117  * Functions imported from other files.
118  */
119 extern void vsw_setup_switching_timeout(void *arg);
120 extern void vsw_stop_switching_timeout(vsw_t *vswp);
121 extern int vsw_setup_switching(vsw_t *);
122 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
123     vsw_port_t *port, mac_resource_handle_t mrh);
124 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
125 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
126 extern void vsw_del_mcst_vsw(vsw_t *);
127 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
128 extern int vsw_detach_ports(vsw_t *vswp);
129 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
130 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
131 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
132 	md_t *prev_mdp, mde_cookie_t prev_mdex);
133 extern	int vsw_port_attach(vsw_port_t *port);
134 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
135 extern int vsw_mac_attach(vsw_t *vswp);
136 extern void vsw_mac_detach(vsw_t *vswp);
137 extern int vsw_mac_open(vsw_t *vswp);
138 extern void vsw_mac_close(vsw_t *vswp);
139 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
140 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
141 extern void vsw_reconfig_hw(vsw_t *);
142 extern void vsw_unset_addrs(vsw_t *vswp);
143 extern void vsw_set_addrs(vsw_t *vswp);
144 extern void vsw_create_vlans(void *arg, int type);
145 extern void vsw_destroy_vlans(void *arg, int type);
146 extern void vsw_vlan_add_ids(void *arg, int type);
147 extern void vsw_vlan_remove_ids(void *arg, int type);
148 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
149 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
150 	mblk_t **npt);
151 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
152 extern void vsw_hio_cleanup(vsw_t *vswp);
153 extern void vsw_hio_start_ports(vsw_t *vswp);
154 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
155 
156 /*
157  * Internal tunables.
158  */
159 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
160 int	vsw_wretries = 100;		/* # of write attempts */
161 int	vsw_desc_delay = 0;		/* delay in us */
162 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
163 int	vsw_mac_open_retries = 20;	/* max # of mac_open() retries */
164 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
165 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
166 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
167 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
168 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
169 
170 uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
171 uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
172 uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
173 
174 /* sw timeout for boot delay only: 100 ms, expressed in microseconds */
175 int vsw_setup_switching_boot_delay = 100 * MILLISEC;
176 
177 /* delay in usec to wait for all references on a fdb entry to be dropped */
178 uint32_t vsw_fdbe_refcnt_delay = 10;
179 
180 /*
181  * Default vlan id. This is only used internally when the "default-vlan-id"
182  * property is not present in the MD device node. Therefore, this should not be
183  * used as a tunable; if this value is changed, the corresponding variable
184  * should be updated to the same value in all vnets connected to this vsw.
185  */
186 uint16_t	vsw_default_vlan_id = 1;
187 
188 /*
189  * Workaround for a version handshake bug in obp's vnet.
190  * If vsw initiates version negotiation starting from the highest version,
191  * obp sends a nack and terminates version handshake. To workaround
192  * this, we do not initiate version handshake when the channel comes up.
193  * Instead, we wait for the peer to send its version info msg and go through
194  * the version protocol exchange. If we successfully negotiate a version,
195  * before sending the ack, we send our version info msg to the peer
196  * using the <major,minor> version that we are about to ack.
197  */
198 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
199 
200 /*
201  * In the absence of "priority-ether-types" property in MD, the following
202  * internal tunable can be set to specify a single priority ethertype.
203  */
204 uint64_t vsw_pri_eth_type = 0;
205 
206 /*
207  * Number of transmit priority buffers that are preallocated per device.
208  * This number is chosen to be a small value to throttle transmission
209  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
210  */
211 uint32_t vsw_pri_tx_nmblks = 64;
212 
213 boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
214 int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanup */
/* NOTE(review): "10ms" implies the value is in microseconds -- confirm at the point of use */
215 int vsw_hio_cleanup_delay = 10000;	/* 10ms */
216 
217 /*
218  * External tunables.
219  */
220 /*
221  * Enable/disable thread per ring. This is a mode selection
222  * that is done at vsw driver attach time.
223  */
224 boolean_t vsw_multi_ring_enable = B_FALSE;
225 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;
226 
227 /* Number of transmit descriptors -  must be power of 2 */
228 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
229 
230 /*
231  * Max number of mblks received in one receive operation.
232  */
233 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
234 
235 /*
236  * Tunables for three different pools, that is, the size and
237  * number of mblks for each pool.
238  */
239 uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128;	/* size=128 for pool1 */
240 uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256;	/* size=256 for pool2 */
241 uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048;	/* size=2048 for pool3 */
242 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
243 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
244 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
245 
246 /*
247  * vsw_max_tx_qcount is the maximum # of packets that can be queued
248  * before the tx worker thread begins processing the queue. Its value
249  * is chosen to be 4x the default length of tx descriptor ring.
250  */
251 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
252 
253 /*
254  * MAC callbacks
255  */
/*
 * Entry points handed to the MAC layer; vsw_mac_register() stores the
 * address of this table in mac_register_t.m_callbacks. The initializer is
 * positional, so the order of the entries below must match the member
 * order of mac_callbacks_t.
 */
256 static	mac_callbacks_t	vsw_m_callbacks = {
257 	0,		/* NOTE(review): presumably the optional-callback flags, none set -- confirm */
258 	vsw_m_stat,
259 	vsw_m_start,
260 	vsw_m_stop,
261 	vsw_m_promisc,
262 	vsw_m_multicst,
263 	vsw_m_unicst,
264 	vsw_m_tx,
265 	NULL,		/* the last three members are not provided */
266 	NULL,
267 	NULL
268 };
269 
/*
 * Character/block entry points, referenced from vsw_ops below.
 * open and close are nulldev, property operations go through ddi_prop_op,
 * and every other entry point is nodev (nochpoll for chpoll).
 */
270 static	struct	cb_ops	vsw_cb_ops = {
271 	nulldev,			/* cb_open */
272 	nulldev,			/* cb_close */
273 	nodev,				/* cb_strategy */
274 	nodev,				/* cb_print */
275 	nodev,				/* cb_dump */
276 	nodev,				/* cb_read */
277 	nodev,				/* cb_write */
278 	nodev,				/* cb_ioctl */
279 	nodev,				/* cb_devmap */
280 	nodev,				/* cb_mmap */
281 	nodev,				/* cb_segmap */
282 	nochpoll,			/* cb_chpoll */
283 	ddi_prop_op,			/* cb_prop_op */
284 	NULL,				/* cb_stream */
285 	D_MP,				/* cb_flag */
286 	CB_REV,				/* rev */
287 	nodev,				/* int (*cb_aread)() */
288 	nodev				/* int (*cb_awrite)() */
289 };
290 
/*
 * Device operations vector. It supplies the getinfo/attach/detach routines
 * defined in this file and points at vsw_cb_ops; it is referenced by
 * vswmodldrv and passed to mac_init_ops()/mac_fini_ops() in _init()/_fini().
 */
291 static	struct	dev_ops	vsw_ops = {
292 	DEVO_REV,		/* devo_rev */
293 	0,			/* devo_refcnt */
294 	vsw_getinfo,		/* devo_getinfo */
295 	nulldev,		/* devo_identify */
296 	nulldev,		/* devo_probe */
297 	vsw_attach,		/* devo_attach */
298 	vsw_detach,		/* devo_detach */
299 	nodev,			/* devo_reset */
300 	&vsw_cb_ops,		/* devo_cb_ops */
301 	(struct bus_ops *)NULL,	/* devo_bus_ops */
302 	ddi_power		/* devo_power */
303 };
304 
/*
 * Driver linkage record: ties the generic driver module operations
 * (mod_driverops), a description string and vsw_ops together. It is the
 * single entry referenced by 'modlinkage'.
 */
305 extern	struct	mod_ops	mod_driverops;
306 static struct modldrv vswmodldrv = {
307 	&mod_driverops,
308 	"sun4v Virtual Switch",
309 	&vsw_ops,
310 };
311 
/*
 * Acquire / release all three per-channel locks of an ldc.
 *
 * LDC_ENTER_LOCK takes the locks in the order callback -> rx -> tx and
 * LDC_EXIT_LOCK drops them in the reverse order.
 *
 * Each macro expands to exactly one statement (do { } while (0)), so all
 * three lock operations stay together when the macro is the body of an
 * unbraced if/else or loop. Invoke with a trailing semicolon.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	_NOTE(CONSTCOND)				\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	_NOTE(CONSTCOND)				\
	} while (0)
320 
321 /* Driver soft state ptr  */
322 static void	*vsw_state;
323 
324 /*
325  * Linked list of "vsw_t" structures - one per instance.
326  */
327 vsw_t		*vsw_head = NULL;
328 krwlock_t	vsw_rw;
329 
330 /*
331  * Property names
332  */
333 static char vdev_propname[] = "virtual-device";
334 static char vsw_propname[] = "virtual-network-switch";
335 static char physdev_propname[] = "vsw-phys-dev";
336 static char smode_propname[] = "vsw-switch-mode";
337 static char macaddr_propname[] = "local-mac-address";
338 static char remaddr_propname[] = "remote-mac-address";
339 static char ldcids_propname[] = "ldc-ids";
340 static char chan_propname[] = "channel-endpoint";
341 static char id_propname[] = "id";
342 static char reg_propname[] = "reg";
343 static char pri_types_propname[] = "priority-ether-types";
344 static char vsw_pvid_propname[] = "port-vlan-id";
345 static char vsw_vid_propname[] = "vlan-id";
346 static char vsw_dvid_propname[] = "default-vlan-id";
347 static char port_pvid_propname[] = "remote-port-vlan-id";
348 static char port_vid_propname[] = "remote-vlan-id";
349 static char hybrid_propname[] = "hybrid";
350 
351 /*
352  * Matching criteria passed to the MDEG to register interest
353  * in changes to 'virtual-device-port' nodes identified by their
354  * 'id' property.
355  */
356 static md_prop_match_t vport_prop_match[] = {
357 	{ MDET_PROP_VAL,    "id"   },
358 	{ MDET_LIST_END,    NULL    }
359 };
360 
361 static mdeg_node_match_t vport_match = { "virtual-device-port",
362 						vport_prop_match };
363 
364 /*
365  * Matching criteria passed to the MDEG to register interest
366  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
367  * by their 'name' and 'cfg-handle' properties.
368  */
369 static md_prop_match_t vdev_prop_match[] = {
370 	{ MDET_PROP_STR,    "name"   },
371 	{ MDET_PROP_VAL,    "cfg-handle" },
372 	{ MDET_LIST_END,    NULL    }
373 };
374 
375 static mdeg_node_match_t vdev_match = { "virtual-device",
376 						vdev_prop_match };
377 
378 
379 /*
380  * Specification of an MD node passed to the MDEG to filter any
381  * 'vport' nodes that do not belong to the specified node. This
382  * template is copied for each vsw instance and filled in with
383  * the appropriate 'cfg-handle' value before being passed to the MDEG.
384  */
385 static mdeg_prop_spec_t vsw_prop_template[] = {
386 	{ MDET_PROP_STR,    "name",		vsw_propname },
387 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
388 	{ MDET_LIST_END,    NULL,		NULL	}
389 };
390 
/*
 * Store 'val' in the ps_val member of entry [1] of an MDEG property spec
 * array. In vsw_prop_template entry [1] is the "cfg-handle" property.
 *
 * Expands to a parenthesized expression with no trailing semicolon, so it
 * behaves like a function call: invoke it with a terminating ';'. This
 * keeps it usable as the body of an unbraced if/else.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
392 
393 #ifdef	DEBUG
394 /*
395  * Print debug messages - set to 0x1f to enable all msgs
396  * or 0x0 to turn all off.
397  */
398 int vswdbg = 0x0;
399 
400 /*
401  * debug levels:
402  * 0x01:	Function entry/exit tracing
403  * 0x02:	Internal function messages
404  * 0x04:	Verbose internal messages
405  * 0x08:	Warning messages
406  * 0x10:	Error messages
407  */
408 
409 void
410 vswdebug(vsw_t *vswp, const char *fmt, ...)
411 {
412 	char buf[512];
413 	va_list ap;
414 
415 	va_start(ap, fmt);
416 	(void) vsprintf(buf, fmt, ap);
417 	va_end(ap);
418 
419 	if (vswp == NULL)
420 		cmn_err(CE_CONT, "%s\n", buf);
421 	else
422 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
423 }
424 
425 #endif	/* DEBUG */
426 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info(). It lists the single driver linkage record
 * (vswmodldrv) followed by a terminating NULL.
 */
427 static struct modlinkage modlinkage = {
428 	MODREV_1,
429 	&vswmodldrv,
430 	NULL
431 };
432 
433 int
434 _init(void)
435 {
436 	int status;
437 
438 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
439 
440 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
441 	if (status != 0) {
442 		return (status);
443 	}
444 
445 	mac_init_ops(&vsw_ops, DRV_NAME);
446 	status = mod_install(&modlinkage);
447 	if (status != 0) {
448 		ddi_soft_state_fini(&vsw_state);
449 	}
450 	return (status);
451 }
452 
453 int
454 _fini(void)
455 {
456 	int status;
457 
458 	status = mod_remove(&modlinkage);
459 	if (status != 0)
460 		return (status);
461 	mac_fini_ops(&vsw_ops);
462 	ddi_soft_state_fini(&vsw_state);
463 
464 	rw_destroy(&vsw_rw);
465 
466 	return (status);
467 }
468 
469 int
470 _info(struct modinfo *modinfop)
471 {
472 	return (mod_info(&modlinkage, modinfop));
473 }
474 
475 static int
476 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
477 {
478 	vsw_t		*vswp;
479 	int		instance;
480 	char		hashname[MAXNAMELEN];
481 	char		qname[TASKQ_NAMELEN];
482 	enum		{ PROG_init = 0x00,
483 				PROG_locks = 0x01,
484 				PROG_readmd = 0x02,
485 				PROG_fdb = 0x04,
486 				PROG_mfdb = 0x08,
487 				PROG_taskq = 0x10,
488 				PROG_swmode = 0x20,
489 				PROG_macreg = 0x40,
490 				PROG_mdreg = 0x80}
491 			progress;
492 
493 	progress = PROG_init;
494 	int		rv;
495 
496 	switch (cmd) {
497 	case DDI_ATTACH:
498 		break;
499 	case DDI_RESUME:
500 		/* nothing to do for this non-device */
501 		return (DDI_SUCCESS);
502 	case DDI_PM_RESUME:
503 	default:
504 		return (DDI_FAILURE);
505 	}
506 
507 	instance = ddi_get_instance(dip);
508 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
509 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
510 		return (DDI_FAILURE);
511 	}
512 	vswp = ddi_get_soft_state(vsw_state, instance);
513 
514 	if (vswp == NULL) {
515 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
516 		goto vsw_attach_fail;
517 	}
518 
519 	vswp->dip = dip;
520 	vswp->instance = instance;
521 	ddi_set_driver_private(dip, (caddr_t)vswp);
522 
523 	mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL);
524 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
525 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
526 	mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL);
527 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
528 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
529 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
530 
531 	progress |= PROG_locks;
532 
533 	rv = vsw_read_mdprops(vswp);
534 	if (rv != 0)
535 		goto vsw_attach_fail;
536 
537 	progress |= PROG_readmd;
538 
539 	/* setup the unicast forwarding database  */
540 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
541 	    vswp->instance);
542 	D2(vswp, "creating unicast hash table (%s)...", hashname);
543 	vswp->fdb_nchains = vsw_fdb_nchains;
544 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
545 	    mod_hash_null_valdtor, sizeof (void *));
546 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
547 	progress |= PROG_fdb;
548 
549 	/* setup the multicast fowarding database */
550 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
551 	    vswp->instance);
552 	D2(vswp, "creating multicast hash table %s)...", hashname);
553 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
554 	    mod_hash_null_valdtor, sizeof (void *));
555 
556 	progress |= PROG_mfdb;
557 
558 	/*
559 	 * Create the taskq which will process all the VIO
560 	 * control messages.
561 	 */
562 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
563 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
564 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
565 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
566 		    vswp->instance);
567 		goto vsw_attach_fail;
568 	}
569 
570 	progress |= PROG_taskq;
571 
572 	/* prevent auto-detaching */
573 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
574 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
575 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
576 		    "instance %u", DDI_NO_AUTODETACH, instance);
577 	}
578 
579 	/*
580 	 * The null switching function is set to avoid panic until
581 	 * switch mode is setup.
582 	 */
583 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
584 
585 	/*
586 	 * Setup the required switching mode,
587 	 * based on the mdprops that we read earlier.
588 	 * schedule a short timeout (0.1 sec) for the first time
589 	 * setup and avoid calling mac_open() directly here,
590 	 * others are regular timeout 3 secs.
591 	 */
592 	mutex_enter(&vswp->swtmout_lock);
593 
594 	vswp->swtmout_enabled = B_TRUE;
595 	vswp->swtmout_id = timeout(vsw_setup_switching_timeout, vswp,
596 	    drv_usectohz(vsw_setup_switching_boot_delay));
597 
598 	mutex_exit(&vswp->swtmout_lock);
599 
600 	progress |= PROG_swmode;
601 
602 	/* Register with mac layer as a provider */
603 	rv = vsw_mac_register(vswp);
604 	if (rv != 0)
605 		goto vsw_attach_fail;
606 
607 	progress |= PROG_macreg;
608 
609 	/*
610 	 * Now we have everything setup, register an interest in
611 	 * specific MD nodes.
612 	 *
613 	 * The callback is invoked in 2 cases, firstly if upon mdeg
614 	 * registration there are existing nodes which match our specified
615 	 * criteria, and secondly if the MD is changed (and again, there
616 	 * are nodes which we are interested in present within it. Note
617 	 * that our callback will be invoked even if our specified nodes
618 	 * have not actually changed).
619 	 *
620 	 */
621 	rv = vsw_mdeg_register(vswp);
622 	if (rv != 0)
623 		goto vsw_attach_fail;
624 
625 	progress |= PROG_mdreg;
626 
627 	WRITE_ENTER(&vsw_rw);
628 	vswp->next = vsw_head;
629 	vsw_head = vswp;
630 	RW_EXIT(&vsw_rw);
631 
632 	ddi_report_dev(vswp->dip);
633 	return (DDI_SUCCESS);
634 
635 vsw_attach_fail:
636 	DERR(NULL, "vsw_attach: failed");
637 
638 	if (progress & PROG_mdreg) {
639 		vsw_mdeg_unregister(vswp);
640 		(void) vsw_detach_ports(vswp);
641 	}
642 
643 	if (progress & PROG_macreg)
644 		(void) vsw_mac_unregister(vswp);
645 
646 	if (progress & PROG_swmode) {
647 		vsw_stop_switching_timeout(vswp);
648 		vsw_hio_cleanup(vswp);
649 		mutex_enter(&vswp->mac_lock);
650 		vsw_mac_detach(vswp);
651 		vsw_mac_close(vswp);
652 		mutex_exit(&vswp->mac_lock);
653 	}
654 
655 	if (progress & PROG_taskq)
656 		ddi_taskq_destroy(vswp->taskq_p);
657 
658 	if (progress & PROG_mfdb)
659 		mod_hash_destroy_hash(vswp->mfdb);
660 
661 	if (progress & PROG_fdb) {
662 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
663 		mod_hash_destroy_hash(vswp->fdb_hashp);
664 	}
665 
666 	if (progress & PROG_readmd) {
667 		if (VSW_PRI_ETH_DEFINED(vswp)) {
668 			kmem_free(vswp->pri_types,
669 			    sizeof (uint16_t) * vswp->pri_num_types);
670 		}
671 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
672 	}
673 
674 	if (progress & PROG_locks) {
675 		rw_destroy(&vswp->plist.lockrw);
676 		rw_destroy(&vswp->mfdbrw);
677 		rw_destroy(&vswp->if_lockrw);
678 		mutex_destroy(&vswp->swtmout_lock);
679 		mutex_destroy(&vswp->mca_lock);
680 		mutex_destroy(&vswp->mac_lock);
681 		mutex_destroy(&vswp->hw_lock);
682 	}
683 
684 	ddi_soft_state_free(vsw_state, instance);
685 	return (DDI_FAILURE);
686 }
687 
688 static int
689 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
690 {
691 	vio_mblk_pool_t		*poolp, *npoolp;
692 	vsw_t			**vswpp, *vswp;
693 	int 			instance;
694 
695 	instance = ddi_get_instance(dip);
696 	vswp = ddi_get_soft_state(vsw_state, instance);
697 
698 	if (vswp == NULL) {
699 		return (DDI_FAILURE);
700 	}
701 
702 	switch (cmd) {
703 	case DDI_DETACH:
704 		break;
705 	case DDI_SUSPEND:
706 	case DDI_PM_SUSPEND:
707 	default:
708 		return (DDI_FAILURE);
709 	}
710 
711 	D2(vswp, "detaching instance %d", instance);
712 
713 	/* Stop any pending timeout to setup switching mode. */
714 	vsw_stop_switching_timeout(vswp);
715 
716 	if (vswp->if_state & VSW_IF_REG) {
717 		if (vsw_mac_unregister(vswp) != 0) {
718 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
719 			    "MAC layer", vswp->instance);
720 			return (DDI_FAILURE);
721 		}
722 	}
723 
724 	vsw_mdeg_unregister(vswp);
725 
726 	/* remove mac layer callback */
727 	mutex_enter(&vswp->mac_lock);
728 	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
729 		mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
730 		vswp->mrh = NULL;
731 	}
732 	mutex_exit(&vswp->mac_lock);
733 
734 	if (vsw_detach_ports(vswp) != 0) {
735 		cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports",
736 		    vswp->instance);
737 		return (DDI_FAILURE);
738 	}
739 
740 	rw_destroy(&vswp->if_lockrw);
741 
742 	/* cleanup HybridIO */
743 	vsw_hio_cleanup(vswp);
744 
745 	mutex_destroy(&vswp->hw_lock);
746 
747 	/*
748 	 * Now that the ports have been deleted, stop and close
749 	 * the physical device.
750 	 */
751 	mutex_enter(&vswp->mac_lock);
752 
753 	vsw_mac_detach(vswp);
754 	vsw_mac_close(vswp);
755 
756 	mutex_exit(&vswp->mac_lock);
757 
758 	mutex_destroy(&vswp->mac_lock);
759 	mutex_destroy(&vswp->swtmout_lock);
760 
761 	/*
762 	 * Destroy any free pools that may still exist.
763 	 */
764 	poolp = vswp->rxh;
765 	while (poolp != NULL) {
766 		npoolp = vswp->rxh = poolp->nextp;
767 		if (vio_destroy_mblks(poolp) != 0) {
768 			vswp->rxh = poolp;
769 			return (DDI_FAILURE);
770 		}
771 		poolp = npoolp;
772 	}
773 
774 	/*
775 	 * Remove this instance from any entries it may be on in
776 	 * the hash table by using the list of addresses maintained
777 	 * in the vsw_t structure.
778 	 */
779 	vsw_del_mcst_vsw(vswp);
780 
781 	vswp->mcap = NULL;
782 	mutex_destroy(&vswp->mca_lock);
783 
784 	/*
785 	 * By now any pending tasks have finished and the underlying
786 	 * ldc's have been destroyed, so its safe to delete the control
787 	 * message taskq.
788 	 */
789 	if (vswp->taskq_p != NULL)
790 		ddi_taskq_destroy(vswp->taskq_p);
791 
792 	/*
793 	 * At this stage all the data pointers in the hash table
794 	 * should be NULL, as all the ports have been removed and will
795 	 * have deleted themselves from the port lists which the data
796 	 * pointers point to. Hence we can destroy the table using the
797 	 * default destructors.
798 	 */
799 	D2(vswp, "vsw_detach: destroying hash tables..");
800 	vsw_destroy_vlans(vswp, VSW_LOCALDEV);
801 	mod_hash_destroy_hash(vswp->fdb_hashp);
802 	vswp->fdb_hashp = NULL;
803 
804 	WRITE_ENTER(&vswp->mfdbrw);
805 	mod_hash_destroy_hash(vswp->mfdb);
806 	vswp->mfdb = NULL;
807 	RW_EXIT(&vswp->mfdbrw);
808 	rw_destroy(&vswp->mfdbrw);
809 
810 	/* free pri_types table */
811 	if (VSW_PRI_ETH_DEFINED(vswp)) {
812 		kmem_free(vswp->pri_types,
813 		    sizeof (uint16_t) * vswp->pri_num_types);
814 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
815 	}
816 
817 	ddi_remove_minor_node(dip, NULL);
818 
819 	rw_destroy(&vswp->plist.lockrw);
820 	WRITE_ENTER(&vsw_rw);
821 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
822 		if (*vswpp == vswp) {
823 			*vswpp = vswp->next;
824 			break;
825 		}
826 	}
827 	RW_EXIT(&vsw_rw);
828 	ddi_soft_state_free(vsw_state, instance);
829 
830 	return (DDI_SUCCESS);
831 }
832 
833 static int
834 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
835 {
836 	_NOTE(ARGUNUSED(dip))
837 
838 	vsw_t	*vswp = NULL;
839 	dev_t	dev = (dev_t)arg;
840 	int	instance;
841 
842 	instance = getminor(dev);
843 
844 	switch (infocmd) {
845 	case DDI_INFO_DEVT2DEVINFO:
846 		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
847 			*result = NULL;
848 			return (DDI_FAILURE);
849 		}
850 		*result = vswp->dip;
851 		return (DDI_SUCCESS);
852 
853 	case DDI_INFO_DEVT2INSTANCE:
854 		*result = (void *)(uintptr_t)instance;
855 		return (DDI_SUCCESS);
856 
857 	default:
858 		*result = NULL;
859 		return (DDI_FAILURE);
860 	}
861 }
862 
863 /*
864  * Get the value of the "vsw-phys-dev" property in the specified
865  * node. This property is the name of the physical device that
866  * the virtual switch will use to talk to the outside world.
867  *
868  * Note it is valid for this property to be NULL (but the property
869  * itself must exist). Callers of this routine should verify that
870  * the value returned is what they expected (i.e. either NULL or non NULL).
871  *
872  * On success returns value of the property in region pointed to by
873  * the 'name' argument, and with return value of 0. Otherwise returns 1.
874  */
875 static int
876 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
877 {
878 	int		len = 0;
879 	int		instance;
880 	char		*physname = NULL;
881 	char		*dev;
882 	const char	*dev_name;
883 	char		myname[MAXNAMELEN];
884 
885 	dev_name = ddi_driver_name(vswp->dip);
886 	instance = ddi_get_instance(vswp->dip);
887 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
888 
889 	if (md_get_prop_data(mdp, node, physdev_propname,
890 	    (uint8_t **)(&physname), &len) != 0) {
891 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
892 		    "device(s) from MD", vswp->instance);
893 		return (1);
894 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
895 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
896 		    vswp->instance, physname);
897 		return (1);
898 	} else if (strcmp(myname, physname) == 0) {
899 		/*
900 		 * Prevent the vswitch from opening itself as the
901 		 * network device.
902 		 */
903 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
904 		    vswp->instance, physname);
905 		return (1);
906 	} else {
907 		(void) strncpy(name, physname, strlen(physname) + 1);
908 		D2(vswp, "%s: using first device specified (%s)",
909 		    __func__, physname);
910 	}
911 
912 #ifdef DEBUG
913 	/*
914 	 * As a temporary measure to aid testing we check to see if there
915 	 * is a vsw.conf file present. If there is we use the value of the
916 	 * vsw_physname property in the file as the name of the physical
917 	 * device, overriding the value from the MD.
918 	 *
919 	 * There may be multiple devices listed, but for the moment
920 	 * we just use the first one.
921 	 */
922 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
923 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
924 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
925 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
926 			    vswp->instance, dev);
927 			ddi_prop_free(dev);
928 			return (1);
929 		} else {
930 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
931 			    "config file", vswp->instance, dev);
932 
933 			(void) strncpy(name, dev, strlen(dev) + 1);
934 		}
935 
936 		ddi_prop_free(dev);
937 	}
938 #endif
939 
940 	return (0);
941 }
942 
943 /*
944  * Read the 'vsw-switch-mode' property from the specified MD node.
945  *
946  * Returns 0 on success and the number of modes found in 'found',
947  * otherwise returns 1.
948  */
949 static int
950 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
951 						uint8_t *modes, int *found)
952 {
953 	int		len = 0;
954 	int		smode_num = 0;
955 	char		*smode = NULL;
956 	char		*curr_mode = NULL;
957 
958 	D1(vswp, "%s: enter", __func__);
959 
960 	/*
961 	 * Get the switch-mode property. The modes are listed in
962 	 * decreasing order of preference, i.e. prefered mode is
963 	 * first item in list.
964 	 */
965 	len = 0;
966 	smode_num = 0;
967 	if (md_get_prop_data(mdp, node, smode_propname,
968 	    (uint8_t **)(&smode), &len) != 0) {
969 		/*
970 		 * Unable to get switch-mode property from MD, nothing
971 		 * more we can do.
972 		 */
973 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
974 		    " from the MD", vswp->instance);
975 		*found = 0;
976 		return (1);
977 	}
978 
979 	curr_mode = smode;
980 	/*
981 	 * Modes of operation:
982 	 * 'switched'	 - layer 2 switching, underlying HW in
983 	 *			programmed mode.
984 	 * 'promiscuous' - layer 2 switching, underlying HW in
985 	 *			promiscuous mode.
986 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
987 	 *			in non-promiscuous mode.
988 	 */
989 	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
990 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
991 		if (strcmp(curr_mode, "switched") == 0) {
992 			modes[smode_num++] = VSW_LAYER2;
993 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
994 			modes[smode_num++] = VSW_LAYER2_PROMISC;
995 		} else if (strcmp(curr_mode, "routed") == 0) {
996 			modes[smode_num++] = VSW_LAYER3;
997 		} else {
998 			DWARN(vswp, "%s: Unknown switch mode %s, "
999 			    "setting to default 'switched' mode",
1000 			    __func__, curr_mode);
1001 			modes[smode_num++] = VSW_LAYER2;
1002 		}
1003 		curr_mode += strlen(curr_mode) + 1;
1004 	}
1005 	*found = smode_num;
1006 
1007 	D2(vswp, "%s: %d modes found", __func__, smode_num);
1008 
1009 	D1(vswp, "%s: exit", __func__);
1010 
1011 	return (0);
1012 }
1013 
1014 /*
1015  * Register with the MAC layer as a network device, so we
1016  * can be plumbed if necessary.
1017  */
1018 static int
1019 vsw_mac_register(vsw_t *vswp)
1020 {
1021 	mac_register_t	*macp;
1022 	int		rv;
1023 
1024 	D1(vswp, "%s: enter", __func__);
1025 
1026 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1027 		return (EINVAL);
1028 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1029 	macp->m_driver = vswp;
1030 	macp->m_dip = vswp->dip;
1031 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1032 	macp->m_callbacks = &vsw_m_callbacks;
1033 	macp->m_min_sdu = 0;
1034 	macp->m_max_sdu = vsw_ethermtu;
1035 	macp->m_margin = VLAN_TAGSZ;
1036 	rv = mac_register(macp, &vswp->if_mh);
1037 	mac_free(macp);
1038 	if (rv != 0) {
1039 		/*
1040 		 * Treat this as a non-fatal error as we may be
1041 		 * able to operate in some other mode.
1042 		 */
1043 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1044 		    "a provider with MAC layer", vswp->instance);
1045 		return (rv);
1046 	}
1047 
1048 	vswp->if_state |= VSW_IF_REG;
1049 
1050 	vswp->max_frame_size = vsw_ethermtu + sizeof (struct ether_header)
1051 	    + VLAN_TAGSZ;
1052 
1053 	D1(vswp, "%s: exit", __func__);
1054 
1055 	return (rv);
1056 }
1057 
1058 static int
1059 vsw_mac_unregister(vsw_t *vswp)
1060 {
1061 	int		rv = 0;
1062 
1063 	D1(vswp, "%s: enter", __func__);
1064 
1065 	WRITE_ENTER(&vswp->if_lockrw);
1066 
1067 	if (vswp->if_state & VSW_IF_REG) {
1068 		rv = mac_unregister(vswp->if_mh);
1069 		if (rv != 0) {
1070 			DWARN(vswp, "%s: unable to unregister from MAC "
1071 			    "framework", __func__);
1072 
1073 			RW_EXIT(&vswp->if_lockrw);
1074 			D1(vswp, "%s: fail exit", __func__);
1075 			return (rv);
1076 		}
1077 
1078 		/* mark i/f as down and unregistered */
1079 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1080 	}
1081 	RW_EXIT(&vswp->if_lockrw);
1082 
1083 	D1(vswp, "%s: exit", __func__);
1084 
1085 	return (rv);
1086 }
1087 
1088 static int
1089 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1090 {
1091 	vsw_t			*vswp = (vsw_t *)arg;
1092 
1093 	D1(vswp, "%s: enter", __func__);
1094 
1095 	mutex_enter(&vswp->mac_lock);
1096 	if (vswp->mh == NULL) {
1097 		mutex_exit(&vswp->mac_lock);
1098 		return (EINVAL);
1099 	}
1100 
1101 	/* return stats from underlying device */
1102 	*val = mac_stat_get(vswp->mh, stat);
1103 
1104 	mutex_exit(&vswp->mac_lock);
1105 
1106 	return (0);
1107 }
1108 
1109 static void
1110 vsw_m_stop(void *arg)
1111 {
1112 	vsw_t		*vswp = (vsw_t *)arg;
1113 
1114 	D1(vswp, "%s: enter", __func__);
1115 
1116 	WRITE_ENTER(&vswp->if_lockrw);
1117 	vswp->if_state &= ~VSW_IF_UP;
1118 	RW_EXIT(&vswp->if_lockrw);
1119 
1120 	mutex_enter(&vswp->hw_lock);
1121 
1122 	(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1123 
1124 	if (vswp->recfg_reqd)
1125 		vsw_reconfig_hw(vswp);
1126 
1127 	mutex_exit(&vswp->hw_lock);
1128 
1129 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1130 }
1131 
1132 static int
1133 vsw_m_start(void *arg)
1134 {
1135 	vsw_t		*vswp = (vsw_t *)arg;
1136 
1137 	D1(vswp, "%s: enter", __func__);
1138 
1139 	WRITE_ENTER(&vswp->if_lockrw);
1140 
1141 	vswp->if_state |= VSW_IF_UP;
1142 
1143 	if (vswp->switching_setup_done == B_FALSE) {
1144 		/*
1145 		 * If the switching mode has not been setup yet, just
1146 		 * return. The unicast address will be programmed
1147 		 * after the physical device is successfully setup by the
1148 		 * timeout handler.
1149 		 */
1150 		RW_EXIT(&vswp->if_lockrw);
1151 		return (0);
1152 	}
1153 
1154 	/* if in layer2 mode, program unicast address. */
1155 	if (vswp->mh != NULL) {
1156 		mutex_enter(&vswp->hw_lock);
1157 		(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1158 		mutex_exit(&vswp->hw_lock);
1159 	}
1160 
1161 	RW_EXIT(&vswp->if_lockrw);
1162 
1163 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1164 	return (0);
1165 }
1166 
1167 /*
1168  * Change the local interface address.
1169  *
1170  * Note: we don't support this entry point. The local
1171  * mac address of the switch can only be changed via its
1172  * MD node properties.
1173  */
1174 static int
1175 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1176 {
1177 	_NOTE(ARGUNUSED(arg, macaddr))
1178 
1179 	return (DDI_FAILURE);
1180 }
1181 
1182 static int
1183 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1184 {
1185 	vsw_t		*vswp = (vsw_t *)arg;
1186 	mcst_addr_t	*mcst_p = NULL;
1187 	uint64_t	addr = 0x0;
1188 	int		i, ret = 0;
1189 
1190 	D1(vswp, "%s: enter", __func__);
1191 
1192 	/*
1193 	 * Convert address into form that can be used
1194 	 * as hash table key.
1195 	 */
1196 	for (i = 0; i < ETHERADDRL; i++) {
1197 		addr = (addr << 8) | mca[i];
1198 	}
1199 
1200 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1201 
1202 	if (add) {
1203 		D2(vswp, "%s: adding multicast", __func__);
1204 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1205 			/*
1206 			 * Update the list of multicast addresses
1207 			 * contained within the vsw_t structure to
1208 			 * include this new one.
1209 			 */
1210 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1211 			if (mcst_p == NULL) {
1212 				DERR(vswp, "%s unable to alloc mem", __func__);
1213 				(void) vsw_del_mcst(vswp,
1214 				    VSW_LOCALDEV, addr, NULL);
1215 				return (1);
1216 			}
1217 			mcst_p->addr = addr;
1218 			ether_copy(mca, &mcst_p->mca);
1219 
1220 			/*
1221 			 * Call into the underlying driver to program the
1222 			 * address into HW.
1223 			 */
1224 			mutex_enter(&vswp->mac_lock);
1225 			if (vswp->mh != NULL) {
1226 				ret = mac_multicst_add(vswp->mh, mca);
1227 				if (ret != 0) {
1228 					cmn_err(CE_NOTE, "!vsw%d: unable to "
1229 					    "add multicast address",
1230 					    vswp->instance);
1231 					mutex_exit(&vswp->mac_lock);
1232 					(void) vsw_del_mcst(vswp,
1233 					    VSW_LOCALDEV, addr, NULL);
1234 					kmem_free(mcst_p, sizeof (*mcst_p));
1235 					return (ret);
1236 				}
1237 				mcst_p->mac_added = B_TRUE;
1238 			}
1239 			mutex_exit(&vswp->mac_lock);
1240 
1241 			mutex_enter(&vswp->mca_lock);
1242 			mcst_p->nextp = vswp->mcap;
1243 			vswp->mcap = mcst_p;
1244 			mutex_exit(&vswp->mca_lock);
1245 		} else {
1246 			cmn_err(CE_NOTE, "!vsw%d: unable to add multicast "
1247 			    "address", vswp->instance);
1248 		}
1249 		return (ret);
1250 	}
1251 
1252 	D2(vswp, "%s: removing multicast", __func__);
1253 	/*
1254 	 * Remove the address from the hash table..
1255 	 */
1256 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1257 
1258 		/*
1259 		 * ..and then from the list maintained in the
1260 		 * vsw_t structure.
1261 		 */
1262 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1263 		ASSERT(mcst_p != NULL);
1264 
1265 		mutex_enter(&vswp->mac_lock);
1266 		if (vswp->mh != NULL && mcst_p->mac_added) {
1267 			(void) mac_multicst_remove(vswp->mh, mca);
1268 			mcst_p->mac_added = B_FALSE;
1269 		}
1270 		mutex_exit(&vswp->mac_lock);
1271 		kmem_free(mcst_p, sizeof (*mcst_p));
1272 	}
1273 
1274 	D1(vswp, "%s: exit", __func__);
1275 
1276 	return (0);
1277 }
1278 
1279 static int
1280 vsw_m_promisc(void *arg, boolean_t on)
1281 {
1282 	vsw_t		*vswp = (vsw_t *)arg;
1283 
1284 	D1(vswp, "%s: enter", __func__);
1285 
1286 	WRITE_ENTER(&vswp->if_lockrw);
1287 	if (on)
1288 		vswp->if_state |= VSW_IF_PROMISC;
1289 	else
1290 		vswp->if_state &= ~VSW_IF_PROMISC;
1291 	RW_EXIT(&vswp->if_lockrw);
1292 
1293 	D1(vswp, "%s: exit", __func__);
1294 
1295 	return (0);
1296 }
1297 
1298 static mblk_t *
1299 vsw_m_tx(void *arg, mblk_t *mp)
1300 {
1301 	vsw_t		*vswp = (vsw_t *)arg;
1302 
1303 	D1(vswp, "%s: enter", __func__);
1304 
1305 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1306 
1307 	if (mp == NULL) {
1308 		return (NULL);
1309 	}
1310 
1311 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1312 
1313 	D1(vswp, "%s: exit", __func__);
1314 
1315 	return (NULL);
1316 }
1317 
1318 /*
1319  * Register for machine description (MD) updates.
1320  *
1321  * Returns 0 on success, 1 on failure.
1322  */
1323 static int
1324 vsw_mdeg_register(vsw_t *vswp)
1325 {
1326 	mdeg_prop_spec_t	*pspecp;
1327 	mdeg_node_spec_t	*inst_specp;
1328 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1329 	size_t			templatesz;
1330 	int			rv;
1331 
1332 	D1(vswp, "%s: enter", __func__);
1333 
1334 	/*
1335 	 * Allocate and initialize a per-instance copy
1336 	 * of the global property spec array that will
1337 	 * uniquely identify this vsw instance.
1338 	 */
1339 	templatesz = sizeof (vsw_prop_template);
1340 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1341 
1342 	bcopy(vsw_prop_template, pspecp, templatesz);
1343 
1344 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1345 
1346 	/* initialize the complete prop spec structure */
1347 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1348 	inst_specp->namep = "virtual-device";
1349 	inst_specp->specp = pspecp;
1350 
1351 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1352 	    vswp->regprop);
1353 	/*
1354 	 * Register an interest in 'virtual-device' nodes with a
1355 	 * 'name' property of 'virtual-network-switch'
1356 	 */
1357 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1358 	    (void *)vswp, &mdeg_hdl);
1359 	if (rv != MDEG_SUCCESS) {
1360 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1361 		    __func__, rv);
1362 		goto mdeg_reg_fail;
1363 	}
1364 
1365 	/*
1366 	 * Register an interest in 'vsw-port' nodes.
1367 	 */
1368 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1369 	    (void *)vswp, &mdeg_port_hdl);
1370 	if (rv != MDEG_SUCCESS) {
1371 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1372 		(void) mdeg_unregister(mdeg_hdl);
1373 		goto mdeg_reg_fail;
1374 	}
1375 
1376 	/* save off data that will be needed later */
1377 	vswp->inst_spec = inst_specp;
1378 	vswp->mdeg_hdl = mdeg_hdl;
1379 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1380 
1381 	D1(vswp, "%s: exit", __func__);
1382 	return (0);
1383 
1384 mdeg_reg_fail:
1385 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1386 	    vswp->instance);
1387 	kmem_free(pspecp, templatesz);
1388 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1389 
1390 	vswp->mdeg_hdl = NULL;
1391 	vswp->mdeg_port_hdl = NULL;
1392 
1393 	return (1);
1394 }
1395 
1396 static void
1397 vsw_mdeg_unregister(vsw_t *vswp)
1398 {
1399 	D1(vswp, "vsw_mdeg_unregister: enter");
1400 
1401 	if (vswp->mdeg_hdl != NULL)
1402 		(void) mdeg_unregister(vswp->mdeg_hdl);
1403 
1404 	if (vswp->mdeg_port_hdl != NULL)
1405 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1406 
1407 	if (vswp->inst_spec != NULL) {
1408 		if (vswp->inst_spec->specp != NULL) {
1409 			(void) kmem_free(vswp->inst_spec->specp,
1410 			    sizeof (vsw_prop_template));
1411 			vswp->inst_spec->specp = NULL;
1412 		}
1413 
1414 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1415 		vswp->inst_spec = NULL;
1416 	}
1417 
1418 	D1(vswp, "vsw_mdeg_unregister: exit");
1419 }
1420 
1421 /*
1422  * Mdeg callback invoked for the vsw node itself.
1423  */
1424 static int
1425 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1426 {
1427 	vsw_t		*vswp;
1428 	md_t		*mdp;
1429 	mde_cookie_t	node;
1430 	uint64_t	inst;
1431 	char		*node_name = NULL;
1432 
1433 	if (resp == NULL)
1434 		return (MDEG_FAILURE);
1435 
1436 	vswp = (vsw_t *)cb_argp;
1437 
1438 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1439 	    " : prev matched %d", __func__, resp->added.nelem,
1440 	    resp->removed.nelem, resp->match_curr.nelem,
1441 	    resp->match_prev.nelem);
1442 
1443 	/*
1444 	 * We get an initial callback for this node as 'added'
1445 	 * after registering with mdeg. Note that we would have
1446 	 * already gathered information about this vsw node by
1447 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1448 	 * So, there is a window where the properties of this
1449 	 * node might have changed when we get this initial 'added'
1450 	 * callback. We handle this as if an update occured
1451 	 * and invoke the same function which handles updates to
1452 	 * the properties of this vsw-node if any.
1453 	 *
1454 	 * A non-zero 'match' value indicates that the MD has been
1455 	 * updated and that a virtual-network-switch node is
1456 	 * present which may or may not have been updated. It is
1457 	 * up to the clients to examine their own nodes and
1458 	 * determine if they have changed.
1459 	 */
1460 	if (resp->added.nelem != 0) {
1461 
1462 		if (resp->added.nelem != 1) {
1463 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1464 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1465 			return (MDEG_FAILURE);
1466 		}
1467 
1468 		mdp = resp->added.mdp;
1469 		node = resp->added.mdep[0];
1470 
1471 	} else if (resp->match_curr.nelem != 0) {
1472 
1473 		if (resp->match_curr.nelem != 1) {
1474 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1475 			    "invalid: %d\n", vswp->instance,
1476 			    resp->match_curr.nelem);
1477 			return (MDEG_FAILURE);
1478 		}
1479 
1480 		mdp = resp->match_curr.mdp;
1481 		node = resp->match_curr.mdep[0];
1482 
1483 	} else {
1484 		return (MDEG_FAILURE);
1485 	}
1486 
1487 	/* Validate name and instance */
1488 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1489 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1490 		return (MDEG_FAILURE);
1491 	}
1492 
1493 	/* is this a virtual-network-switch? */
1494 	if (strcmp(node_name, vsw_propname) != 0) {
1495 		DERR(vswp, "%s: Invalid node name: %s\n",
1496 		    __func__, node_name);
1497 		return (MDEG_FAILURE);
1498 	}
1499 
1500 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1501 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1502 		    __func__);
1503 		return (MDEG_FAILURE);
1504 	}
1505 
1506 	/* is this the right instance of vsw? */
1507 	if (inst != vswp->regprop) {
1508 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1509 		    __func__, inst);
1510 		return (MDEG_FAILURE);
1511 	}
1512 
1513 	vsw_update_md_prop(vswp, mdp, node);
1514 
1515 	return (MDEG_SUCCESS);
1516 }
1517 
1518 /*
1519  * Mdeg callback invoked for changes to the vsw-port nodes
1520  * under the vsw node.
1521  */
1522 static int
1523 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1524 {
1525 	vsw_t		*vswp;
1526 	int		idx;
1527 	md_t		*mdp;
1528 	mde_cookie_t	node;
1529 	uint64_t	inst;
1530 	int		rv;
1531 
1532 	if ((resp == NULL) || (cb_argp == NULL))
1533 		return (MDEG_FAILURE);
1534 
1535 	vswp = (vsw_t *)cb_argp;
1536 
1537 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1538 	    " : prev matched %d", __func__, resp->added.nelem,
1539 	    resp->removed.nelem, resp->match_curr.nelem,
1540 	    resp->match_prev.nelem);
1541 
1542 	/* process added ports */
1543 	for (idx = 0; idx < resp->added.nelem; idx++) {
1544 		mdp = resp->added.mdp;
1545 		node = resp->added.mdep[idx];
1546 
1547 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1548 
1549 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1550 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1551 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1552 		}
1553 	}
1554 
1555 	/* process removed ports */
1556 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1557 		mdp = resp->removed.mdp;
1558 		node = resp->removed.mdep[idx];
1559 
1560 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1561 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1562 			    __func__, id_propname, idx);
1563 			continue;
1564 		}
1565 
1566 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1567 
1568 		if (vsw_port_detach(vswp, inst) != 0) {
1569 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1570 			    vswp->instance, inst);
1571 		}
1572 	}
1573 
1574 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1575 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1576 		    resp->match_curr.mdep[idx],
1577 		    resp->match_prev.mdp,
1578 		    resp->match_prev.mdep[idx]);
1579 	}
1580 
1581 	D1(vswp, "%s: exit", __func__);
1582 
1583 	return (MDEG_SUCCESS);
1584 }
1585 
1586 /*
1587  * Scan the machine description for this instance of vsw
1588  * and read its properties. Called only from vsw_attach().
1589  * Returns: 0 on success, 1 on failure.
1590  */
1591 static int
1592 vsw_read_mdprops(vsw_t *vswp)
1593 {
1594 	md_t		*mdp = NULL;
1595 	mde_cookie_t	rootnode;
1596 	mde_cookie_t	*listp = NULL;
1597 	uint64_t	inst;
1598 	uint64_t	cfgh;
1599 	char		*name;
1600 	int		rv = 1;
1601 	int		num_nodes = 0;
1602 	int		num_devs = 0;
1603 	int		listsz = 0;
1604 	int		i;
1605 
1606 	/*
1607 	 * In each 'virtual-device' node in the MD there is a
1608 	 * 'cfg-handle' property which is the MD's concept of
1609 	 * an instance number (this may be completely different from
1610 	 * the device drivers instance #). OBP reads that value and
1611 	 * stores it in the 'reg' property of the appropriate node in
1612 	 * the device tree. We first read this reg property and use this
1613 	 * to compare against the 'cfg-handle' property of vsw nodes
1614 	 * in MD to get to this specific vsw instance and then read
1615 	 * other properties that we are interested in.
1616 	 * We also cache the value of 'reg' property and use it later
1617 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1618 	 */
1619 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1620 	    DDI_PROP_DONTPASS, reg_propname, -1);
1621 	if (inst == -1) {
1622 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1623 		    "OBP device tree", vswp->instance, reg_propname);
1624 		return (rv);
1625 	}
1626 
1627 	vswp->regprop = inst;
1628 
1629 	if ((mdp = md_get_handle()) == NULL) {
1630 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1631 		return (rv);
1632 	}
1633 
1634 	num_nodes = md_node_count(mdp);
1635 	ASSERT(num_nodes > 0);
1636 
1637 	listsz = num_nodes * sizeof (mde_cookie_t);
1638 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1639 
1640 	rootnode = md_root_node(mdp);
1641 
1642 	/* search for all "virtual_device" nodes */
1643 	num_devs = md_scan_dag(mdp, rootnode,
1644 	    md_find_name(mdp, vdev_propname),
1645 	    md_find_name(mdp, "fwd"), listp);
1646 	if (num_devs <= 0) {
1647 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1648 		goto vsw_readmd_exit;
1649 	}
1650 
1651 	/*
1652 	 * Now loop through the list of virtual-devices looking for
1653 	 * devices with name "virtual-network-switch" and for each
1654 	 * such device compare its instance with what we have from
1655 	 * the 'reg' property to find the right node in MD and then
1656 	 * read all its properties.
1657 	 */
1658 	for (i = 0; i < num_devs; i++) {
1659 
1660 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1661 			DWARN(vswp, "%s: name property not found\n",
1662 			    __func__);
1663 			goto vsw_readmd_exit;
1664 		}
1665 
1666 		/* is this a virtual-network-switch? */
1667 		if (strcmp(name, vsw_propname) != 0)
1668 			continue;
1669 
1670 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1671 			DWARN(vswp, "%s: cfg-handle property not found\n",
1672 			    __func__);
1673 			goto vsw_readmd_exit;
1674 		}
1675 
1676 		/* is this the required instance of vsw? */
1677 		if (inst != cfgh)
1678 			continue;
1679 
1680 		/* now read all properties of this vsw instance */
1681 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1682 		break;
1683 	}
1684 
1685 vsw_readmd_exit:
1686 
1687 	kmem_free(listp, listsz);
1688 	(void) md_fini_handle(mdp);
1689 	return (rv);
1690 }
1691 
1692 /*
1693  * Read the initial start-of-day values from the specified MD node.
1694  */
1695 static int
1696 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1697 {
1698 	int		i;
1699 	uint64_t 	macaddr = 0;
1700 
1701 	D1(vswp, "%s: enter", __func__);
1702 
1703 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1704 		return (1);
1705 	}
1706 
1707 	/* mac address for vswitch device itself */
1708 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1709 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1710 		    vswp->instance);
1711 		return (1);
1712 	}
1713 
1714 	vsw_save_lmacaddr(vswp, macaddr);
1715 
1716 	if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) {
1717 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1718 		    "defaulting to 'switched' mode",
1719 		    __func__, smode_propname);
1720 
1721 		for (i = 0; i < NUM_SMODES; i++)
1722 			vswp->smode[i] = VSW_LAYER2;
1723 
1724 		vswp->smode_num = NUM_SMODES;
1725 	} else {
1726 		ASSERT(vswp->smode_num != 0);
1727 	}
1728 
1729 	/* read vlan id properties of this vsw instance */
1730 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1731 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1732 
1733 	/* read priority-ether-types */
1734 	vsw_read_pri_eth_types(vswp, mdp, node);
1735 
1736 	D1(vswp, "%s: exit", __func__);
1737 	return (0);
1738 }
1739 
1740 /*
1741  * Read vlan id properties of the given MD node.
1742  * Arguments:
1743  *   arg:          device argument(vsw device or a port)
1744  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1745  *   mdp:          machine description
1746  *   node:         md node cookie
1747  *
1748  * Returns:
1749  *   pvidp:        port-vlan-id of the node
1750  *   vidspp:       list of vlan-ids of the node
1751  *   nvidsp:       # of vlan-ids in the list
1752  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1753  */
1754 static void
1755 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1756 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1757 	uint16_t *default_idp)
1758 {
1759 	vsw_t		*vswp;
1760 	vsw_port_t	*portp;
1761 	char		*pvid_propname;
1762 	char		*vid_propname;
1763 	uint_t		nvids = 0;
1764 	uint32_t	vids_size;
1765 	int		rv;
1766 	int		i;
1767 	uint64_t	*data;
1768 	uint64_t	val;
1769 	int		size;
1770 	int		inst;
1771 
1772 	if (type == VSW_LOCALDEV) {
1773 
1774 		vswp = (vsw_t *)arg;
1775 		pvid_propname = vsw_pvid_propname;
1776 		vid_propname = vsw_vid_propname;
1777 		inst = vswp->instance;
1778 
1779 	} else if (type == VSW_VNETPORT) {
1780 
1781 		portp = (vsw_port_t *)arg;
1782 		vswp = portp->p_vswp;
1783 		pvid_propname = port_pvid_propname;
1784 		vid_propname = port_vid_propname;
1785 		inst = portp->p_instance;
1786 
1787 	} else {
1788 		return;
1789 	}
1790 
1791 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1792 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1793 		if (rv != 0) {
1794 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1795 			    vsw_dvid_propname);
1796 
1797 			*default_idp = vsw_default_vlan_id;
1798 		} else {
1799 			*default_idp = val & 0xFFF;
1800 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1801 			    vsw_dvid_propname, inst, *default_idp);
1802 		}
1803 	}
1804 
1805 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1806 	if (rv != 0) {
1807 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1808 		*pvidp = vsw_default_vlan_id;
1809 	} else {
1810 
1811 		*pvidp = val & 0xFFF;
1812 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1813 		    pvid_propname, inst, *pvidp);
1814 	}
1815 
1816 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1817 	    &size);
1818 	if (rv != 0) {
1819 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1820 		size = 0;
1821 	} else {
1822 		size /= sizeof (uint64_t);
1823 	}
1824 	nvids = size;
1825 
1826 	if (nvids != 0) {
1827 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1828 		vids_size = sizeof (uint16_t) * nvids;
1829 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1830 		for (i = 0; i < nvids; i++) {
1831 			(*vidspp)[i] = data[i] & 0xFFFF;
1832 			D2(vswp, " %d ", (*vidspp)[i]);
1833 		}
1834 		D2(vswp, "\n");
1835 	}
1836 
1837 	*nvidsp = nvids;
1838 }
1839 
1840 /*
1841  * This function reads "priority-ether-types" property from md. This property
1842  * is used to enable support for priority frames. Applications which need
1843  * guaranteed and timely delivery of certain high priority frames to/from
1844  * a vnet or vsw within ldoms, should configure this property by providing
1845  * the ether type(s) for which the priority facility is needed.
1846  * Normal data frames are delivered over a ldc channel using the descriptor
1847  * ring mechanism which is constrained by factors such as descriptor ring size,
1848  * the rate at which the ring is processed at the peer ldc end point, etc.
1849  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1850  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1851  * descriptor ring path and enables a more reliable and timely delivery of
1852  * frames to the peer.
1853  */
1854 static void
1855 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1856 {
1857 	int		rv;
1858 	uint16_t	*types;
1859 	uint64_t	*data;
1860 	int		size;
1861 	int		i;
1862 	size_t		mblk_sz;
1863 
1864 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1865 	    (uint8_t **)&data, &size);
1866 	if (rv != 0) {
1867 		/*
1868 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1869 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1870 		 */
1871 		if (vsw_pri_eth_type != 0) {
1872 			size = sizeof (vsw_pri_eth_type);
1873 			data = &vsw_pri_eth_type;
1874 		} else {
1875 			D3(vswp, "%s: prop(%s) not found", __func__,
1876 			    pri_types_propname);
1877 			size = 0;
1878 		}
1879 	}
1880 
1881 	if (size == 0) {
1882 		vswp->pri_num_types = 0;
1883 		return;
1884 	}
1885 
1886 	/*
1887 	 * we have some priority-ether-types defined;
1888 	 * allocate a table of these types and also
1889 	 * allocate a pool of mblks to transmit these
1890 	 * priority packets.
1891 	 */
1892 	size /= sizeof (uint64_t);
1893 	vswp->pri_num_types = size;
1894 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1895 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1896 		types[i] = data[i] & 0xFFFF;
1897 	}
1898 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1899 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1900 }
1901 
1902 /*
1903  * Check to see if the relevant properties in the specified node have
1904  * changed, and if so take the appropriate action.
1905  *
1906  * If any of the properties are missing or invalid we don't take
1907  * any action, as this function should only be invoked when modifications
1908  * have been made to what we assume is a working configuration, which
1909  * we leave active.
1910  *
1911  * Note it is legal for this routine to be invoked even if none of the
1912  * properties in the port node within the MD have actually changed.
1913  */
1914 static void
1915 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1916 {
1917 	char		physname[LIFNAMSIZ];
1918 	char		drv[LIFNAMSIZ];
1919 	uint_t		ddi_instance;
1920 	uint8_t		new_smode[NUM_SMODES];
1921 	int		i, smode_num = 0;
1922 	uint64_t 	macaddr = 0;
1923 	enum		{MD_init = 0x1,
1924 				MD_physname = 0x2,
1925 				MD_macaddr = 0x4,
1926 				MD_smode = 0x8,
1927 				MD_vlans = 0x10} updated;
1928 	int		rv;
1929 	uint16_t	pvid;
1930 	uint16_t	*vids;
1931 	uint16_t	nvids;
1932 
1933 	updated = MD_init;
1934 
1935 	D1(vswp, "%s: enter", __func__);
1936 
1937 	/*
1938 	 * Check if name of physical device in MD has changed.
1939 	 */
1940 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
1941 		/*
1942 		 * Do basic sanity check on new device name/instance,
1943 		 * if its non NULL. It is valid for the device name to
1944 		 * have changed from a non NULL to a NULL value, i.e.
1945 		 * the vsw is being changed to 'routed' mode.
1946 		 */
1947 		if ((strlen(physname) != 0) &&
1948 		    (ddi_parse(physname, drv,
1949 		    &ddi_instance) != DDI_SUCCESS)) {
1950 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
1951 			    " a valid device name/instance",
1952 			    vswp->instance, physname);
1953 			goto fail_reconf;
1954 		}
1955 
1956 		if (strcmp(physname, vswp->physname)) {
1957 			D2(vswp, "%s: device name changed from %s to %s",
1958 			    __func__, vswp->physname, physname);
1959 
1960 			updated |= MD_physname;
1961 		} else {
1962 			D2(vswp, "%s: device name unchanged at %s",
1963 			    __func__, vswp->physname);
1964 		}
1965 	} else {
1966 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
1967 		    "device from updated MD.", vswp->instance);
1968 		goto fail_reconf;
1969 	}
1970 
1971 	/*
1972 	 * Check if MAC address has changed.
1973 	 */
1974 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1975 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1976 		    vswp->instance);
1977 		goto fail_reconf;
1978 	} else {
1979 		uint64_t maddr = macaddr;
1980 		READ_ENTER(&vswp->if_lockrw);
1981 		for (i = ETHERADDRL - 1; i >= 0; i--) {
1982 			if (vswp->if_addr.ether_addr_octet[i]
1983 			    != (macaddr & 0xFF)) {
1984 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
1985 				    __func__, i,
1986 				    vswp->if_addr.ether_addr_octet[i],
1987 				    (macaddr & 0xFF));
1988 				updated |= MD_macaddr;
1989 				macaddr = maddr;
1990 				break;
1991 			}
1992 			macaddr >>= 8;
1993 		}
1994 		RW_EXIT(&vswp->if_lockrw);
1995 		if (updated & MD_macaddr) {
1996 			vsw_save_lmacaddr(vswp, macaddr);
1997 		}
1998 	}
1999 
2000 	/*
2001 	 * Check if switching modes have changed.
2002 	 */
2003 	if (vsw_get_md_smodes(vswp, mdp, node,
2004 	    new_smode, &smode_num)) {
2005 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2006 		    vswp->instance, smode_propname);
2007 		goto fail_reconf;
2008 	} else {
2009 		ASSERT(smode_num != 0);
2010 		if (smode_num != vswp->smode_num) {
2011 			D2(vswp, "%s: number of modes changed from %d to %d",
2012 			    __func__, vswp->smode_num, smode_num);
2013 		}
2014 
2015 		for (i = 0; i < smode_num; i++) {
2016 			if (new_smode[i] != vswp->smode[i]) {
2017 				D2(vswp, "%s: mode changed from %d to %d",
2018 				    __func__, vswp->smode[i], new_smode[i]);
2019 				updated |= MD_smode;
2020 				break;
2021 			}
2022 		}
2023 	}
2024 
2025 	/* Read the vlan ids */
2026 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2027 	    &nvids, NULL);
2028 
2029 	/* Determine if there are any vlan id updates */
2030 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2031 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2032 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2033 	    bcmp(vids, vswp->vids, sizeof (uint16_t) * nvids))) {
2034 		updated |= MD_vlans;
2035 	}
2036 
2037 	/*
2038 	 * Now make any changes which are needed...
2039 	 */
2040 
2041 	if (updated & (MD_physname | MD_smode)) {
2042 
2043 		/*
2044 		 * Stop any pending timeout to setup switching mode.
2045 		 */
2046 		vsw_stop_switching_timeout(vswp);
2047 
2048 		/* Cleanup HybridIO */
2049 		vsw_hio_cleanup(vswp);
2050 
2051 		/*
2052 		 * Remove unicst, mcst addrs of vsw interface
2053 		 * and ports from the physdev.
2054 		 */
2055 		vsw_unset_addrs(vswp);
2056 
2057 		/*
2058 		 * Stop, detach and close the old device..
2059 		 */
2060 		mutex_enter(&vswp->mac_lock);
2061 
2062 		vsw_mac_detach(vswp);
2063 		vsw_mac_close(vswp);
2064 
2065 		mutex_exit(&vswp->mac_lock);
2066 
2067 		/*
2068 		 * Update phys name.
2069 		 */
2070 		if (updated & MD_physname) {
2071 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2072 			    vswp->instance, vswp->physname, physname);
2073 			(void) strncpy(vswp->physname,
2074 			    physname, strlen(physname) + 1);
2075 		}
2076 
2077 		/*
2078 		 * Update array with the new switch mode values.
2079 		 */
2080 		if (updated & MD_smode) {
2081 			for (i = 0; i < smode_num; i++)
2082 				vswp->smode[i] = new_smode[i];
2083 
2084 			vswp->smode_num = smode_num;
2085 			vswp->smode_idx = 0;
2086 		}
2087 
2088 		/*
2089 		 * ..and attach, start the new device.
2090 		 */
2091 		rv = vsw_setup_switching(vswp);
2092 		if (rv == EAGAIN) {
2093 			/*
2094 			 * Unable to setup switching mode.
2095 			 * As the error is EAGAIN, schedule a timeout to retry
2096 			 * and return. Programming addresses of ports and
2097 			 * vsw interface will be done when the timeout handler
2098 			 * completes successfully.
2099 			 */
2100 			mutex_enter(&vswp->swtmout_lock);
2101 
2102 			vswp->swtmout_enabled = B_TRUE;
2103 			vswp->swtmout_id =
2104 			    timeout(vsw_setup_switching_timeout, vswp,
2105 			    (vsw_setup_switching_delay *
2106 			    drv_usectohz(MICROSEC)));
2107 
2108 			mutex_exit(&vswp->swtmout_lock);
2109 
2110 			return;
2111 
2112 		} else if (rv) {
2113 			goto fail_update;
2114 		}
2115 
2116 		/*
2117 		 * program unicst, mcst addrs of vsw interface
2118 		 * and ports in the physdev.
2119 		 */
2120 		vsw_set_addrs(vswp);
2121 
2122 		/* Start HIO for ports that have already connected */
2123 		vsw_hio_start_ports(vswp);
2124 
2125 	} else if (updated & MD_macaddr) {
2126 		/*
2127 		 * We enter here if only MD_macaddr is exclusively updated.
2128 		 * If MD_physname and/or MD_smode are also updated, then
2129 		 * as part of that, we would have implicitly processed
2130 		 * MD_macaddr update (above).
2131 		 */
2132 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2133 		    vswp->instance, macaddr);
2134 
2135 		READ_ENTER(&vswp->if_lockrw);
2136 		if (vswp->if_state & VSW_IF_UP) {
2137 
2138 			mutex_enter(&vswp->hw_lock);
2139 			/*
2140 			 * Remove old mac address of vsw interface
2141 			 * from the physdev
2142 			 */
2143 			(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
2144 			/*
2145 			 * Program new mac address of vsw interface
2146 			 * in the physdev
2147 			 */
2148 			rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
2149 			mutex_exit(&vswp->hw_lock);
2150 			if (rv != 0) {
2151 				cmn_err(CE_NOTE,
2152 				    "!vsw%d: failed to program interface "
2153 				    "unicast address\n", vswp->instance);
2154 			}
2155 			/*
2156 			 * Notify the MAC layer of the changed address.
2157 			 */
2158 			mac_unicst_update(vswp->if_mh,
2159 			    (uint8_t *)&vswp->if_addr);
2160 
2161 		}
2162 		RW_EXIT(&vswp->if_lockrw);
2163 
2164 	}
2165 
2166 	if (updated & MD_vlans) {
2167 		/* Remove existing vlan ids from the hash table. */
2168 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2169 
2170 		/* save the new vlan ids */
2171 		vswp->pvid = pvid;
2172 		if (vswp->nvids != 0) {
2173 			kmem_free(vswp->vids, sizeof (uint16_t) * vswp->nvids);
2174 			vswp->nvids = 0;
2175 		}
2176 		if (nvids != 0) {
2177 			vswp->nvids = nvids;
2178 			vswp->vids = vids;
2179 		}
2180 
2181 		/* add these new vlan ids into hash table */
2182 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2183 	} else {
2184 		if (nvids != 0) {
2185 			kmem_free(vids, sizeof (uint16_t) * nvids);
2186 		}
2187 	}
2188 
2189 	return;
2190 
2191 fail_reconf:
2192 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2193 	return;
2194 
2195 fail_update:
2196 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2197 	    vswp->instance);
2198 }
2199 
2200 /*
2201  * Read the port's md properties.
2202  */
2203 static int
2204 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2205 	md_t *mdp, mde_cookie_t *node)
2206 {
2207 	uint64_t		ldc_id;
2208 	uint8_t			*addrp;
2209 	int			i, addrsz;
2210 	int			num_nodes = 0, nchan = 0;
2211 	int			listsz = 0;
2212 	mde_cookie_t		*listp = NULL;
2213 	struct ether_addr	ea;
2214 	uint64_t		macaddr;
2215 	uint64_t		inst = 0;
2216 	uint64_t		val;
2217 
2218 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2219 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2220 		    id_propname);
2221 		return (1);
2222 	}
2223 
2224 	/*
2225 	 * Find the channel endpoint node(s) (which should be under this
2226 	 * port node) which contain the channel id(s).
2227 	 */
2228 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2229 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2230 		    __func__, num_nodes);
2231 		return (1);
2232 	}
2233 
2234 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2235 
2236 	/* allocate enough space for node list */
2237 	listsz = num_nodes * sizeof (mde_cookie_t);
2238 	listp = kmem_zalloc(listsz, KM_SLEEP);
2239 
2240 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2241 	    md_find_name(mdp, "fwd"), listp);
2242 
2243 	if (nchan <= 0) {
2244 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2245 		kmem_free(listp, listsz);
2246 		return (1);
2247 	}
2248 
2249 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2250 
2251 	/* use property from first node found */
2252 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2253 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2254 		    id_propname);
2255 		kmem_free(listp, listsz);
2256 		return (1);
2257 	}
2258 
2259 	/* don't need list any more */
2260 	kmem_free(listp, listsz);
2261 
2262 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2263 
2264 	/* read mac-address property */
2265 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2266 	    &addrp, &addrsz)) {
2267 		DWARN(vswp, "%s: prop(%s) not found",
2268 		    __func__, remaddr_propname);
2269 		return (1);
2270 	}
2271 
2272 	if (addrsz < ETHERADDRL) {
2273 		DWARN(vswp, "%s: invalid address size", __func__);
2274 		return (1);
2275 	}
2276 
2277 	macaddr = *((uint64_t *)addrp);
2278 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2279 
2280 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2281 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2282 		macaddr >>= 8;
2283 	}
2284 
2285 	/* now update all properties into the port */
2286 	portp->p_vswp = vswp;
2287 	portp->p_instance = inst;
2288 	portp->addr_set = VSW_ADDR_UNSET;
2289 	ether_copy(&ea, &portp->p_macaddr);
2290 	if (nchan > VSW_PORT_MAX_LDCS) {
2291 		D2(vswp, "%s: using first of %d ldc ids",
2292 		    __func__, nchan);
2293 		nchan = VSW_PORT_MAX_LDCS;
2294 	}
2295 	portp->num_ldcs = nchan;
2296 	portp->ldc_ids =
2297 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2298 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2299 
2300 	/* read vlan id properties of this port node */
2301 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2302 	    &portp->vids, &portp->nvids, NULL);
2303 
2304 	/* Check if hybrid property is present */
2305 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2306 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2307 		portp->p_hio_enabled = B_TRUE;
2308 	} else {
2309 		portp->p_hio_enabled = B_FALSE;
2310 	}
2311 	/*
2312 	 * Port hio capability determined after version
2313 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2314 	 */
2315 	portp->p_hio_capable = B_FALSE;
2316 	return (0);
2317 }
2318 
2319 /*
2320  * Add a new port to the system.
2321  *
2322  * Returns 0 on success, 1 on failure.
2323  */
2324 int
2325 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2326 {
2327 	vsw_port_t	*portp;
2328 	int		rv;
2329 
2330 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2331 
2332 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2333 	if (rv != 0) {
2334 		kmem_free(portp, sizeof (*portp));
2335 		return (1);
2336 	}
2337 
2338 	rv = vsw_port_attach(portp);
2339 	if (rv != 0) {
2340 		DERR(vswp, "%s: failed to attach port", __func__);
2341 		return (1);
2342 	}
2343 
2344 	return (0);
2345 }
2346 
2347 static int
2348 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2349 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2350 {
2351 	uint64_t	cport_num;
2352 	uint64_t	pport_num;
2353 	vsw_port_list_t	*plistp;
2354 	vsw_port_t	*portp;
2355 	boolean_t	updated_vlans = B_FALSE;
2356 	uint16_t	pvid;
2357 	uint16_t	*vids;
2358 	uint16_t	nvids;
2359 	uint64_t	val;
2360 	boolean_t	hio_enabled = B_FALSE;
2361 
2362 	/*
2363 	 * For now, we get port updates only if vlan ids changed.
2364 	 * We read the port num and do some sanity check.
2365 	 */
2366 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2367 		return (1);
2368 	}
2369 
2370 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2371 		return (1);
2372 	}
2373 	if (cport_num != pport_num)
2374 		return (1);
2375 
2376 	plistp = &(vswp->plist);
2377 
2378 	READ_ENTER(&plistp->lockrw);
2379 
2380 	portp = vsw_lookup_port(vswp, cport_num);
2381 	if (portp == NULL) {
2382 		RW_EXIT(&plistp->lockrw);
2383 		return (1);
2384 	}
2385 
2386 	/* Read the vlan ids */
2387 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2388 	    &vids, &nvids, NULL);
2389 
2390 	/* Determine if there are any vlan id updates */
2391 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2392 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2393 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2394 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2395 		updated_vlans = B_TRUE;
2396 	}
2397 
2398 	if (updated_vlans == B_TRUE) {
2399 
2400 		/* Remove existing vlan ids from the hash table. */
2401 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2402 
2403 		/* save the new vlan ids */
2404 		portp->pvid = pvid;
2405 		if (portp->nvids != 0) {
2406 			kmem_free(portp->vids,
2407 			    sizeof (uint16_t) * portp->nvids);
2408 			portp->nvids = 0;
2409 		}
2410 		if (nvids != 0) {
2411 			portp->vids = kmem_zalloc(sizeof (uint16_t) *
2412 			    nvids, KM_SLEEP);
2413 			bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2414 			portp->nvids = nvids;
2415 			kmem_free(vids, sizeof (uint16_t) * nvids);
2416 		}
2417 
2418 		/* add these new vlan ids into hash table */
2419 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2420 
2421 		/* reset the port if it is vlan unaware (ver < 1.3) */
2422 		vsw_vlan_unaware_port_reset(portp);
2423 	}
2424 
2425 	/* Check if hybrid property is present */
2426 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2427 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2428 		hio_enabled = B_TRUE;
2429 	}
2430 
2431 	if (portp->p_hio_enabled != hio_enabled) {
2432 		vsw_hio_port_update(portp, hio_enabled);
2433 	}
2434 
2435 	RW_EXIT(&plistp->lockrw);
2436 
2437 	return (0);
2438 }
2439 
2440 /*
2441  * vsw_mac_rx -- A common function to send packets to the interface.
2442  * By default this function check if the interface is UP or not, the
2443  * rest of the behaviour depends on the flags as below:
2444  *
2445  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2446  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2447  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2448  */
2449 void
2450 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2451     mblk_t *mp, vsw_macrx_flags_t flags)
2452 {
2453 	mblk_t		*mpt;
2454 
2455 	D1(vswp, "%s:enter\n", __func__);
2456 	READ_ENTER(&vswp->if_lockrw);
2457 	/* Check if the interface is up */
2458 	if (!(vswp->if_state & VSW_IF_UP)) {
2459 		RW_EXIT(&vswp->if_lockrw);
2460 		/* Free messages only if FREEMSG flag specified */
2461 		if (flags & VSW_MACRX_FREEMSG) {
2462 			freemsgchain(mp);
2463 		}
2464 		D1(vswp, "%s:exit\n", __func__);
2465 		return;
2466 	}
2467 	/*
2468 	 * If PROMISC flag is passed, then check if
2469 	 * the interface is in the PROMISC mode.
2470 	 * If not, drop the messages.
2471 	 */
2472 	if (flags & VSW_MACRX_PROMISC) {
2473 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2474 			RW_EXIT(&vswp->if_lockrw);
2475 			/* Free messages only if FREEMSG flag specified */
2476 			if (flags & VSW_MACRX_FREEMSG) {
2477 				freemsgchain(mp);
2478 			}
2479 			D1(vswp, "%s:exit\n", __func__);
2480 			return;
2481 		}
2482 	}
2483 	RW_EXIT(&vswp->if_lockrw);
2484 	/*
2485 	 * If COPYMSG flag is passed, then make a copy
2486 	 * of the message chain and send up the copy.
2487 	 */
2488 	if (flags & VSW_MACRX_COPYMSG) {
2489 		mp = copymsgchain(mp);
2490 		if (mp == NULL) {
2491 			D1(vswp, "%s:exit\n", __func__);
2492 			return;
2493 		}
2494 	}
2495 
2496 	D2(vswp, "%s: sending up stack", __func__);
2497 
2498 	mpt = NULL;
2499 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2500 	if (mp != NULL) {
2501 		mac_rx(vswp->if_mh, mrh, mp);
2502 	}
2503 	D1(vswp, "%s:exit\n", __func__);
2504 }
2505 
2506 /* copy mac address of vsw into soft state structure */
2507 static void
2508 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2509 {
2510 	int	i;
2511 
2512 	WRITE_ENTER(&vswp->if_lockrw);
2513 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2514 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2515 		macaddr >>= 8;
2516 	}
2517 	RW_EXIT(&vswp->if_lockrw);
2518 }
2519