xref: /titanic_51/usr/src/uts/sun4v/io/vsw.c (revision b6917abefc343244b784f0cc34bc65b01469c3bf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/debug.h>
32 #include <sys/time.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/user.h>
36 #include <sys/stropts.h>
37 #include <sys/stream.h>
38 #include <sys/strlog.h>
39 #include <sys/strsubr.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpu.h>
42 #include <sys/kmem.h>
43 #include <sys/conf.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47 #include <sys/stat.h>
48 #include <sys/kstat.h>
49 #include <sys/vtrace.h>
50 #include <sys/strsun.h>
51 #include <sys/dlpi.h>
52 #include <sys/ethernet.h>
53 #include <net/if.h>
54 #include <sys/varargs.h>
55 #include <sys/machsystm.h>
56 #include <sys/modctl.h>
57 #include <sys/modhash.h>
58 #include <sys/mac.h>
59 #include <sys/mac_ether.h>
60 #include <sys/taskq.h>
61 #include <sys/note.h>
62 #include <sys/mach_descrip.h>
63 #include <sys/mac.h>
64 #include <sys/mdeg.h>
65 #include <sys/ldc.h>
66 #include <sys/vsw_fdb.h>
67 #include <sys/vsw.h>
68 #include <sys/vio_mailbox.h>
69 #include <sys/vnet_mailbox.h>
70 #include <sys/vnet_common.h>
71 #include <sys/vio_util.h>
72 #include <sys/sdt.h>
73 #include <sys/atomic.h>
74 #include <sys/callb.h>
75 #include <sys/vlan.h>
76 
77 /*
78  * Function prototypes.
79  */
80 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
81 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
82 static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
83 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
84 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);
85 
86 /* MDEG routines */
87 static	int vsw_mdeg_register(vsw_t *vswp);
88 static	void vsw_mdeg_unregister(vsw_t *vswp);
89 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
90 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
91 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
92 static	int vsw_read_mdprops(vsw_t *vswp);
93 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
94 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
95 	uint16_t *nvidsp, uint16_t *default_idp);
96 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
97 	md_t *mdp, mde_cookie_t *node);
98 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
99 	mde_cookie_t node);
100 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
101 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
102 
103 /* Mac driver related routines */
104 static int vsw_mac_register(vsw_t *);
105 static int vsw_mac_unregister(vsw_t *);
106 static int vsw_m_stat(void *, uint_t, uint64_t *);
107 static void vsw_m_stop(void *arg);
108 static int vsw_m_start(void *arg);
109 static int vsw_m_unicst(void *arg, const uint8_t *);
110 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
111 static int vsw_m_promisc(void *arg, boolean_t);
112 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
113 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
114     mblk_t *mp, vsw_macrx_flags_t flags);
115 
116 /*
117  * Functions imported from other files.
118  */
119 extern void vsw_setup_switching_timeout(void *arg);
120 extern void vsw_stop_switching_timeout(vsw_t *vswp);
121 extern int vsw_setup_switching(vsw_t *);
122 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
123     vsw_port_t *port, mac_resource_handle_t mrh);
124 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
125 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
126 extern void vsw_del_mcst_vsw(vsw_t *);
127 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
128 extern int vsw_detach_ports(vsw_t *vswp);
129 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
130 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
131 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
132 	md_t *prev_mdp, mde_cookie_t prev_mdex);
133 extern	int vsw_port_attach(vsw_port_t *port);
134 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
135 extern int vsw_mac_attach(vsw_t *vswp);
136 extern void vsw_mac_detach(vsw_t *vswp);
137 extern int vsw_mac_open(vsw_t *vswp);
138 extern void vsw_mac_close(vsw_t *vswp);
139 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
140 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
141 extern void vsw_reconfig_hw(vsw_t *);
142 extern void vsw_unset_addrs(vsw_t *vswp);
143 extern void vsw_set_addrs(vsw_t *vswp);
144 extern void vsw_create_vlans(void *arg, int type);
145 extern void vsw_destroy_vlans(void *arg, int type);
146 extern void vsw_vlan_add_ids(void *arg, int type);
147 extern void vsw_vlan_remove_ids(void *arg, int type);
148 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
149 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
150 	mblk_t **npt);
151 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
152 extern void vsw_hio_cleanup(vsw_t *vswp);
153 extern void vsw_hio_start_ports(vsw_t *vswp);
154 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
155 
156 /*
157  * Internal tunables.
158  */
159 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
160 int	vsw_wretries = 100;		/* # of write attempts */
161 int	vsw_desc_delay = 0;		/* delay in us */
162 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
163 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
164 int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
165 					/* 300*3 = 900sec(15min) of max tmout */
166 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
167 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
168 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
169 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
170 
171 uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
172 uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
173 uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
174 
/* sw timeout for boot delay only, in microseconds (default is 0.1 sec) */
176 int vsw_setup_switching_boot_delay = 100 * MILLISEC;
177 
178 /* delay in usec to wait for all references on a fdb entry to be dropped */
179 uint32_t vsw_fdbe_refcnt_delay = 10;
180 
181 /*
182  * Default vlan id. This is only used internally when the "default-vlan-id"
183  * property is not present in the MD device node. Therefore, this should not be
184  * used as a tunable; if this value is changed, the corresponding variable
185  * should be updated to the same value in all vnets connected to this vsw.
186  */
187 uint16_t	vsw_default_vlan_id = 1;
188 
189 /*
190  * Workaround for a version handshake bug in obp's vnet.
191  * If vsw initiates version negotiation starting from the highest version,
192  * obp sends a nack and terminates version handshake. To workaround
193  * this, we do not initiate version handshake when the channel comes up.
194  * Instead, we wait for the peer to send its version info msg and go through
195  * the version protocol exchange. If we successfully negotiate a version,
196  * before sending the ack, we send our version info msg to the peer
197  * using the <major,minor> version that we are about to ack.
198  */
199 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
200 
201 /*
202  * In the absence of "priority-ether-types" property in MD, the following
203  * internal tunable can be set to specify a single priority ethertype.
204  */
205 uint64_t vsw_pri_eth_type = 0;
206 
207 /*
208  * Number of transmit priority buffers that are preallocated per device.
209  * This number is chosen to be a small value to throttle transmission
210  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
211  */
212 uint32_t vsw_pri_tx_nmblks = 64;
213 
214 /*
215  * Number of RARP packets sent to announce macaddr to the physical switch,
216  * after vsw's physical device is changed dynamically or after a guest (client
217  * vnet) is live migrated in.
218  */
219 uint32_t vsw_publish_macaddr_count = 3;
220 
boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanup */
int vsw_hio_cleanup_delay = 10000;	/* 10ms */
224 
225 /*
226  * External tunables.
227  */
/*
 * Enable/disable thread per ring. This is a mode selection
 * that is done at vsw driver attach time.
 */
232 boolean_t vsw_multi_ring_enable = B_FALSE;
233 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;
234 
235 /* Number of transmit descriptors -  must be power of 2 */
236 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
237 
238 /*
239  * Max number of mblks received in one receive operation.
240  */
241 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
242 
243 /*
244  * Tunables for three different pools, that is, the size and
245  * number of mblks for each pool.
246  */
247 uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128;	/* size=128 for pool1 */
248 uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256;	/* size=256 for pool2 */
249 uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048;	/* size=2048 for pool3 */
250 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
251 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
252 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
253 
254 /*
255  * vsw_max_tx_qcount is the maximum # of packets that can be queued
256  * before the tx worker thread begins processing the queue. Its value
257  * is chosen to be 4x the default length of tx descriptor ring.
258  */
259 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
260 
261 /*
262  * MAC callbacks
263  */
/*
 * Table of vsw entry points handed to the MAC layer; vsw_mac_register()
 * stores its address in the m_callbacks field of the registration
 * structure.
 *
 * NOTE(review): the initializer is positional, so the meaning of each slot
 * (including the leading 0 and the trailing NULLs) depends on the layout of
 * mac_callbacks_t in <sys/mac.h> -- confirm against that header before
 * adding or reordering entries.
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	NULL,
	NULL,
	NULL
};
277 
278 static	struct	cb_ops	vsw_cb_ops = {
279 	nulldev,			/* cb_open */
280 	nulldev,			/* cb_close */
281 	nodev,				/* cb_strategy */
282 	nodev,				/* cb_print */
283 	nodev,				/* cb_dump */
284 	nodev,				/* cb_read */
285 	nodev,				/* cb_write */
286 	nodev,				/* cb_ioctl */
287 	nodev,				/* cb_devmap */
288 	nodev,				/* cb_mmap */
289 	nodev,				/* cb_segmap */
290 	nochpoll,			/* cb_chpoll */
291 	ddi_prop_op,			/* cb_prop_op */
292 	NULL,				/* cb_stream */
293 	D_MP,				/* cb_flag */
294 	CB_REV,				/* rev */
295 	nodev,				/* int (*cb_aread)() */
296 	nodev				/* int (*cb_awrite)() */
297 };
298 
299 static	struct	dev_ops	vsw_ops = {
300 	DEVO_REV,		/* devo_rev */
301 	0,			/* devo_refcnt */
302 	vsw_getinfo,		/* devo_getinfo */
303 	nulldev,		/* devo_identify */
304 	nulldev,		/* devo_probe */
305 	vsw_attach,		/* devo_attach */
306 	vsw_detach,		/* devo_detach */
307 	nodev,			/* devo_reset */
308 	&vsw_cb_ops,		/* devo_cb_ops */
309 	(struct bus_ops *)NULL,	/* devo_bus_ops */
310 	ddi_power		/* devo_power */
311 };
312 
/*
 * Driver linkage structure: ties the vsw dev_ops table (vsw_ops) and a
 * human-readable description string to the module. It is referenced from
 * the modlinkage structure that is passed to mod_install()/mod_remove().
 */
extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch",
	&vsw_ops,
};
319 
/*
 * Acquire / release all three locks of a channel in one step.
 *
 * LDC_ENTER_LOCK takes ldc_cblock, then ldc_rxlock, then ldc_txlock;
 * LDC_EXIT_LOCK drops them in the reverse order.
 *
 * Both macros are wrapped in do { } while (0) so that each expands to a
 * single statement. Without the wrapper, using one as the body of an
 * unbraced if/else/loop would make only the first mutex call conditional.
 * Invoke them like a function call, with a trailing semicolon.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	} while (0)
328 
329 /* Driver soft state ptr  */
330 static void	*vsw_state;
331 
332 /*
333  * Linked list of "vsw_t" structures - one per instance.
334  */
335 vsw_t		*vsw_head = NULL;
336 krwlock_t	vsw_rw;
337 
338 /*
339  * Property names
340  */
341 static char vdev_propname[] = "virtual-device";
342 static char vsw_propname[] = "virtual-network-switch";
343 static char physdev_propname[] = "vsw-phys-dev";
344 static char smode_propname[] = "vsw-switch-mode";
345 static char macaddr_propname[] = "local-mac-address";
346 static char remaddr_propname[] = "remote-mac-address";
347 static char ldcids_propname[] = "ldc-ids";
348 static char chan_propname[] = "channel-endpoint";
349 static char id_propname[] = "id";
350 static char reg_propname[] = "reg";
351 static char pri_types_propname[] = "priority-ether-types";
352 static char vsw_pvid_propname[] = "port-vlan-id";
353 static char vsw_vid_propname[] = "vlan-id";
354 static char vsw_dvid_propname[] = "default-vlan-id";
355 static char port_pvid_propname[] = "remote-port-vlan-id";
356 static char port_vid_propname[] = "remote-vlan-id";
357 static char hybrid_propname[] = "hybrid";
358 
359 /*
360  * Matching criteria passed to the MDEG to register interest
361  * in changes to 'virtual-device-port' nodes identified by their
362  * 'id' property.
363  */
364 static md_prop_match_t vport_prop_match[] = {
365 	{ MDET_PROP_VAL,    "id"   },
366 	{ MDET_LIST_END,    NULL    }
367 };
368 
369 static mdeg_node_match_t vport_match = { "virtual-device-port",
370 						vport_prop_match };
371 
372 /*
373  * Matching criteria passed to the MDEG to register interest
374  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
375  * by their 'name' and 'cfg-handle' properties.
376  */
377 static md_prop_match_t vdev_prop_match[] = {
378 	{ MDET_PROP_STR,    "name"   },
379 	{ MDET_PROP_VAL,    "cfg-handle" },
380 	{ MDET_LIST_END,    NULL    }
381 };
382 
383 static mdeg_node_match_t vdev_match = { "virtual-device",
384 						vdev_prop_match };
385 
386 
387 /*
388  * Specification of an MD node passed to the MDEG to filter any
389  * 'vport' nodes that do not belong to the specified node. This
390  * template is copied for each vsw instance and filled in with
391  * the appropriate 'cfg-handle' value before being passed to the MDEG.
392  */
393 static mdeg_prop_spec_t vsw_prop_template[] = {
394 	{ MDET_PROP_STR,    "name",		vsw_propname },
395 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
396 	{ MDET_LIST_END,    NULL,		NULL	}
397 };
398 
399 #define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);
400 
401 #ifdef	DEBUG
402 /*
403  * Print debug messages - set to 0x1f to enable all msgs
404  * or 0x0 to turn all off.
405  */
406 int vswdbg = 0x0;
407 
408 /*
409  * debug levels:
410  * 0x01:	Function entry/exit tracing
411  * 0x02:	Internal function messages
412  * 0x04:	Verbose internal messages
413  * 0x08:	Warning messages
414  * 0x10:	Error messages
415  */
416 
417 void
418 vswdebug(vsw_t *vswp, const char *fmt, ...)
419 {
420 	char buf[512];
421 	va_list ap;
422 
423 	va_start(ap, fmt);
424 	(void) vsprintf(buf, fmt, ap);
425 	va_end(ap);
426 
427 	if (vswp == NULL)
428 		cmn_err(CE_CONT, "%s\n", buf);
429 	else
430 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
431 }
432 
433 #endif	/* DEBUG */
434 
/*
 * Module linkage for the driver. This is the structure handed to
 * mod_install(), mod_remove() and mod_info() by _init(), _fini() and
 * _info() respectively; its only linkage entry is vswmodldrv.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};
440 
441 int
442 _init(void)
443 {
444 	int status;
445 
446 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
447 
448 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
449 	if (status != 0) {
450 		return (status);
451 	}
452 
453 	mac_init_ops(&vsw_ops, DRV_NAME);
454 	status = mod_install(&modlinkage);
455 	if (status != 0) {
456 		ddi_soft_state_fini(&vsw_state);
457 	}
458 	return (status);
459 }
460 
461 int
462 _fini(void)
463 {
464 	int status;
465 
466 	status = mod_remove(&modlinkage);
467 	if (status != 0)
468 		return (status);
469 	mac_fini_ops(&vsw_ops);
470 	ddi_soft_state_fini(&vsw_state);
471 
472 	rw_destroy(&vsw_rw);
473 
474 	return (status);
475 }
476 
477 int
478 _info(struct modinfo *modinfop)
479 {
480 	return (mod_info(&modlinkage, modinfop));
481 }
482 
483 static int
484 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
485 {
486 	vsw_t		*vswp;
487 	int		instance;
488 	char		hashname[MAXNAMELEN];
489 	char		qname[TASKQ_NAMELEN];
490 	enum		{ PROG_init = 0x00,
491 				PROG_locks = 0x01,
492 				PROG_readmd = 0x02,
493 				PROG_fdb = 0x04,
494 				PROG_mfdb = 0x08,
495 				PROG_taskq = 0x10,
496 				PROG_swmode = 0x20,
497 				PROG_macreg = 0x40,
498 				PROG_mdreg = 0x80}
499 			progress;
500 
501 	progress = PROG_init;
502 	int		rv;
503 
504 	switch (cmd) {
505 	case DDI_ATTACH:
506 		break;
507 	case DDI_RESUME:
508 		/* nothing to do for this non-device */
509 		return (DDI_SUCCESS);
510 	case DDI_PM_RESUME:
511 	default:
512 		return (DDI_FAILURE);
513 	}
514 
515 	instance = ddi_get_instance(dip);
516 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
517 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
518 		return (DDI_FAILURE);
519 	}
520 	vswp = ddi_get_soft_state(vsw_state, instance);
521 
522 	if (vswp == NULL) {
523 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
524 		goto vsw_attach_fail;
525 	}
526 
527 	vswp->dip = dip;
528 	vswp->instance = instance;
529 	ddi_set_driver_private(dip, (caddr_t)vswp);
530 
531 	mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL);
532 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
533 	mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL);
534 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
535 	rw_init(&vswp->mac_rwlock, NULL, RW_DRIVER, NULL);
536 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
537 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
538 
539 	progress |= PROG_locks;
540 
541 	rv = vsw_read_mdprops(vswp);
542 	if (rv != 0)
543 		goto vsw_attach_fail;
544 
545 	progress |= PROG_readmd;
546 
547 	/* setup the unicast forwarding database  */
548 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
549 	    vswp->instance);
550 	D2(vswp, "creating unicast hash table (%s)...", hashname);
551 	vswp->fdb_nchains = vsw_fdb_nchains;
552 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
553 	    mod_hash_null_valdtor, sizeof (void *));
554 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
555 	progress |= PROG_fdb;
556 
557 	/* setup the multicast fowarding database */
558 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
559 	    vswp->instance);
560 	D2(vswp, "creating multicast hash table %s)...", hashname);
561 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
562 	    mod_hash_null_valdtor, sizeof (void *));
563 
564 	progress |= PROG_mfdb;
565 
566 	/*
567 	 * Create the taskq which will process all the VIO
568 	 * control messages.
569 	 */
570 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
571 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
572 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
573 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
574 		    vswp->instance);
575 		goto vsw_attach_fail;
576 	}
577 
578 	progress |= PROG_taskq;
579 
580 	/* prevent auto-detaching */
581 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
582 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
583 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
584 		    "instance %u", DDI_NO_AUTODETACH, instance);
585 	}
586 
587 	/*
588 	 * The null switching function is set to avoid panic until
589 	 * switch mode is setup.
590 	 */
591 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
592 
593 	/*
594 	 * Setup the required switching mode,
595 	 * based on the mdprops that we read earlier.
596 	 * schedule a short timeout (0.1 sec) for the first time
597 	 * setup and avoid calling mac_open() directly here,
598 	 * others are regular timeout 3 secs.
599 	 */
600 	mutex_enter(&vswp->swtmout_lock);
601 
602 	vswp->swtmout_enabled = B_TRUE;
603 	vswp->swtmout_id = timeout(vsw_setup_switching_timeout, vswp,
604 	    drv_usectohz(vsw_setup_switching_boot_delay));
605 
606 	mutex_exit(&vswp->swtmout_lock);
607 
608 	progress |= PROG_swmode;
609 
610 	/* Register with mac layer as a provider */
611 	rv = vsw_mac_register(vswp);
612 	if (rv != 0)
613 		goto vsw_attach_fail;
614 
615 	progress |= PROG_macreg;
616 
617 	/*
618 	 * Now we have everything setup, register an interest in
619 	 * specific MD nodes.
620 	 *
621 	 * The callback is invoked in 2 cases, firstly if upon mdeg
622 	 * registration there are existing nodes which match our specified
623 	 * criteria, and secondly if the MD is changed (and again, there
624 	 * are nodes which we are interested in present within it. Note
625 	 * that our callback will be invoked even if our specified nodes
626 	 * have not actually changed).
627 	 *
628 	 */
629 	rv = vsw_mdeg_register(vswp);
630 	if (rv != 0)
631 		goto vsw_attach_fail;
632 
633 	progress |= PROG_mdreg;
634 
635 	WRITE_ENTER(&vsw_rw);
636 	vswp->next = vsw_head;
637 	vsw_head = vswp;
638 	RW_EXIT(&vsw_rw);
639 
640 	ddi_report_dev(vswp->dip);
641 	return (DDI_SUCCESS);
642 
643 vsw_attach_fail:
644 	DERR(NULL, "vsw_attach: failed");
645 
646 	if (progress & PROG_mdreg) {
647 		vsw_mdeg_unregister(vswp);
648 		(void) vsw_detach_ports(vswp);
649 	}
650 
651 	if (progress & PROG_macreg)
652 		(void) vsw_mac_unregister(vswp);
653 
654 	if (progress & PROG_swmode) {
655 		vsw_stop_switching_timeout(vswp);
656 		vsw_hio_cleanup(vswp);
657 		WRITE_ENTER(&vswp->mac_rwlock);
658 		vsw_mac_detach(vswp);
659 		vsw_mac_close(vswp);
660 		RW_EXIT(&vswp->mac_rwlock);
661 	}
662 
663 	if (progress & PROG_taskq)
664 		ddi_taskq_destroy(vswp->taskq_p);
665 
666 	if (progress & PROG_mfdb)
667 		mod_hash_destroy_hash(vswp->mfdb);
668 
669 	if (progress & PROG_fdb) {
670 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
671 		mod_hash_destroy_hash(vswp->fdb_hashp);
672 	}
673 
674 	if (progress & PROG_readmd) {
675 		if (VSW_PRI_ETH_DEFINED(vswp)) {
676 			kmem_free(vswp->pri_types,
677 			    sizeof (uint16_t) * vswp->pri_num_types);
678 		}
679 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
680 	}
681 
682 	if (progress & PROG_locks) {
683 		rw_destroy(&vswp->plist.lockrw);
684 		rw_destroy(&vswp->mfdbrw);
685 		rw_destroy(&vswp->mac_rwlock);
686 		rw_destroy(&vswp->if_lockrw);
687 		mutex_destroy(&vswp->swtmout_lock);
688 		mutex_destroy(&vswp->mca_lock);
689 		mutex_destroy(&vswp->hw_lock);
690 	}
691 
692 	ddi_soft_state_free(vsw_state, instance);
693 	return (DDI_FAILURE);
694 }
695 
696 static int
697 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
698 {
699 	vio_mblk_pool_t		*poolp, *npoolp;
700 	vsw_t			**vswpp, *vswp;
701 	int 			instance;
702 
703 	instance = ddi_get_instance(dip);
704 	vswp = ddi_get_soft_state(vsw_state, instance);
705 
706 	if (vswp == NULL) {
707 		return (DDI_FAILURE);
708 	}
709 
710 	switch (cmd) {
711 	case DDI_DETACH:
712 		break;
713 	case DDI_SUSPEND:
714 	case DDI_PM_SUSPEND:
715 	default:
716 		return (DDI_FAILURE);
717 	}
718 
719 	D2(vswp, "detaching instance %d", instance);
720 
721 	/* Stop any pending timeout to setup switching mode. */
722 	vsw_stop_switching_timeout(vswp);
723 
724 	if (vswp->if_state & VSW_IF_REG) {
725 		if (vsw_mac_unregister(vswp) != 0) {
726 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
727 			    "MAC layer", vswp->instance);
728 			return (DDI_FAILURE);
729 		}
730 	}
731 
732 	vsw_mdeg_unregister(vswp);
733 
734 	/* remove mac layer callback */
735 	WRITE_ENTER(&vswp->mac_rwlock);
736 	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
737 		mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
738 		vswp->mrh = NULL;
739 	}
740 	RW_EXIT(&vswp->mac_rwlock);
741 
742 	if (vsw_detach_ports(vswp) != 0) {
743 		cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports",
744 		    vswp->instance);
745 		return (DDI_FAILURE);
746 	}
747 
748 	rw_destroy(&vswp->if_lockrw);
749 
750 	/* cleanup HybridIO */
751 	vsw_hio_cleanup(vswp);
752 
753 	mutex_destroy(&vswp->hw_lock);
754 
755 	/*
756 	 * Now that the ports have been deleted, stop and close
757 	 * the physical device.
758 	 */
759 	WRITE_ENTER(&vswp->mac_rwlock);
760 
761 	vsw_mac_detach(vswp);
762 	vsw_mac_close(vswp);
763 
764 	RW_EXIT(&vswp->mac_rwlock);
765 
766 	rw_destroy(&vswp->mac_rwlock);
767 	mutex_destroy(&vswp->swtmout_lock);
768 
769 	/*
770 	 * Destroy any free pools that may still exist.
771 	 */
772 	poolp = vswp->rxh;
773 	while (poolp != NULL) {
774 		npoolp = vswp->rxh = poolp->nextp;
775 		if (vio_destroy_mblks(poolp) != 0) {
776 			vswp->rxh = poolp;
777 			return (DDI_FAILURE);
778 		}
779 		poolp = npoolp;
780 	}
781 
782 	/*
783 	 * Remove this instance from any entries it may be on in
784 	 * the hash table by using the list of addresses maintained
785 	 * in the vsw_t structure.
786 	 */
787 	vsw_del_mcst_vsw(vswp);
788 
789 	vswp->mcap = NULL;
790 	mutex_destroy(&vswp->mca_lock);
791 
792 	/*
793 	 * By now any pending tasks have finished and the underlying
794 	 * ldc's have been destroyed, so its safe to delete the control
795 	 * message taskq.
796 	 */
797 	if (vswp->taskq_p != NULL)
798 		ddi_taskq_destroy(vswp->taskq_p);
799 
800 	/*
801 	 * At this stage all the data pointers in the hash table
802 	 * should be NULL, as all the ports have been removed and will
803 	 * have deleted themselves from the port lists which the data
804 	 * pointers point to. Hence we can destroy the table using the
805 	 * default destructors.
806 	 */
807 	D2(vswp, "vsw_detach: destroying hash tables..");
808 	vsw_destroy_vlans(vswp, VSW_LOCALDEV);
809 	mod_hash_destroy_hash(vswp->fdb_hashp);
810 	vswp->fdb_hashp = NULL;
811 
812 	WRITE_ENTER(&vswp->mfdbrw);
813 	mod_hash_destroy_hash(vswp->mfdb);
814 	vswp->mfdb = NULL;
815 	RW_EXIT(&vswp->mfdbrw);
816 	rw_destroy(&vswp->mfdbrw);
817 
818 	/* free pri_types table */
819 	if (VSW_PRI_ETH_DEFINED(vswp)) {
820 		kmem_free(vswp->pri_types,
821 		    sizeof (uint16_t) * vswp->pri_num_types);
822 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
823 	}
824 
825 	ddi_remove_minor_node(dip, NULL);
826 
827 	rw_destroy(&vswp->plist.lockrw);
828 	WRITE_ENTER(&vsw_rw);
829 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
830 		if (*vswpp == vswp) {
831 			*vswpp = vswp->next;
832 			break;
833 		}
834 	}
835 	RW_EXIT(&vsw_rw);
836 	ddi_soft_state_free(vsw_state, instance);
837 
838 	return (DDI_SUCCESS);
839 }
840 
841 static int
842 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
843 {
844 	_NOTE(ARGUNUSED(dip))
845 
846 	vsw_t	*vswp = NULL;
847 	dev_t	dev = (dev_t)arg;
848 	int	instance;
849 
850 	instance = getminor(dev);
851 
852 	switch (infocmd) {
853 	case DDI_INFO_DEVT2DEVINFO:
854 		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
855 			*result = NULL;
856 			return (DDI_FAILURE);
857 		}
858 		*result = vswp->dip;
859 		return (DDI_SUCCESS);
860 
861 	case DDI_INFO_DEVT2INSTANCE:
862 		*result = (void *)(uintptr_t)instance;
863 		return (DDI_SUCCESS);
864 
865 	default:
866 		*result = NULL;
867 		return (DDI_FAILURE);
868 	}
869 }
870 
871 /*
872  * Get the value of the "vsw-phys-dev" property in the specified
873  * node. This property is the name of the physical device that
874  * the virtual switch will use to talk to the outside world.
875  *
876  * Note it is valid for this property to be NULL (but the property
877  * itself must exist). Callers of this routine should verify that
878  * the value returned is what they expected (i.e. either NULL or non NULL).
879  *
880  * On success returns value of the property in region pointed to by
881  * the 'name' argument, and with return value of 0. Otherwise returns 1.
882  */
883 static int
884 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
885 {
886 	int		len = 0;
887 	int		instance;
888 	char		*physname = NULL;
889 	char		*dev;
890 	const char	*dev_name;
891 	char		myname[MAXNAMELEN];
892 
893 	dev_name = ddi_driver_name(vswp->dip);
894 	instance = ddi_get_instance(vswp->dip);
895 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
896 
897 	if (md_get_prop_data(mdp, node, physdev_propname,
898 	    (uint8_t **)(&physname), &len) != 0) {
899 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
900 		    "device(s) from MD", vswp->instance);
901 		return (1);
902 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
903 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
904 		    vswp->instance, physname);
905 		return (1);
906 	} else if (strcmp(myname, physname) == 0) {
907 		/*
908 		 * Prevent the vswitch from opening itself as the
909 		 * network device.
910 		 */
911 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
912 		    vswp->instance, physname);
913 		return (1);
914 	} else {
915 		(void) strncpy(name, physname, strlen(physname) + 1);
916 		D2(vswp, "%s: using first device specified (%s)",
917 		    __func__, physname);
918 	}
919 
920 #ifdef DEBUG
921 	/*
922 	 * As a temporary measure to aid testing we check to see if there
923 	 * is a vsw.conf file present. If there is we use the value of the
924 	 * vsw_physname property in the file as the name of the physical
925 	 * device, overriding the value from the MD.
926 	 *
927 	 * There may be multiple devices listed, but for the moment
928 	 * we just use the first one.
929 	 */
930 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
931 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
932 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
933 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
934 			    vswp->instance, dev);
935 			ddi_prop_free(dev);
936 			return (1);
937 		} else {
938 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
939 			    "config file", vswp->instance, dev);
940 
941 			(void) strncpy(name, dev, strlen(dev) + 1);
942 		}
943 
944 		ddi_prop_free(dev);
945 	}
946 #endif
947 
948 	return (0);
949 }
950 
951 /*
952  * Read the 'vsw-switch-mode' property from the specified MD node.
953  *
954  * Returns 0 on success and the number of modes found in 'found',
955  * otherwise returns 1.
956  */
957 static int
958 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
959 						uint8_t *modes, int *found)
960 {
961 	int		len = 0;
962 	int		smode_num = 0;
963 	char		*smode = NULL;
964 	char		*curr_mode = NULL;
965 
966 	D1(vswp, "%s: enter", __func__);
967 
968 	/*
969 	 * Get the switch-mode property. The modes are listed in
970 	 * decreasing order of preference, i.e. prefered mode is
971 	 * first item in list.
972 	 */
973 	len = 0;
974 	smode_num = 0;
975 	if (md_get_prop_data(mdp, node, smode_propname,
976 	    (uint8_t **)(&smode), &len) != 0) {
977 		/*
978 		 * Unable to get switch-mode property from MD, nothing
979 		 * more we can do.
980 		 */
981 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
982 		    " from the MD", vswp->instance);
983 		*found = 0;
984 		return (1);
985 	}
986 
987 	curr_mode = smode;
988 	/*
989 	 * Modes of operation:
990 	 * 'switched'	 - layer 2 switching, underlying HW in
991 	 *			programmed mode.
992 	 * 'promiscuous' - layer 2 switching, underlying HW in
993 	 *			promiscuous mode.
994 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
995 	 *			in non-promiscuous mode.
996 	 */
997 	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
998 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
999 		if (strcmp(curr_mode, "switched") == 0) {
1000 			modes[smode_num++] = VSW_LAYER2;
1001 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
1002 			modes[smode_num++] = VSW_LAYER2_PROMISC;
1003 		} else if (strcmp(curr_mode, "routed") == 0) {
1004 			modes[smode_num++] = VSW_LAYER3;
1005 		} else {
1006 			DWARN(vswp, "%s: Unknown switch mode %s, "
1007 			    "setting to default 'switched' mode",
1008 			    __func__, curr_mode);
1009 			modes[smode_num++] = VSW_LAYER2;
1010 		}
1011 		curr_mode += strlen(curr_mode) + 1;
1012 	}
1013 	*found = smode_num;
1014 
1015 	D2(vswp, "%s: %d modes found", __func__, smode_num);
1016 
1017 	D1(vswp, "%s: exit", __func__);
1018 
1019 	return (0);
1020 }
1021 
1022 /*
1023  * Register with the MAC layer as a network device, so we
1024  * can be plumbed if necessary.
1025  */
1026 static int
1027 vsw_mac_register(vsw_t *vswp)
1028 {
1029 	mac_register_t	*macp;
1030 	int		rv;
1031 
1032 	D1(vswp, "%s: enter", __func__);
1033 
1034 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1035 		return (EINVAL);
1036 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1037 	macp->m_driver = vswp;
1038 	macp->m_dip = vswp->dip;
1039 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1040 	macp->m_callbacks = &vsw_m_callbacks;
1041 	macp->m_min_sdu = 0;
1042 	macp->m_max_sdu = vsw_ethermtu;
1043 	macp->m_margin = VLAN_TAGSZ;
1044 	rv = mac_register(macp, &vswp->if_mh);
1045 	mac_free(macp);
1046 	if (rv != 0) {
1047 		/*
1048 		 * Treat this as a non-fatal error as we may be
1049 		 * able to operate in some other mode.
1050 		 */
1051 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1052 		    "a provider with MAC layer", vswp->instance);
1053 		return (rv);
1054 	}
1055 
1056 	vswp->if_state |= VSW_IF_REG;
1057 
1058 	vswp->max_frame_size = vsw_ethermtu + sizeof (struct ether_header)
1059 	    + VLAN_TAGSZ;
1060 
1061 	D1(vswp, "%s: exit", __func__);
1062 
1063 	return (rv);
1064 }
1065 
1066 static int
1067 vsw_mac_unregister(vsw_t *vswp)
1068 {
1069 	int		rv = 0;
1070 
1071 	D1(vswp, "%s: enter", __func__);
1072 
1073 	WRITE_ENTER(&vswp->if_lockrw);
1074 
1075 	if (vswp->if_state & VSW_IF_REG) {
1076 		rv = mac_unregister(vswp->if_mh);
1077 		if (rv != 0) {
1078 			DWARN(vswp, "%s: unable to unregister from MAC "
1079 			    "framework", __func__);
1080 
1081 			RW_EXIT(&vswp->if_lockrw);
1082 			D1(vswp, "%s: fail exit", __func__);
1083 			return (rv);
1084 		}
1085 
1086 		/* mark i/f as down and unregistered */
1087 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1088 	}
1089 	RW_EXIT(&vswp->if_lockrw);
1090 
1091 	D1(vswp, "%s: exit", __func__);
1092 
1093 	return (rv);
1094 }
1095 
1096 static int
1097 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1098 {
1099 	vsw_t			*vswp = (vsw_t *)arg;
1100 
1101 	D1(vswp, "%s: enter", __func__);
1102 
1103 	WRITE_ENTER(&vswp->mac_rwlock);
1104 	if (vswp->mh == NULL) {
1105 		RW_EXIT(&vswp->mac_rwlock);
1106 		return (EINVAL);
1107 	}
1108 
1109 	/* return stats from underlying device */
1110 	*val = mac_stat_get(vswp->mh, stat);
1111 
1112 	RW_EXIT(&vswp->mac_rwlock);
1113 
1114 	return (0);
1115 }
1116 
1117 static void
1118 vsw_m_stop(void *arg)
1119 {
1120 	vsw_t		*vswp = (vsw_t *)arg;
1121 
1122 	D1(vswp, "%s: enter", __func__);
1123 
1124 	WRITE_ENTER(&vswp->if_lockrw);
1125 	vswp->if_state &= ~VSW_IF_UP;
1126 	RW_EXIT(&vswp->if_lockrw);
1127 
1128 	mutex_enter(&vswp->hw_lock);
1129 
1130 	(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1131 
1132 	if (vswp->recfg_reqd)
1133 		vsw_reconfig_hw(vswp);
1134 
1135 	mutex_exit(&vswp->hw_lock);
1136 
1137 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1138 }
1139 
1140 static int
1141 vsw_m_start(void *arg)
1142 {
1143 	vsw_t		*vswp = (vsw_t *)arg;
1144 
1145 	D1(vswp, "%s: enter", __func__);
1146 
1147 	WRITE_ENTER(&vswp->if_lockrw);
1148 
1149 	vswp->if_state |= VSW_IF_UP;
1150 
1151 	if (vswp->switching_setup_done == B_FALSE) {
1152 		/*
1153 		 * If the switching mode has not been setup yet, just
1154 		 * return. The unicast address will be programmed
1155 		 * after the physical device is successfully setup by the
1156 		 * timeout handler.
1157 		 */
1158 		RW_EXIT(&vswp->if_lockrw);
1159 		return (0);
1160 	}
1161 
1162 	/* if in layer2 mode, program unicast address. */
1163 	if (vswp->mh != NULL) {
1164 		mutex_enter(&vswp->hw_lock);
1165 		(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1166 		mutex_exit(&vswp->hw_lock);
1167 	}
1168 
1169 	RW_EXIT(&vswp->if_lockrw);
1170 
1171 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1172 	return (0);
1173 }
1174 
1175 /*
1176  * Change the local interface address.
1177  *
1178  * Note: we don't support this entry point. The local
1179  * mac address of the switch can only be changed via its
1180  * MD node properties.
1181  */
1182 static int
1183 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1184 {
1185 	_NOTE(ARGUNUSED(arg, macaddr))
1186 
1187 	return (DDI_FAILURE);
1188 }
1189 
1190 static int
1191 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1192 {
1193 	vsw_t		*vswp = (vsw_t *)arg;
1194 	mcst_addr_t	*mcst_p = NULL;
1195 	uint64_t	addr = 0x0;
1196 	int		i, ret = 0;
1197 
1198 	D1(vswp, "%s: enter", __func__);
1199 
1200 	/*
1201 	 * Convert address into form that can be used
1202 	 * as hash table key.
1203 	 */
1204 	for (i = 0; i < ETHERADDRL; i++) {
1205 		addr = (addr << 8) | mca[i];
1206 	}
1207 
1208 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1209 
1210 	if (add) {
1211 		D2(vswp, "%s: adding multicast", __func__);
1212 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1213 			/*
1214 			 * Update the list of multicast addresses
1215 			 * contained within the vsw_t structure to
1216 			 * include this new one.
1217 			 */
1218 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1219 			if (mcst_p == NULL) {
1220 				DERR(vswp, "%s unable to alloc mem", __func__);
1221 				(void) vsw_del_mcst(vswp,
1222 				    VSW_LOCALDEV, addr, NULL);
1223 				return (1);
1224 			}
1225 			mcst_p->addr = addr;
1226 			ether_copy(mca, &mcst_p->mca);
1227 
1228 			/*
1229 			 * Call into the underlying driver to program the
1230 			 * address into HW.
1231 			 */
1232 			WRITE_ENTER(&vswp->mac_rwlock);
1233 			if (vswp->mh != NULL) {
1234 				ret = mac_multicst_add(vswp->mh, mca);
1235 				if (ret != 0) {
1236 					cmn_err(CE_NOTE, "!vsw%d: unable to "
1237 					    "add multicast address",
1238 					    vswp->instance);
1239 					RW_EXIT(&vswp->mac_rwlock);
1240 					(void) vsw_del_mcst(vswp,
1241 					    VSW_LOCALDEV, addr, NULL);
1242 					kmem_free(mcst_p, sizeof (*mcst_p));
1243 					return (ret);
1244 				}
1245 				mcst_p->mac_added = B_TRUE;
1246 			}
1247 			RW_EXIT(&vswp->mac_rwlock);
1248 
1249 			mutex_enter(&vswp->mca_lock);
1250 			mcst_p->nextp = vswp->mcap;
1251 			vswp->mcap = mcst_p;
1252 			mutex_exit(&vswp->mca_lock);
1253 		} else {
1254 			cmn_err(CE_NOTE, "!vsw%d: unable to add multicast "
1255 			    "address", vswp->instance);
1256 		}
1257 		return (ret);
1258 	}
1259 
1260 	D2(vswp, "%s: removing multicast", __func__);
1261 	/*
1262 	 * Remove the address from the hash table..
1263 	 */
1264 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1265 
1266 		/*
1267 		 * ..and then from the list maintained in the
1268 		 * vsw_t structure.
1269 		 */
1270 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1271 		ASSERT(mcst_p != NULL);
1272 
1273 		WRITE_ENTER(&vswp->mac_rwlock);
1274 		if (vswp->mh != NULL && mcst_p->mac_added) {
1275 			(void) mac_multicst_remove(vswp->mh, mca);
1276 			mcst_p->mac_added = B_FALSE;
1277 		}
1278 		RW_EXIT(&vswp->mac_rwlock);
1279 		kmem_free(mcst_p, sizeof (*mcst_p));
1280 	}
1281 
1282 	D1(vswp, "%s: exit", __func__);
1283 
1284 	return (0);
1285 }
1286 
1287 static int
1288 vsw_m_promisc(void *arg, boolean_t on)
1289 {
1290 	vsw_t		*vswp = (vsw_t *)arg;
1291 
1292 	D1(vswp, "%s: enter", __func__);
1293 
1294 	WRITE_ENTER(&vswp->if_lockrw);
1295 	if (on)
1296 		vswp->if_state |= VSW_IF_PROMISC;
1297 	else
1298 		vswp->if_state &= ~VSW_IF_PROMISC;
1299 	RW_EXIT(&vswp->if_lockrw);
1300 
1301 	D1(vswp, "%s: exit", __func__);
1302 
1303 	return (0);
1304 }
1305 
1306 static mblk_t *
1307 vsw_m_tx(void *arg, mblk_t *mp)
1308 {
1309 	vsw_t		*vswp = (vsw_t *)arg;
1310 
1311 	D1(vswp, "%s: enter", __func__);
1312 
1313 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1314 
1315 	if (mp == NULL) {
1316 		return (NULL);
1317 	}
1318 
1319 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1320 
1321 	D1(vswp, "%s: exit", __func__);
1322 
1323 	return (NULL);
1324 }
1325 
1326 /*
1327  * Register for machine description (MD) updates.
1328  *
1329  * Returns 0 on success, 1 on failure.
1330  */
1331 static int
1332 vsw_mdeg_register(vsw_t *vswp)
1333 {
1334 	mdeg_prop_spec_t	*pspecp;
1335 	mdeg_node_spec_t	*inst_specp;
1336 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1337 	size_t			templatesz;
1338 	int			rv;
1339 
1340 	D1(vswp, "%s: enter", __func__);
1341 
1342 	/*
1343 	 * Allocate and initialize a per-instance copy
1344 	 * of the global property spec array that will
1345 	 * uniquely identify this vsw instance.
1346 	 */
1347 	templatesz = sizeof (vsw_prop_template);
1348 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1349 
1350 	bcopy(vsw_prop_template, pspecp, templatesz);
1351 
1352 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1353 
1354 	/* initialize the complete prop spec structure */
1355 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1356 	inst_specp->namep = "virtual-device";
1357 	inst_specp->specp = pspecp;
1358 
1359 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1360 	    vswp->regprop);
1361 	/*
1362 	 * Register an interest in 'virtual-device' nodes with a
1363 	 * 'name' property of 'virtual-network-switch'
1364 	 */
1365 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1366 	    (void *)vswp, &mdeg_hdl);
1367 	if (rv != MDEG_SUCCESS) {
1368 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1369 		    __func__, rv);
1370 		goto mdeg_reg_fail;
1371 	}
1372 
1373 	/*
1374 	 * Register an interest in 'vsw-port' nodes.
1375 	 */
1376 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1377 	    (void *)vswp, &mdeg_port_hdl);
1378 	if (rv != MDEG_SUCCESS) {
1379 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1380 		(void) mdeg_unregister(mdeg_hdl);
1381 		goto mdeg_reg_fail;
1382 	}
1383 
1384 	/* save off data that will be needed later */
1385 	vswp->inst_spec = inst_specp;
1386 	vswp->mdeg_hdl = mdeg_hdl;
1387 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1388 
1389 	D1(vswp, "%s: exit", __func__);
1390 	return (0);
1391 
1392 mdeg_reg_fail:
1393 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1394 	    vswp->instance);
1395 	kmem_free(pspecp, templatesz);
1396 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1397 
1398 	vswp->mdeg_hdl = NULL;
1399 	vswp->mdeg_port_hdl = NULL;
1400 
1401 	return (1);
1402 }
1403 
1404 static void
1405 vsw_mdeg_unregister(vsw_t *vswp)
1406 {
1407 	D1(vswp, "vsw_mdeg_unregister: enter");
1408 
1409 	if (vswp->mdeg_hdl != NULL)
1410 		(void) mdeg_unregister(vswp->mdeg_hdl);
1411 
1412 	if (vswp->mdeg_port_hdl != NULL)
1413 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1414 
1415 	if (vswp->inst_spec != NULL) {
1416 		if (vswp->inst_spec->specp != NULL) {
1417 			(void) kmem_free(vswp->inst_spec->specp,
1418 			    sizeof (vsw_prop_template));
1419 			vswp->inst_spec->specp = NULL;
1420 		}
1421 
1422 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1423 		vswp->inst_spec = NULL;
1424 	}
1425 
1426 	D1(vswp, "vsw_mdeg_unregister: exit");
1427 }
1428 
1429 /*
1430  * Mdeg callback invoked for the vsw node itself.
1431  */
1432 static int
1433 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1434 {
1435 	vsw_t		*vswp;
1436 	md_t		*mdp;
1437 	mde_cookie_t	node;
1438 	uint64_t	inst;
1439 	char		*node_name = NULL;
1440 
1441 	if (resp == NULL)
1442 		return (MDEG_FAILURE);
1443 
1444 	vswp = (vsw_t *)cb_argp;
1445 
1446 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1447 	    " : prev matched %d", __func__, resp->added.nelem,
1448 	    resp->removed.nelem, resp->match_curr.nelem,
1449 	    resp->match_prev.nelem);
1450 
1451 	/*
1452 	 * We get an initial callback for this node as 'added'
1453 	 * after registering with mdeg. Note that we would have
1454 	 * already gathered information about this vsw node by
1455 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1456 	 * So, there is a window where the properties of this
1457 	 * node might have changed when we get this initial 'added'
1458 	 * callback. We handle this as if an update occured
1459 	 * and invoke the same function which handles updates to
1460 	 * the properties of this vsw-node if any.
1461 	 *
1462 	 * A non-zero 'match' value indicates that the MD has been
1463 	 * updated and that a virtual-network-switch node is
1464 	 * present which may or may not have been updated. It is
1465 	 * up to the clients to examine their own nodes and
1466 	 * determine if they have changed.
1467 	 */
1468 	if (resp->added.nelem != 0) {
1469 
1470 		if (resp->added.nelem != 1) {
1471 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1472 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1473 			return (MDEG_FAILURE);
1474 		}
1475 
1476 		mdp = resp->added.mdp;
1477 		node = resp->added.mdep[0];
1478 
1479 	} else if (resp->match_curr.nelem != 0) {
1480 
1481 		if (resp->match_curr.nelem != 1) {
1482 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1483 			    "invalid: %d\n", vswp->instance,
1484 			    resp->match_curr.nelem);
1485 			return (MDEG_FAILURE);
1486 		}
1487 
1488 		mdp = resp->match_curr.mdp;
1489 		node = resp->match_curr.mdep[0];
1490 
1491 	} else {
1492 		return (MDEG_FAILURE);
1493 	}
1494 
1495 	/* Validate name and instance */
1496 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1497 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1498 		return (MDEG_FAILURE);
1499 	}
1500 
1501 	/* is this a virtual-network-switch? */
1502 	if (strcmp(node_name, vsw_propname) != 0) {
1503 		DERR(vswp, "%s: Invalid node name: %s\n",
1504 		    __func__, node_name);
1505 		return (MDEG_FAILURE);
1506 	}
1507 
1508 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1509 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1510 		    __func__);
1511 		return (MDEG_FAILURE);
1512 	}
1513 
1514 	/* is this the right instance of vsw? */
1515 	if (inst != vswp->regprop) {
1516 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1517 		    __func__, inst);
1518 		return (MDEG_FAILURE);
1519 	}
1520 
1521 	vsw_update_md_prop(vswp, mdp, node);
1522 
1523 	return (MDEG_SUCCESS);
1524 }
1525 
1526 /*
1527  * Mdeg callback invoked for changes to the vsw-port nodes
1528  * under the vsw node.
1529  */
1530 static int
1531 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1532 {
1533 	vsw_t		*vswp;
1534 	int		idx;
1535 	md_t		*mdp;
1536 	mde_cookie_t	node;
1537 	uint64_t	inst;
1538 	int		rv;
1539 
1540 	if ((resp == NULL) || (cb_argp == NULL))
1541 		return (MDEG_FAILURE);
1542 
1543 	vswp = (vsw_t *)cb_argp;
1544 
1545 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1546 	    " : prev matched %d", __func__, resp->added.nelem,
1547 	    resp->removed.nelem, resp->match_curr.nelem,
1548 	    resp->match_prev.nelem);
1549 
1550 	/* process added ports */
1551 	for (idx = 0; idx < resp->added.nelem; idx++) {
1552 		mdp = resp->added.mdp;
1553 		node = resp->added.mdep[idx];
1554 
1555 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1556 
1557 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1558 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1559 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1560 		}
1561 	}
1562 
1563 	/* process removed ports */
1564 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1565 		mdp = resp->removed.mdp;
1566 		node = resp->removed.mdep[idx];
1567 
1568 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1569 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1570 			    __func__, id_propname, idx);
1571 			continue;
1572 		}
1573 
1574 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1575 
1576 		if (vsw_port_detach(vswp, inst) != 0) {
1577 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1578 			    vswp->instance, inst);
1579 		}
1580 	}
1581 
1582 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1583 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1584 		    resp->match_curr.mdep[idx],
1585 		    resp->match_prev.mdp,
1586 		    resp->match_prev.mdep[idx]);
1587 	}
1588 
1589 	D1(vswp, "%s: exit", __func__);
1590 
1591 	return (MDEG_SUCCESS);
1592 }
1593 
1594 /*
1595  * Scan the machine description for this instance of vsw
1596  * and read its properties. Called only from vsw_attach().
1597  * Returns: 0 on success, 1 on failure.
1598  */
1599 static int
1600 vsw_read_mdprops(vsw_t *vswp)
1601 {
1602 	md_t		*mdp = NULL;
1603 	mde_cookie_t	rootnode;
1604 	mde_cookie_t	*listp = NULL;
1605 	uint64_t	inst;
1606 	uint64_t	cfgh;
1607 	char		*name;
1608 	int		rv = 1;
1609 	int		num_nodes = 0;
1610 	int		num_devs = 0;
1611 	int		listsz = 0;
1612 	int		i;
1613 
1614 	/*
1615 	 * In each 'virtual-device' node in the MD there is a
1616 	 * 'cfg-handle' property which is the MD's concept of
1617 	 * an instance number (this may be completely different from
1618 	 * the device drivers instance #). OBP reads that value and
1619 	 * stores it in the 'reg' property of the appropriate node in
1620 	 * the device tree. We first read this reg property and use this
1621 	 * to compare against the 'cfg-handle' property of vsw nodes
1622 	 * in MD to get to this specific vsw instance and then read
1623 	 * other properties that we are interested in.
1624 	 * We also cache the value of 'reg' property and use it later
1625 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1626 	 */
1627 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1628 	    DDI_PROP_DONTPASS, reg_propname, -1);
1629 	if (inst == -1) {
1630 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1631 		    "OBP device tree", vswp->instance, reg_propname);
1632 		return (rv);
1633 	}
1634 
1635 	vswp->regprop = inst;
1636 
1637 	if ((mdp = md_get_handle()) == NULL) {
1638 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1639 		return (rv);
1640 	}
1641 
1642 	num_nodes = md_node_count(mdp);
1643 	ASSERT(num_nodes > 0);
1644 
1645 	listsz = num_nodes * sizeof (mde_cookie_t);
1646 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1647 
1648 	rootnode = md_root_node(mdp);
1649 
1650 	/* search for all "virtual_device" nodes */
1651 	num_devs = md_scan_dag(mdp, rootnode,
1652 	    md_find_name(mdp, vdev_propname),
1653 	    md_find_name(mdp, "fwd"), listp);
1654 	if (num_devs <= 0) {
1655 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1656 		goto vsw_readmd_exit;
1657 	}
1658 
1659 	/*
1660 	 * Now loop through the list of virtual-devices looking for
1661 	 * devices with name "virtual-network-switch" and for each
1662 	 * such device compare its instance with what we have from
1663 	 * the 'reg' property to find the right node in MD and then
1664 	 * read all its properties.
1665 	 */
1666 	for (i = 0; i < num_devs; i++) {
1667 
1668 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1669 			DWARN(vswp, "%s: name property not found\n",
1670 			    __func__);
1671 			goto vsw_readmd_exit;
1672 		}
1673 
1674 		/* is this a virtual-network-switch? */
1675 		if (strcmp(name, vsw_propname) != 0)
1676 			continue;
1677 
1678 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1679 			DWARN(vswp, "%s: cfg-handle property not found\n",
1680 			    __func__);
1681 			goto vsw_readmd_exit;
1682 		}
1683 
1684 		/* is this the required instance of vsw? */
1685 		if (inst != cfgh)
1686 			continue;
1687 
1688 		/* now read all properties of this vsw instance */
1689 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1690 		break;
1691 	}
1692 
1693 vsw_readmd_exit:
1694 
1695 	kmem_free(listp, listsz);
1696 	(void) md_fini_handle(mdp);
1697 	return (rv);
1698 }
1699 
1700 /*
1701  * Read the initial start-of-day values from the specified MD node.
1702  */
1703 static int
1704 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1705 {
1706 	int		i;
1707 	uint64_t 	macaddr = 0;
1708 
1709 	D1(vswp, "%s: enter", __func__);
1710 
1711 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1712 		return (1);
1713 	}
1714 
1715 	/* mac address for vswitch device itself */
1716 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1717 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1718 		    vswp->instance);
1719 		return (1);
1720 	}
1721 
1722 	vsw_save_lmacaddr(vswp, macaddr);
1723 
1724 	if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) {
1725 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1726 		    "defaulting to 'switched' mode",
1727 		    __func__, smode_propname);
1728 
1729 		for (i = 0; i < NUM_SMODES; i++)
1730 			vswp->smode[i] = VSW_LAYER2;
1731 
1732 		vswp->smode_num = NUM_SMODES;
1733 	} else {
1734 		ASSERT(vswp->smode_num != 0);
1735 	}
1736 
1737 	/* read vlan id properties of this vsw instance */
1738 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1739 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1740 
1741 	/* read priority-ether-types */
1742 	vsw_read_pri_eth_types(vswp, mdp, node);
1743 
1744 	D1(vswp, "%s: exit", __func__);
1745 	return (0);
1746 }
1747 
1748 /*
1749  * Read vlan id properties of the given MD node.
1750  * Arguments:
1751  *   arg:          device argument(vsw device or a port)
1752  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1753  *   mdp:          machine description
1754  *   node:         md node cookie
1755  *
1756  * Returns:
1757  *   pvidp:        port-vlan-id of the node
1758  *   vidspp:       list of vlan-ids of the node
1759  *   nvidsp:       # of vlan-ids in the list
1760  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1761  */
1762 static void
1763 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1764 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1765 	uint16_t *default_idp)
1766 {
1767 	vsw_t		*vswp;
1768 	vsw_port_t	*portp;
1769 	char		*pvid_propname;
1770 	char		*vid_propname;
1771 	uint_t		nvids = 0;
1772 	uint32_t	vids_size;
1773 	int		rv;
1774 	int		i;
1775 	uint64_t	*data;
1776 	uint64_t	val;
1777 	int		size;
1778 	int		inst;
1779 
1780 	if (type == VSW_LOCALDEV) {
1781 
1782 		vswp = (vsw_t *)arg;
1783 		pvid_propname = vsw_pvid_propname;
1784 		vid_propname = vsw_vid_propname;
1785 		inst = vswp->instance;
1786 
1787 	} else if (type == VSW_VNETPORT) {
1788 
1789 		portp = (vsw_port_t *)arg;
1790 		vswp = portp->p_vswp;
1791 		pvid_propname = port_pvid_propname;
1792 		vid_propname = port_vid_propname;
1793 		inst = portp->p_instance;
1794 
1795 	} else {
1796 		return;
1797 	}
1798 
1799 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1800 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1801 		if (rv != 0) {
1802 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1803 			    vsw_dvid_propname);
1804 
1805 			*default_idp = vsw_default_vlan_id;
1806 		} else {
1807 			*default_idp = val & 0xFFF;
1808 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1809 			    vsw_dvid_propname, inst, *default_idp);
1810 		}
1811 	}
1812 
1813 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1814 	if (rv != 0) {
1815 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1816 		*pvidp = vsw_default_vlan_id;
1817 	} else {
1818 
1819 		*pvidp = val & 0xFFF;
1820 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1821 		    pvid_propname, inst, *pvidp);
1822 	}
1823 
1824 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1825 	    &size);
1826 	if (rv != 0) {
1827 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1828 		size = 0;
1829 	} else {
1830 		size /= sizeof (uint64_t);
1831 	}
1832 	nvids = size;
1833 
1834 	if (nvids != 0) {
1835 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1836 		vids_size = sizeof (uint16_t) * nvids;
1837 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1838 		for (i = 0; i < nvids; i++) {
1839 			(*vidspp)[i] = data[i] & 0xFFFF;
1840 			D2(vswp, " %d ", (*vidspp)[i]);
1841 		}
1842 		D2(vswp, "\n");
1843 	}
1844 
1845 	*nvidsp = nvids;
1846 }
1847 
1848 /*
1849  * This function reads "priority-ether-types" property from md. This property
1850  * is used to enable support for priority frames. Applications which need
1851  * guaranteed and timely delivery of certain high priority frames to/from
1852  * a vnet or vsw within ldoms, should configure this property by providing
1853  * the ether type(s) for which the priority facility is needed.
1854  * Normal data frames are delivered over a ldc channel using the descriptor
1855  * ring mechanism which is constrained by factors such as descriptor ring size,
1856  * the rate at which the ring is processed at the peer ldc end point, etc.
1857  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1858  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1859  * descriptor ring path and enables a more reliable and timely delivery of
1860  * frames to the peer.
1861  */
1862 static void
1863 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1864 {
1865 	int		rv;
1866 	uint16_t	*types;
1867 	uint64_t	*data;
1868 	int		size;
1869 	int		i;
1870 	size_t		mblk_sz;
1871 
1872 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1873 	    (uint8_t **)&data, &size);
1874 	if (rv != 0) {
1875 		/*
1876 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1877 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1878 		 */
1879 		if (vsw_pri_eth_type != 0) {
1880 			size = sizeof (vsw_pri_eth_type);
1881 			data = &vsw_pri_eth_type;
1882 		} else {
1883 			D3(vswp, "%s: prop(%s) not found", __func__,
1884 			    pri_types_propname);
1885 			size = 0;
1886 		}
1887 	}
1888 
1889 	if (size == 0) {
1890 		vswp->pri_num_types = 0;
1891 		return;
1892 	}
1893 
1894 	/*
1895 	 * we have some priority-ether-types defined;
1896 	 * allocate a table of these types and also
1897 	 * allocate a pool of mblks to transmit these
1898 	 * priority packets.
1899 	 */
1900 	size /= sizeof (uint64_t);
1901 	vswp->pri_num_types = size;
1902 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1903 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1904 		types[i] = data[i] & 0xFFFF;
1905 	}
1906 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1907 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1908 }
1909 
1910 /*
1911  * Check to see if the relevant properties in the specified node have
1912  * changed, and if so take the appropriate action.
1913  *
1914  * If any of the properties are missing or invalid we don't take
1915  * any action, as this function should only be invoked when modifications
1916  * have been made to what we assume is a working configuration, which
1917  * we leave active.
1918  *
1919  * Note it is legal for this routine to be invoked even if none of the
1920  * properties in the port node within the MD have actually changed.
1921  */
1922 static void
1923 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1924 {
1925 	char		physname[LIFNAMSIZ];
1926 	char		drv[LIFNAMSIZ];
1927 	uint_t		ddi_instance;
1928 	uint8_t		new_smode[NUM_SMODES];
1929 	int		i, smode_num = 0;
1930 	uint64_t 	macaddr = 0;
1931 	enum		{MD_init = 0x1,
1932 				MD_physname = 0x2,
1933 				MD_macaddr = 0x4,
1934 				MD_smode = 0x8,
1935 				MD_vlans = 0x10} updated;
1936 	int		rv;
1937 	uint16_t	pvid;
1938 	uint16_t	*vids;
1939 	uint16_t	nvids;
1940 
1941 	updated = MD_init;
1942 
1943 	D1(vswp, "%s: enter", __func__);
1944 
1945 	/*
1946 	 * Check if name of physical device in MD has changed.
1947 	 */
1948 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
1949 		/*
1950 		 * Do basic sanity check on new device name/instance,
1951 		 * if its non NULL. It is valid for the device name to
1952 		 * have changed from a non NULL to a NULL value, i.e.
1953 		 * the vsw is being changed to 'routed' mode.
1954 		 */
1955 		if ((strlen(physname) != 0) &&
1956 		    (ddi_parse(physname, drv,
1957 		    &ddi_instance) != DDI_SUCCESS)) {
1958 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
1959 			    " a valid device name/instance",
1960 			    vswp->instance, physname);
1961 			goto fail_reconf;
1962 		}
1963 
1964 		if (strcmp(physname, vswp->physname)) {
1965 			D2(vswp, "%s: device name changed from %s to %s",
1966 			    __func__, vswp->physname, physname);
1967 
1968 			updated |= MD_physname;
1969 		} else {
1970 			D2(vswp, "%s: device name unchanged at %s",
1971 			    __func__, vswp->physname);
1972 		}
1973 	} else {
1974 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
1975 		    "device from updated MD.", vswp->instance);
1976 		goto fail_reconf;
1977 	}
1978 
1979 	/*
1980 	 * Check if MAC address has changed.
1981 	 */
1982 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1983 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1984 		    vswp->instance);
1985 		goto fail_reconf;
1986 	} else {
1987 		uint64_t maddr = macaddr;
1988 		READ_ENTER(&vswp->if_lockrw);
1989 		for (i = ETHERADDRL - 1; i >= 0; i--) {
1990 			if (vswp->if_addr.ether_addr_octet[i]
1991 			    != (macaddr & 0xFF)) {
1992 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
1993 				    __func__, i,
1994 				    vswp->if_addr.ether_addr_octet[i],
1995 				    (macaddr & 0xFF));
1996 				updated |= MD_macaddr;
1997 				macaddr = maddr;
1998 				break;
1999 			}
2000 			macaddr >>= 8;
2001 		}
2002 		RW_EXIT(&vswp->if_lockrw);
2003 		if (updated & MD_macaddr) {
2004 			vsw_save_lmacaddr(vswp, macaddr);
2005 		}
2006 	}
2007 
2008 	/*
2009 	 * Check if switching modes have changed.
2010 	 */
2011 	if (vsw_get_md_smodes(vswp, mdp, node,
2012 	    new_smode, &smode_num)) {
2013 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2014 		    vswp->instance, smode_propname);
2015 		goto fail_reconf;
2016 	} else {
2017 		ASSERT(smode_num != 0);
2018 		if (smode_num != vswp->smode_num) {
2019 			D2(vswp, "%s: number of modes changed from %d to %d",
2020 			    __func__, vswp->smode_num, smode_num);
2021 		}
2022 
2023 		for (i = 0; i < smode_num; i++) {
2024 			if (new_smode[i] != vswp->smode[i]) {
2025 				D2(vswp, "%s: mode changed from %d to %d",
2026 				    __func__, vswp->smode[i], new_smode[i]);
2027 				updated |= MD_smode;
2028 				break;
2029 			}
2030 		}
2031 	}
2032 
2033 	/* Read the vlan ids */
2034 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2035 	    &nvids, NULL);
2036 
2037 	/* Determine if there are any vlan id updates */
2038 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2039 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2040 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2041 	    bcmp(vids, vswp->vids, sizeof (uint16_t) * nvids))) {
2042 		updated |= MD_vlans;
2043 	}
2044 
2045 	/*
2046 	 * Now make any changes which are needed...
2047 	 */
2048 
2049 	if (updated & (MD_physname | MD_smode)) {
2050 
2051 		/*
2052 		 * Stop any pending timeout to setup switching mode.
2053 		 */
2054 		vsw_stop_switching_timeout(vswp);
2055 
2056 		/* Cleanup HybridIO */
2057 		vsw_hio_cleanup(vswp);
2058 
2059 		/*
2060 		 * Remove unicst, mcst addrs of vsw interface
2061 		 * and ports from the physdev.
2062 		 */
2063 		vsw_unset_addrs(vswp);
2064 
2065 		/*
2066 		 * Stop, detach and close the old device..
2067 		 */
2068 		WRITE_ENTER(&vswp->mac_rwlock);
2069 
2070 		vsw_mac_detach(vswp);
2071 		vsw_mac_close(vswp);
2072 
2073 		RW_EXIT(&vswp->mac_rwlock);
2074 
2075 		/*
2076 		 * Update phys name.
2077 		 */
2078 		if (updated & MD_physname) {
2079 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2080 			    vswp->instance, vswp->physname, physname);
2081 			(void) strncpy(vswp->physname,
2082 			    physname, strlen(physname) + 1);
2083 		}
2084 
2085 		/*
2086 		 * Update array with the new switch mode values.
2087 		 */
2088 		if (updated & MD_smode) {
2089 			for (i = 0; i < smode_num; i++)
2090 				vswp->smode[i] = new_smode[i];
2091 
2092 			vswp->smode_num = smode_num;
2093 			vswp->smode_idx = 0;
2094 		}
2095 
2096 		/*
2097 		 * ..and attach, start the new device.
2098 		 */
2099 		rv = vsw_setup_switching(vswp);
2100 		if (rv == EAGAIN) {
2101 			/*
2102 			 * Unable to setup switching mode.
2103 			 * As the error is EAGAIN, schedule a timeout to retry
2104 			 * and return. Programming addresses of ports and
2105 			 * vsw interface will be done when the timeout handler
2106 			 * completes successfully.
2107 			 */
2108 			mutex_enter(&vswp->swtmout_lock);
2109 
2110 			vswp->swtmout_enabled = B_TRUE;
2111 			vswp->swtmout_id =
2112 			    timeout(vsw_setup_switching_timeout, vswp,
2113 			    (vsw_setup_switching_delay *
2114 			    drv_usectohz(MICROSEC)));
2115 
2116 			mutex_exit(&vswp->swtmout_lock);
2117 
2118 			return;
2119 
2120 		} else if (rv) {
2121 			goto fail_update;
2122 		}
2123 
2124 		/*
2125 		 * program unicst, mcst addrs of vsw interface
2126 		 * and ports in the physdev.
2127 		 */
2128 		vsw_set_addrs(vswp);
2129 
2130 		/* Start HIO for ports that have already connected */
2131 		vsw_hio_start_ports(vswp);
2132 
2133 	} else if (updated & MD_macaddr) {
2134 		/*
2135 		 * We enter here if only MD_macaddr is exclusively updated.
2136 		 * If MD_physname and/or MD_smode are also updated, then
2137 		 * as part of that, we would have implicitly processed
2138 		 * MD_macaddr update (above).
2139 		 */
2140 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2141 		    vswp->instance, macaddr);
2142 
2143 		READ_ENTER(&vswp->if_lockrw);
2144 		if (vswp->if_state & VSW_IF_UP) {
2145 
2146 			mutex_enter(&vswp->hw_lock);
2147 			/*
2148 			 * Remove old mac address of vsw interface
2149 			 * from the physdev
2150 			 */
2151 			(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
2152 			/*
2153 			 * Program new mac address of vsw interface
2154 			 * in the physdev
2155 			 */
2156 			rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
2157 			mutex_exit(&vswp->hw_lock);
2158 			if (rv != 0) {
2159 				cmn_err(CE_NOTE,
2160 				    "!vsw%d: failed to program interface "
2161 				    "unicast address\n", vswp->instance);
2162 			}
2163 			/*
2164 			 * Notify the MAC layer of the changed address.
2165 			 */
2166 			mac_unicst_update(vswp->if_mh,
2167 			    (uint8_t *)&vswp->if_addr);
2168 
2169 		}
2170 		RW_EXIT(&vswp->if_lockrw);
2171 
2172 	}
2173 
2174 	if (updated & MD_vlans) {
2175 		/* Remove existing vlan ids from the hash table. */
2176 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2177 
2178 		/* save the new vlan ids */
2179 		vswp->pvid = pvid;
2180 		if (vswp->nvids != 0) {
2181 			kmem_free(vswp->vids, sizeof (uint16_t) * vswp->nvids);
2182 			vswp->nvids = 0;
2183 		}
2184 		if (nvids != 0) {
2185 			vswp->nvids = nvids;
2186 			vswp->vids = vids;
2187 		}
2188 
2189 		/* add these new vlan ids into hash table */
2190 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2191 	} else {
2192 		if (nvids != 0) {
2193 			kmem_free(vids, sizeof (uint16_t) * nvids);
2194 		}
2195 	}
2196 
2197 	return;
2198 
2199 fail_reconf:
2200 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2201 	return;
2202 
2203 fail_update:
2204 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2205 	    vswp->instance);
2206 }
2207 
2208 /*
2209  * Read the port's md properties.
2210  */
2211 static int
2212 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2213 	md_t *mdp, mde_cookie_t *node)
2214 {
2215 	uint64_t		ldc_id;
2216 	uint8_t			*addrp;
2217 	int			i, addrsz;
2218 	int			num_nodes = 0, nchan = 0;
2219 	int			listsz = 0;
2220 	mde_cookie_t		*listp = NULL;
2221 	struct ether_addr	ea;
2222 	uint64_t		macaddr;
2223 	uint64_t		inst = 0;
2224 	uint64_t		val;
2225 
2226 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2227 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2228 		    id_propname);
2229 		return (1);
2230 	}
2231 
2232 	/*
2233 	 * Find the channel endpoint node(s) (which should be under this
2234 	 * port node) which contain the channel id(s).
2235 	 */
2236 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2237 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2238 		    __func__, num_nodes);
2239 		return (1);
2240 	}
2241 
2242 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2243 
2244 	/* allocate enough space for node list */
2245 	listsz = num_nodes * sizeof (mde_cookie_t);
2246 	listp = kmem_zalloc(listsz, KM_SLEEP);
2247 
2248 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2249 	    md_find_name(mdp, "fwd"), listp);
2250 
2251 	if (nchan <= 0) {
2252 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2253 		kmem_free(listp, listsz);
2254 		return (1);
2255 	}
2256 
2257 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2258 
2259 	/* use property from first node found */
2260 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2261 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2262 		    id_propname);
2263 		kmem_free(listp, listsz);
2264 		return (1);
2265 	}
2266 
2267 	/* don't need list any more */
2268 	kmem_free(listp, listsz);
2269 
2270 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2271 
2272 	/* read mac-address property */
2273 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2274 	    &addrp, &addrsz)) {
2275 		DWARN(vswp, "%s: prop(%s) not found",
2276 		    __func__, remaddr_propname);
2277 		return (1);
2278 	}
2279 
2280 	if (addrsz < ETHERADDRL) {
2281 		DWARN(vswp, "%s: invalid address size", __func__);
2282 		return (1);
2283 	}
2284 
2285 	macaddr = *((uint64_t *)addrp);
2286 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2287 
2288 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2289 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2290 		macaddr >>= 8;
2291 	}
2292 
2293 	/* now update all properties into the port */
2294 	portp->p_vswp = vswp;
2295 	portp->p_instance = inst;
2296 	portp->addr_set = VSW_ADDR_UNSET;
2297 	ether_copy(&ea, &portp->p_macaddr);
2298 	if (nchan > VSW_PORT_MAX_LDCS) {
2299 		D2(vswp, "%s: using first of %d ldc ids",
2300 		    __func__, nchan);
2301 		nchan = VSW_PORT_MAX_LDCS;
2302 	}
2303 	portp->num_ldcs = nchan;
2304 	portp->ldc_ids =
2305 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2306 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2307 
2308 	/* read vlan id properties of this port node */
2309 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2310 	    &portp->vids, &portp->nvids, NULL);
2311 
2312 	/* Check if hybrid property is present */
2313 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2314 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2315 		portp->p_hio_enabled = B_TRUE;
2316 	} else {
2317 		portp->p_hio_enabled = B_FALSE;
2318 	}
2319 	/*
2320 	 * Port hio capability determined after version
2321 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2322 	 */
2323 	portp->p_hio_capable = B_FALSE;
2324 	return (0);
2325 }
2326 
2327 /*
2328  * Add a new port to the system.
2329  *
2330  * Returns 0 on success, 1 on failure.
2331  */
2332 int
2333 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2334 {
2335 	vsw_port_t	*portp;
2336 	int		rv;
2337 
2338 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2339 
2340 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2341 	if (rv != 0) {
2342 		kmem_free(portp, sizeof (*portp));
2343 		return (1);
2344 	}
2345 
2346 	rv = vsw_port_attach(portp);
2347 	if (rv != 0) {
2348 		DERR(vswp, "%s: failed to attach port", __func__);
2349 		return (1);
2350 	}
2351 
2352 	return (0);
2353 }
2354 
2355 static int
2356 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2357 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2358 {
2359 	uint64_t	cport_num;
2360 	uint64_t	pport_num;
2361 	vsw_port_list_t	*plistp;
2362 	vsw_port_t	*portp;
2363 	boolean_t	updated_vlans = B_FALSE;
2364 	uint16_t	pvid;
2365 	uint16_t	*vids;
2366 	uint16_t	nvids;
2367 	uint64_t	val;
2368 	boolean_t	hio_enabled = B_FALSE;
2369 
2370 	/*
2371 	 * For now, we get port updates only if vlan ids changed.
2372 	 * We read the port num and do some sanity check.
2373 	 */
2374 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2375 		return (1);
2376 	}
2377 
2378 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2379 		return (1);
2380 	}
2381 	if (cport_num != pport_num)
2382 		return (1);
2383 
2384 	plistp = &(vswp->plist);
2385 
2386 	READ_ENTER(&plistp->lockrw);
2387 
2388 	portp = vsw_lookup_port(vswp, cport_num);
2389 	if (portp == NULL) {
2390 		RW_EXIT(&plistp->lockrw);
2391 		return (1);
2392 	}
2393 
2394 	/* Read the vlan ids */
2395 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2396 	    &vids, &nvids, NULL);
2397 
2398 	/* Determine if there are any vlan id updates */
2399 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2400 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2401 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2402 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2403 		updated_vlans = B_TRUE;
2404 	}
2405 
2406 	if (updated_vlans == B_TRUE) {
2407 
2408 		/* Remove existing vlan ids from the hash table. */
2409 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2410 
2411 		/* save the new vlan ids */
2412 		portp->pvid = pvid;
2413 		if (portp->nvids != 0) {
2414 			kmem_free(portp->vids,
2415 			    sizeof (uint16_t) * portp->nvids);
2416 			portp->nvids = 0;
2417 		}
2418 		if (nvids != 0) {
2419 			portp->vids = kmem_zalloc(sizeof (uint16_t) *
2420 			    nvids, KM_SLEEP);
2421 			bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2422 			portp->nvids = nvids;
2423 			kmem_free(vids, sizeof (uint16_t) * nvids);
2424 		}
2425 
2426 		/* add these new vlan ids into hash table */
2427 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2428 
2429 		/* reset the port if it is vlan unaware (ver < 1.3) */
2430 		vsw_vlan_unaware_port_reset(portp);
2431 	}
2432 
2433 	/* Check if hybrid property is present */
2434 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2435 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2436 		hio_enabled = B_TRUE;
2437 	}
2438 
2439 	if (portp->p_hio_enabled != hio_enabled) {
2440 		vsw_hio_port_update(portp, hio_enabled);
2441 	}
2442 
2443 	RW_EXIT(&plistp->lockrw);
2444 
2445 	return (0);
2446 }
2447 
2448 /*
2449  * vsw_mac_rx -- A common function to send packets to the interface.
2450  * By default this function check if the interface is UP or not, the
2451  * rest of the behaviour depends on the flags as below:
2452  *
2453  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2454  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2455  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2456  */
2457 void
2458 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2459     mblk_t *mp, vsw_macrx_flags_t flags)
2460 {
2461 	mblk_t		*mpt;
2462 
2463 	D1(vswp, "%s:enter\n", __func__);
2464 	READ_ENTER(&vswp->if_lockrw);
2465 	/* Check if the interface is up */
2466 	if (!(vswp->if_state & VSW_IF_UP)) {
2467 		RW_EXIT(&vswp->if_lockrw);
2468 		/* Free messages only if FREEMSG flag specified */
2469 		if (flags & VSW_MACRX_FREEMSG) {
2470 			freemsgchain(mp);
2471 		}
2472 		D1(vswp, "%s:exit\n", __func__);
2473 		return;
2474 	}
2475 	/*
2476 	 * If PROMISC flag is passed, then check if
2477 	 * the interface is in the PROMISC mode.
2478 	 * If not, drop the messages.
2479 	 */
2480 	if (flags & VSW_MACRX_PROMISC) {
2481 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2482 			RW_EXIT(&vswp->if_lockrw);
2483 			/* Free messages only if FREEMSG flag specified */
2484 			if (flags & VSW_MACRX_FREEMSG) {
2485 				freemsgchain(mp);
2486 			}
2487 			D1(vswp, "%s:exit\n", __func__);
2488 			return;
2489 		}
2490 	}
2491 	RW_EXIT(&vswp->if_lockrw);
2492 	/*
2493 	 * If COPYMSG flag is passed, then make a copy
2494 	 * of the message chain and send up the copy.
2495 	 */
2496 	if (flags & VSW_MACRX_COPYMSG) {
2497 		mp = copymsgchain(mp);
2498 		if (mp == NULL) {
2499 			D1(vswp, "%s:exit\n", __func__);
2500 			return;
2501 		}
2502 	}
2503 
2504 	D2(vswp, "%s: sending up stack", __func__);
2505 
2506 	mpt = NULL;
2507 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2508 	if (mp != NULL) {
2509 		mac_rx(vswp->if_mh, mrh, mp);
2510 	}
2511 	D1(vswp, "%s:exit\n", __func__);
2512 }
2513 
2514 /* copy mac address of vsw into soft state structure */
2515 static void
2516 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2517 {
2518 	int	i;
2519 
2520 	WRITE_ENTER(&vswp->if_lockrw);
2521 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2522 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2523 		macaddr >>= 8;
2524 	}
2525 	RW_EXIT(&vswp->if_lockrw);
2526 }
2527