xref: /titanic_44/usr/src/uts/sun4v/io/vsw.c (revision 5087e485d482853e61c9d38d8197dee892c7f43d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <sys/varargs.h>
53 #include <sys/machsystm.h>
54 #include <sys/modctl.h>
55 #include <sys/modhash.h>
56 #include <sys/mac_provider.h>
57 #include <sys/mac_ether.h>
58 #include <sys/taskq.h>
59 #include <sys/note.h>
60 #include <sys/mach_descrip.h>
61 #include <sys/mac_provider.h>
62 #include <sys/mdeg.h>
63 #include <sys/ldc.h>
64 #include <sys/vsw_fdb.h>
65 #include <sys/vsw.h>
66 #include <sys/vio_mailbox.h>
67 #include <sys/vnet_mailbox.h>
68 #include <sys/vnet_common.h>
69 #include <sys/vio_util.h>
70 #include <sys/sdt.h>
71 #include <sys/atomic.h>
72 #include <sys/callb.h>
73 #include <sys/vlan.h>
74 
75 /*
76  * Function prototypes.
77  */
78 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
79 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
80 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
81 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);
82 
83 /* MDEG routines */
84 static	int vsw_mdeg_register(vsw_t *vswp);
85 static	void vsw_mdeg_unregister(vsw_t *vswp);
86 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
87 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
88 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
89 static	int vsw_read_mdprops(vsw_t *vswp);
90 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
91 	mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
92 	uint16_t *nvidsp, uint16_t *default_idp);
93 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
94 	md_t *mdp, mde_cookie_t *node);
95 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
96 	mde_cookie_t node);
97 static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
98 	uint32_t *mtu);
99 static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
100 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
101 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
102 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
103 	vsw_vlanid_t *vids2, int nvids);
104 
105 /* Mac driver related routines */
106 static int vsw_mac_register(vsw_t *);
107 static int vsw_mac_unregister(vsw_t *);
108 static int vsw_m_stat(void *, uint_t, uint64_t *);
109 static void vsw_m_stop(void *arg);
110 static int vsw_m_start(void *arg);
111 static int vsw_m_unicst(void *arg, const uint8_t *);
112 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
113 static int vsw_m_promisc(void *arg, boolean_t);
114 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
115 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
116     mblk_t *mp, vsw_macrx_flags_t flags);
117 
118 /*
119  * Functions imported from other files.
120  */
121 extern void vsw_setup_switching_thread(void *arg);
122 extern int vsw_setup_switching_start(vsw_t *vswp);
123 extern void vsw_setup_switching_stop(vsw_t *vswp);
124 extern int vsw_setup_switching(vsw_t *);
125 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
126     vsw_port_t *port, mac_resource_handle_t mrh);
127 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
128 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
129 extern void vsw_del_mcst_vsw(vsw_t *);
130 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
131 extern int vsw_detach_ports(vsw_t *vswp);
132 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
133 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
134 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
135 	md_t *prev_mdp, mde_cookie_t prev_mdex);
136 extern	int vsw_port_attach(vsw_port_t *port);
137 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
138 extern int vsw_mac_open(vsw_t *vswp);
139 extern void vsw_mac_close(vsw_t *vswp);
140 extern void vsw_mac_cleanup_ports(vsw_t *vswp);
141 extern void vsw_unset_addrs(vsw_t *vswp);
142 extern void vsw_setup_layer2_post_process(vsw_t *vswp);
143 extern void vsw_create_vlans(void *arg, int type);
144 extern void vsw_destroy_vlans(void *arg, int type);
145 extern void vsw_vlan_add_ids(void *arg, int type);
146 extern void vsw_vlan_remove_ids(void *arg, int type);
147 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
148 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
149 	mblk_t **npt);
150 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
151 extern void vsw_hio_cleanup(vsw_t *vswp);
152 extern void vsw_hio_start_ports(vsw_t *vswp);
153 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
154 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
155 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
156 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
157     vsw_vlanid_t *new_vids, int new_nvids);
158 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
159 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
160 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
161     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
162 extern void vsw_reset_ports(vsw_t *vswp);
163 extern void vsw_port_reset(vsw_port_t *portp);
164 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
165 
166 /*
167  * Internal tunables.
168  */
169 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
170 int	vsw_wretries = 100;		/* # of write attempts */
171 int	vsw_desc_delay = 0;		/* delay in us */
172 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
173 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
174 int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
175 					/* 300*3 = 900sec(15min) of max tmout */
176 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
177 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
178 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
179 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
180 
181 uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
182 uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
183 uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
184 
185 /* delay in usec to wait for all references on a fdb entry to be dropped */
186 uint32_t vsw_fdbe_refcnt_delay = 10;
187 
188 /*
189  * Default vlan id. This is only used internally when the "default-vlan-id"
190  * property is not present in the MD device node. Therefore, this should not be
191  * used as a tunable; if this value is changed, the corresponding variable
192  * should be updated to the same value in all vnets connected to this vsw.
193  */
194 uint16_t	vsw_default_vlan_id = 1;
195 
196 /*
197  * Workaround for a version handshake bug in obp's vnet.
198  * If vsw initiates version negotiation starting from the highest version,
199  * obp sends a nack and terminates version handshake. To workaround
200  * this, we do not initiate version handshake when the channel comes up.
201  * Instead, we wait for the peer to send its version info msg and go through
202  * the version protocol exchange. If we successfully negotiate a version,
203  * before sending the ack, we send our version info msg to the peer
204  * using the <major,minor> version that we are about to ack.
205  */
206 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
207 
208 /*
209  * In the absence of "priority-ether-types" property in MD, the following
210  * internal tunable can be set to specify a single priority ethertype.
211  */
212 uint64_t vsw_pri_eth_type = 0;
213 
214 /*
215  * Number of transmit priority buffers that are preallocated per device.
216  * This number is chosen to be a small value to throttle transmission
217  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
218  */
219 uint32_t vsw_pri_tx_nmblks = 64;
220 
221 /*
222  * Number of RARP packets sent to announce macaddr to the physical switch,
223  * after vsw's physical device is changed dynamically or after a guest (client
224  * vnet) is live migrated in.
225  */
226 uint32_t vsw_publish_macaddr_count = 3;
227 
228 boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
229 int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanp */
230 int vsw_hio_cleanup_delay = 10000;	/* 10ms */
231 
232 /* Number of transmit descriptors -  must be power of 2 */
233 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
234 
235 /*
236  * Max number of mblks received in one receive operation.
237  */
238 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
239 
240 /*
241  * Internal tunables for receive buffer pools, that is,  the size and number of
242  * mblks for each pool. At least 3 sizes must be specified if these are used.
243  * The sizes must be specified in increasing order. Non-zero value of the first
244  * size will be used as a hint to use these values instead of the algorithm
245  * that determines the sizes based on MTU.
246  */
247 uint32_t vsw_mblk_size1 = 0;
248 uint32_t vsw_mblk_size2 = 0;
249 uint32_t vsw_mblk_size3 = 0;
250 uint32_t vsw_mblk_size4 = 0;
251 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
252 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
253 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
254 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */
255 
256 /*
257  * Set this to non-zero to enable additional internal receive buffer pools
258  * based on the MTU of the device for better performance at the cost of more
259  * memory consumption. This is turned off by default, to use allocb(9F) for
260  * receive buffer allocations of sizes > 2K.
261  */
262 boolean_t vsw_jumbo_rxpools = B_FALSE;
263 
264 /*
265  * vsw_max_tx_qcount is the maximum # of packets that can be queued
266  * before the tx worker thread begins processing the queue. Its value
267  * is chosen to be 4x the default length of tx descriptor ring.
268  */
269 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
270 
271 /*
272  * MAC callbacks
273  */
274 static	mac_callbacks_t	vsw_m_callbacks = {
275 	0,
276 	vsw_m_stat,
277 	vsw_m_start,
278 	vsw_m_stop,
279 	vsw_m_promisc,
280 	vsw_m_multicst,
281 	vsw_m_unicst,
282 	vsw_m_tx,
283 	NULL,
284 	NULL,
285 	NULL
286 };
287 
288 static	struct	cb_ops	vsw_cb_ops = {
289 	nulldev,			/* cb_open */
290 	nulldev,			/* cb_close */
291 	nodev,				/* cb_strategy */
292 	nodev,				/* cb_print */
293 	nodev,				/* cb_dump */
294 	nodev,				/* cb_read */
295 	nodev,				/* cb_write */
296 	nodev,				/* cb_ioctl */
297 	nodev,				/* cb_devmap */
298 	nodev,				/* cb_mmap */
299 	nodev,				/* cb_segmap */
300 	nochpoll,			/* cb_chpoll */
301 	ddi_prop_op,			/* cb_prop_op */
302 	NULL,				/* cb_stream */
303 	D_MP,				/* cb_flag */
304 	CB_REV,				/* rev */
305 	nodev,				/* int (*cb_aread)() */
306 	nodev				/* int (*cb_awrite)() */
307 };
308 
309 static	struct	dev_ops	vsw_ops = {
310 	DEVO_REV,		/* devo_rev */
311 	0,			/* devo_refcnt */
312 	NULL,			/* devo_getinfo */
313 	nulldev,		/* devo_identify */
314 	nulldev,		/* devo_probe */
315 	vsw_attach,		/* devo_attach */
316 	vsw_detach,		/* devo_detach */
317 	nodev,			/* devo_reset */
318 	&vsw_cb_ops,		/* devo_cb_ops */
319 	(struct bus_ops *)NULL,	/* devo_bus_ops */
320 	ddi_power		/* devo_power */
321 };
322 
323 extern	struct	mod_ops	mod_driverops;
324 static struct modldrv vswmodldrv = {
325 	&mod_driverops,
326 	"sun4v Virtual Switch",
327 	&vsw_ops,
328 };
329 
/*
 * Take (LDC_ENTER_LOCK) or drop (LDC_EXIT_LOCK) the three per-channel
 * mutexes. They are acquired in the order ldc_cblock, ldc_rxlock,
 * ldc_txlock and released in the reverse order.
 *
 * Each macro expands to a single statement (do { } while (0)) so that it
 * can be used safely as the body of an unbraced if/else. Callers must
 * supply the terminating semicolon.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	_NOTE(CONSTCOND)				\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	_NOTE(CONSTCOND)				\
	} while (0)
338 
339 /* Driver soft state ptr  */
340 static void	*vsw_state;
341 
342 /*
343  * Linked list of "vsw_t" structures - one per instance.
344  */
345 vsw_t		*vsw_head = NULL;
346 krwlock_t	vsw_rw;
347 
/*
 * Property names.
 */

/* node names */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";

/* switch configuration */
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";

/* channel and identification */
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";

/* priority ethertypes */
static char pri_types_propname[] = "priority-ether-types";

/* vlan ids of the switch itself and of its ports */
static char vsw_pvid_propname[] = "port-vlan-id";
static char vsw_vid_propname[] = "vlan-id";
static char vsw_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";

/* miscellaneous */
static char hybrid_propname[] = "hybrid";
static char vsw_mtu_propname[] = "mtu";
369 
370 /*
371  * Matching criteria passed to the MDEG to register interest
372  * in changes to 'virtual-device-port' nodes identified by their
373  * 'id' property.
374  */
375 static md_prop_match_t vport_prop_match[] = {
376 	{ MDET_PROP_VAL,    "id"   },
377 	{ MDET_LIST_END,    NULL    }
378 };
379 
380 static mdeg_node_match_t vport_match = { "virtual-device-port",
381 						vport_prop_match };
382 
383 /*
384  * Matching criteria passed to the MDEG to register interest
385  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
386  * by their 'name' and 'cfg-handle' properties.
387  */
388 static md_prop_match_t vdev_prop_match[] = {
389 	{ MDET_PROP_STR,    "name"   },
390 	{ MDET_PROP_VAL,    "cfg-handle" },
391 	{ MDET_LIST_END,    NULL    }
392 };
393 
394 static mdeg_node_match_t vdev_match = { "virtual-device",
395 						vdev_prop_match };
396 
397 
398 /*
399  * Specification of an MD node passed to the MDEG to filter any
400  * 'vport' nodes that do not belong to the specified node. This
401  * template is copied for each vsw instance and filled in with
402  * the appropriate 'cfg-handle' value before being passed to the MDEG.
403  */
404 static mdeg_prop_spec_t vsw_prop_template[] = {
405 	{ MDET_PROP_STR,    "name",		vsw_propname },
406 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
407 	{ MDET_LIST_END,    NULL,		NULL	}
408 };
409 
/*
 * Store 'val' in the ps_val member of the second entry (index 1) of the
 * property spec array 'specp'. This is a plain parenthesised expression
 * with no trailing semicolon, so it is safe in an unbraced if/else; the
 * caller supplies the semicolon.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
411 
#ifdef	DEBUG
/*
 * Print debug messages - set to 0x1f to enable all msgs
 * or 0x0 to turn all off.
 */
int vswdbg = 0x0;

/*
 * debug levels:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */

/*
 * Format a debug message and emit it through cmn_err(CE_CONT).
 *
 * vswp	- instance the message relates to, or NULL; when non-NULL the
 *	  message is prefixed with "vsw<instance>: ".
 * fmt	- printf-style format string followed by its arguments.
 *
 * The message is formatted into a fixed-size stack buffer. vsnprintf()
 * is used so that an over-long message is truncated instead of
 * overrunning the buffer.
 */
void
vswdebug(vsw_t *vswp, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	if (vswp == NULL)
		cmn_err(CE_CONT, "%s\n", buf);
	else
		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
}

#endif	/* DEBUG */
445 
446 static struct modlinkage modlinkage = {
447 	MODREV_1,
448 	&vswmodldrv,
449 	NULL
450 };
451 
452 int
453 _init(void)
454 {
455 	int status;
456 
457 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
458 
459 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
460 	if (status != 0) {
461 		return (status);
462 	}
463 
464 	mac_init_ops(&vsw_ops, DRV_NAME);
465 	status = mod_install(&modlinkage);
466 	if (status != 0) {
467 		ddi_soft_state_fini(&vsw_state);
468 	}
469 	return (status);
470 }
471 
472 int
473 _fini(void)
474 {
475 	int status;
476 
477 	status = mod_remove(&modlinkage);
478 	if (status != 0)
479 		return (status);
480 	mac_fini_ops(&vsw_ops);
481 	ddi_soft_state_fini(&vsw_state);
482 
483 	rw_destroy(&vsw_rw);
484 
485 	return (status);
486 }
487 
488 int
489 _info(struct modinfo *modinfop)
490 {
491 	return (mod_info(&modlinkage, modinfop));
492 }
493 
494 static int
495 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
496 {
497 	vsw_t		*vswp;
498 	int		instance;
499 	char		hashname[MAXNAMELEN];
500 	char		qname[TASKQ_NAMELEN];
501 	enum		{ PROG_init = 0x00,
502 				PROG_locks = 0x01,
503 				PROG_readmd = 0x02,
504 				PROG_fdb = 0x04,
505 				PROG_mfdb = 0x08,
506 				PROG_taskq = 0x10,
507 				PROG_swmode = 0x20,
508 				PROG_macreg = 0x40,
509 				PROG_mdreg = 0x80}
510 			progress;
511 
512 	progress = PROG_init;
513 	int		rv;
514 
515 	switch (cmd) {
516 	case DDI_ATTACH:
517 		break;
518 	case DDI_RESUME:
519 		/* nothing to do for this non-device */
520 		return (DDI_SUCCESS);
521 	case DDI_PM_RESUME:
522 	default:
523 		return (DDI_FAILURE);
524 	}
525 
526 	instance = ddi_get_instance(dip);
527 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
528 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
529 		return (DDI_FAILURE);
530 	}
531 	vswp = ddi_get_soft_state(vsw_state, instance);
532 
533 	if (vswp == NULL) {
534 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
535 		goto vsw_attach_fail;
536 	}
537 
538 	vswp->dip = dip;
539 	vswp->instance = instance;
540 	ddi_set_driver_private(dip, (caddr_t)vswp);
541 
542 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
543 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
544 	mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL);
545 	cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL);
546 	rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL);
547 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
548 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
549 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
550 
551 	progress |= PROG_locks;
552 
553 	rv = vsw_read_mdprops(vswp);
554 	if (rv != 0)
555 		goto vsw_attach_fail;
556 
557 	progress |= PROG_readmd;
558 
559 	/* setup the unicast forwarding database  */
560 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
561 	    vswp->instance);
562 	D2(vswp, "creating unicast hash table (%s)...", hashname);
563 	vswp->fdb_nchains = vsw_fdb_nchains;
564 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
565 	    mod_hash_null_valdtor, sizeof (void *));
566 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
567 	progress |= PROG_fdb;
568 
569 	/* setup the multicast fowarding database */
570 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
571 	    vswp->instance);
572 	D2(vswp, "creating multicast hash table %s)...", hashname);
573 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
574 	    mod_hash_null_valdtor, sizeof (void *));
575 
576 	progress |= PROG_mfdb;
577 
578 	/*
579 	 * Create the taskq which will process all the VIO
580 	 * control messages.
581 	 */
582 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
583 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
584 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
585 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
586 		    vswp->instance);
587 		goto vsw_attach_fail;
588 	}
589 
590 	progress |= PROG_taskq;
591 
592 	/* prevent auto-detaching */
593 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
594 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
595 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
596 		    "instance %u", DDI_NO_AUTODETACH, instance);
597 	}
598 
599 	/*
600 	 * The null switching function is set to avoid panic until
601 	 * switch mode is setup.
602 	 */
603 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
604 
605 	/*
606 	 * Setup the required switching mode, based on the mdprops that we read
607 	 * earlier. We start a thread to do this, to avoid calling mac_open()
608 	 * directly from attach().
609 	 */
610 	rv = vsw_setup_switching_start(vswp);
611 	if (rv != 0) {
612 		goto vsw_attach_fail;
613 	}
614 
615 	progress |= PROG_swmode;
616 
617 	/* Register with mac layer as a provider */
618 	rv = vsw_mac_register(vswp);
619 	if (rv != 0)
620 		goto vsw_attach_fail;
621 
622 	progress |= PROG_macreg;
623 
624 	/*
625 	 * Now we have everything setup, register an interest in
626 	 * specific MD nodes.
627 	 *
628 	 * The callback is invoked in 2 cases, firstly if upon mdeg
629 	 * registration there are existing nodes which match our specified
630 	 * criteria, and secondly if the MD is changed (and again, there
631 	 * are nodes which we are interested in present within it. Note
632 	 * that our callback will be invoked even if our specified nodes
633 	 * have not actually changed).
634 	 *
635 	 */
636 	rv = vsw_mdeg_register(vswp);
637 	if (rv != 0)
638 		goto vsw_attach_fail;
639 
640 	progress |= PROG_mdreg;
641 
642 	WRITE_ENTER(&vsw_rw);
643 	vswp->next = vsw_head;
644 	vsw_head = vswp;
645 	RW_EXIT(&vsw_rw);
646 
647 	ddi_report_dev(vswp->dip);
648 	return (DDI_SUCCESS);
649 
650 vsw_attach_fail:
651 	DERR(NULL, "vsw_attach: failed");
652 
653 	if (progress & PROG_mdreg) {
654 		vsw_mdeg_unregister(vswp);
655 		(void) vsw_detach_ports(vswp);
656 	}
657 
658 	if (progress & PROG_macreg)
659 		(void) vsw_mac_unregister(vswp);
660 
661 	if (progress & PROG_swmode) {
662 		vsw_setup_switching_stop(vswp);
663 		vsw_hio_cleanup(vswp);
664 		mutex_enter(&vswp->mac_lock);
665 		vsw_mac_close(vswp);
666 		mutex_exit(&vswp->mac_lock);
667 	}
668 
669 	if (progress & PROG_taskq)
670 		ddi_taskq_destroy(vswp->taskq_p);
671 
672 	if (progress & PROG_mfdb)
673 		mod_hash_destroy_hash(vswp->mfdb);
674 
675 	if (progress & PROG_fdb) {
676 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
677 		mod_hash_destroy_hash(vswp->fdb_hashp);
678 	}
679 
680 	if (progress & PROG_readmd) {
681 		if (VSW_PRI_ETH_DEFINED(vswp)) {
682 			kmem_free(vswp->pri_types,
683 			    sizeof (uint16_t) * vswp->pri_num_types);
684 		}
685 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
686 	}
687 
688 	if (progress & PROG_locks) {
689 		rw_destroy(&vswp->plist.lockrw);
690 		rw_destroy(&vswp->mfdbrw);
691 		rw_destroy(&vswp->if_lockrw);
692 		rw_destroy(&vswp->maccl_rwlock);
693 		cv_destroy(&vswp->sw_thr_cv);
694 		mutex_destroy(&vswp->sw_thr_lock);
695 		mutex_destroy(&vswp->mca_lock);
696 		mutex_destroy(&vswp->mac_lock);
697 	}
698 
699 	ddi_soft_state_free(vsw_state, instance);
700 	return (DDI_FAILURE);
701 }
702 
703 static int
704 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
705 {
706 	vio_mblk_pool_t		*poolp, *npoolp;
707 	vsw_t			**vswpp, *vswp;
708 	int 			instance;
709 
710 	instance = ddi_get_instance(dip);
711 	vswp = ddi_get_soft_state(vsw_state, instance);
712 
713 	if (vswp == NULL) {
714 		return (DDI_FAILURE);
715 	}
716 
717 	switch (cmd) {
718 	case DDI_DETACH:
719 		break;
720 	case DDI_SUSPEND:
721 	case DDI_PM_SUSPEND:
722 	default:
723 		return (DDI_FAILURE);
724 	}
725 
726 	D2(vswp, "detaching instance %d", instance);
727 
728 	/* Stop any pending thread to setup switching mode. */
729 	vsw_setup_switching_stop(vswp);
730 
731 	/* Cleanup the interface's mac client */
732 	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
733 
734 	if (vswp->if_state & VSW_IF_REG) {
735 		if (vsw_mac_unregister(vswp) != 0) {
736 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
737 			    "MAC layer", vswp->instance);
738 			return (DDI_FAILURE);
739 		}
740 	}
741 
742 	vsw_mdeg_unregister(vswp);
743 
744 	/* cleanup HybridIO */
745 	vsw_hio_cleanup(vswp);
746 
747 	if (vsw_detach_ports(vswp) != 0) {
748 		cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports",
749 		    vswp->instance);
750 		return (DDI_FAILURE);
751 	}
752 
753 	rw_destroy(&vswp->if_lockrw);
754 
755 	vsw_mac_cleanup_ports(vswp);
756 
757 	/*
758 	 * Now that the ports have been deleted, stop and close
759 	 * the physical device.
760 	 */
761 	mutex_enter(&vswp->mac_lock);
762 	vsw_mac_close(vswp);
763 	mutex_exit(&vswp->mac_lock);
764 
765 	mutex_destroy(&vswp->mac_lock);
766 	cv_destroy(&vswp->sw_thr_cv);
767 	mutex_destroy(&vswp->sw_thr_lock);
768 	rw_destroy(&vswp->maccl_rwlock);
769 
770 	/*
771 	 * Destroy any free pools that may still exist.
772 	 */
773 	poolp = vswp->rxh;
774 	while (poolp != NULL) {
775 		npoolp = vswp->rxh = poolp->nextp;
776 		if (vio_destroy_mblks(poolp) != 0) {
777 			vswp->rxh = poolp;
778 			return (DDI_FAILURE);
779 		}
780 		poolp = npoolp;
781 	}
782 
783 	/*
784 	 * Remove this instance from any entries it may be on in
785 	 * the hash table by using the list of addresses maintained
786 	 * in the vsw_t structure.
787 	 */
788 	vsw_del_mcst_vsw(vswp);
789 
790 	vswp->mcap = NULL;
791 	mutex_destroy(&vswp->mca_lock);
792 
793 	/*
794 	 * By now any pending tasks have finished and the underlying
795 	 * ldc's have been destroyed, so its safe to delete the control
796 	 * message taskq.
797 	 */
798 	if (vswp->taskq_p != NULL)
799 		ddi_taskq_destroy(vswp->taskq_p);
800 
801 	/*
802 	 * At this stage all the data pointers in the hash table
803 	 * should be NULL, as all the ports have been removed and will
804 	 * have deleted themselves from the port lists which the data
805 	 * pointers point to. Hence we can destroy the table using the
806 	 * default destructors.
807 	 */
808 	D2(vswp, "vsw_detach: destroying hash tables..");
809 	vsw_destroy_vlans(vswp, VSW_LOCALDEV);
810 	mod_hash_destroy_hash(vswp->fdb_hashp);
811 	vswp->fdb_hashp = NULL;
812 
813 	WRITE_ENTER(&vswp->mfdbrw);
814 	mod_hash_destroy_hash(vswp->mfdb);
815 	vswp->mfdb = NULL;
816 	RW_EXIT(&vswp->mfdbrw);
817 	rw_destroy(&vswp->mfdbrw);
818 
819 	/* free pri_types table */
820 	if (VSW_PRI_ETH_DEFINED(vswp)) {
821 		kmem_free(vswp->pri_types,
822 		    sizeof (uint16_t) * vswp->pri_num_types);
823 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
824 	}
825 
826 	ddi_remove_minor_node(dip, NULL);
827 
828 	rw_destroy(&vswp->plist.lockrw);
829 	WRITE_ENTER(&vsw_rw);
830 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
831 		if (*vswpp == vswp) {
832 			*vswpp = vswp->next;
833 			break;
834 		}
835 	}
836 	RW_EXIT(&vsw_rw);
837 	ddi_soft_state_free(vsw_state, instance);
838 
839 	return (DDI_SUCCESS);
840 }
841 
842 /*
843  * Get the value of the "vsw-phys-dev" property in the specified
844  * node. This property is the name of the physical device that
845  * the virtual switch will use to talk to the outside world.
846  *
847  * Note it is valid for this property to be NULL (but the property
848  * itself must exist). Callers of this routine should verify that
849  * the value returned is what they expected (i.e. either NULL or non NULL).
850  *
851  * On success returns value of the property in region pointed to by
852  * the 'name' argument, and with return value of 0. Otherwise returns 1.
853  */
854 static int
855 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
856 {
857 	int		len = 0;
858 	int		instance;
859 	char		*physname = NULL;
860 	char		*dev;
861 	const char	*dev_name;
862 	char		myname[MAXNAMELEN];
863 
864 	dev_name = ddi_driver_name(vswp->dip);
865 	instance = ddi_get_instance(vswp->dip);
866 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
867 
868 	if (md_get_prop_data(mdp, node, physdev_propname,
869 	    (uint8_t **)(&physname), &len) != 0) {
870 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
871 		    "device(s) from MD", vswp->instance);
872 		return (1);
873 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
874 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
875 		    vswp->instance, physname);
876 		return (1);
877 	} else if (strcmp(myname, physname) == 0) {
878 		/*
879 		 * Prevent the vswitch from opening itself as the
880 		 * network device.
881 		 */
882 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
883 		    vswp->instance, physname);
884 		return (1);
885 	} else {
886 		(void) strncpy(name, physname, strlen(physname) + 1);
887 		D2(vswp, "%s: using first device specified (%s)",
888 		    __func__, physname);
889 	}
890 
891 #ifdef DEBUG
892 	/*
893 	 * As a temporary measure to aid testing we check to see if there
894 	 * is a vsw.conf file present. If there is we use the value of the
895 	 * vsw_physname property in the file as the name of the physical
896 	 * device, overriding the value from the MD.
897 	 *
898 	 * There may be multiple devices listed, but for the moment
899 	 * we just use the first one.
900 	 */
901 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
902 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
903 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
904 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
905 			    vswp->instance, dev);
906 			ddi_prop_free(dev);
907 			return (1);
908 		} else {
909 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
910 			    "config file", vswp->instance, dev);
911 
912 			(void) strncpy(name, dev, strlen(dev) + 1);
913 		}
914 
915 		ddi_prop_free(dev);
916 	}
917 #endif
918 
919 	return (0);
920 }
921 
922 /*
923  * Read the 'vsw-switch-mode' property from the specified MD node.
924  *
925  * Returns 0 on success, otherwise returns 1.
926  */
927 static int
928 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode)
929 {
930 	int		len = 0;
931 	char		*smode = NULL;
932 	char		*curr_mode = NULL;
933 
934 	D1(vswp, "%s: enter", __func__);
935 
936 	/*
937 	 * Get the switch-mode property. The modes are listed in
938 	 * decreasing order of preference, i.e. prefered mode is
939 	 * first item in list.
940 	 */
941 	len = 0;
942 	if (md_get_prop_data(mdp, node, smode_propname,
943 	    (uint8_t **)(&smode), &len) != 0) {
944 		/*
945 		 * Unable to get switch-mode property from MD, nothing
946 		 * more we can do.
947 		 */
948 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
949 		    " from the MD", vswp->instance);
950 		return (1);
951 	}
952 
953 	curr_mode = smode;
954 	/*
955 	 * Modes of operation:
956 	 * 'switched'	 - layer 2 switching, underlying HW in
957 	 *			programmed mode.
958 	 * 'promiscuous' - layer 2 switching, underlying HW in
959 	 *			promiscuous mode.
960 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
961 	 *			in non-promiscuous mode.
962 	 */
963 	while (curr_mode < (smode + len)) {
964 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
965 		if (strcmp(curr_mode, "switched") == 0) {
966 			*mode = VSW_LAYER2;
967 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
968 			*mode = VSW_LAYER2 | VSW_LAYER2_PROMISC;
969 		} else if (strcmp(curr_mode, "routed") == 0) {
970 			*mode = VSW_LAYER3;
971 		} else {
972 			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
973 			    "setting to default switched mode",
974 			    vswp->instance, curr_mode);
975 			*mode = VSW_LAYER2;
976 		}
977 		curr_mode += strlen(curr_mode) + 1;
978 	}
979 
980 	D2(vswp, "%s: %d mode", __func__, *mode);
981 
982 	D1(vswp, "%s: exit", __func__);
983 
984 	return (0);
985 }
986 
987 /*
988  * Register with the MAC layer as a network device, so we
989  * can be plumbed if necessary.
990  */
991 static int
992 vsw_mac_register(vsw_t *vswp)
993 {
994 	mac_register_t	*macp;
995 	int		rv;
996 
997 	D1(vswp, "%s: enter", __func__);
998 
999 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1000 		return (EINVAL);
1001 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1002 	macp->m_driver = vswp;
1003 	macp->m_dip = vswp->dip;
1004 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1005 	macp->m_callbacks = &vsw_m_callbacks;
1006 	macp->m_min_sdu = 0;
1007 	macp->m_max_sdu = vswp->mtu;
1008 	macp->m_margin = VLAN_TAGSZ;
1009 	rv = mac_register(macp, &vswp->if_mh);
1010 	mac_free(macp);
1011 	if (rv != 0) {
1012 		/*
1013 		 * Treat this as a non-fatal error as we may be
1014 		 * able to operate in some other mode.
1015 		 */
1016 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1017 		    "a provider with MAC layer", vswp->instance);
1018 		return (rv);
1019 	}
1020 
1021 	vswp->if_state |= VSW_IF_REG;
1022 
1023 	D1(vswp, "%s: exit", __func__);
1024 
1025 	return (rv);
1026 }
1027 
1028 static int
1029 vsw_mac_unregister(vsw_t *vswp)
1030 {
1031 	int		rv = 0;
1032 
1033 	D1(vswp, "%s: enter", __func__);
1034 
1035 	WRITE_ENTER(&vswp->if_lockrw);
1036 
1037 	if (vswp->if_state & VSW_IF_REG) {
1038 		rv = mac_unregister(vswp->if_mh);
1039 		if (rv != 0) {
1040 			DWARN(vswp, "%s: unable to unregister from MAC "
1041 			    "framework", __func__);
1042 
1043 			RW_EXIT(&vswp->if_lockrw);
1044 			D1(vswp, "%s: fail exit", __func__);
1045 			return (rv);
1046 		}
1047 
1048 		/* mark i/f as down and unregistered */
1049 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1050 	}
1051 	RW_EXIT(&vswp->if_lockrw);
1052 
1053 	D1(vswp, "%s: exit", __func__);
1054 
1055 	return (rv);
1056 }
1057 
1058 static int
1059 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1060 {
1061 	vsw_t			*vswp = (vsw_t *)arg;
1062 
1063 	D1(vswp, "%s: enter", __func__);
1064 
1065 	mutex_enter(&vswp->mac_lock);
1066 	if (vswp->mh == NULL) {
1067 		mutex_exit(&vswp->mac_lock);
1068 		return (EINVAL);
1069 	}
1070 
1071 	/* return stats from underlying device */
1072 	*val = mac_stat_get(vswp->mh, stat);
1073 
1074 	mutex_exit(&vswp->mac_lock);
1075 
1076 	return (0);
1077 }
1078 
1079 static void
1080 vsw_m_stop(void *arg)
1081 {
1082 	vsw_t	*vswp = (vsw_t *)arg;
1083 
1084 	D1(vswp, "%s: enter", __func__);
1085 
1086 	WRITE_ENTER(&vswp->if_lockrw);
1087 	vswp->if_state &= ~VSW_IF_UP;
1088 	RW_EXIT(&vswp->if_lockrw);
1089 
1090 	/* Cleanup and close the mac client */
1091 	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
1092 
1093 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1094 }
1095 
1096 static int
1097 vsw_m_start(void *arg)
1098 {
1099 	int		rv;
1100 	vsw_t		*vswp = (vsw_t *)arg;
1101 
1102 	D1(vswp, "%s: enter", __func__);
1103 
1104 	WRITE_ENTER(&vswp->if_lockrw);
1105 
1106 	vswp->if_state |= VSW_IF_UP;
1107 
1108 	if (vswp->switching_setup_done == B_FALSE) {
1109 		/*
1110 		 * If the switching mode has not been setup yet, just
1111 		 * return. The unicast address will be programmed
1112 		 * after the physical device is successfully setup by the
1113 		 * timeout handler.
1114 		 */
1115 		RW_EXIT(&vswp->if_lockrw);
1116 		return (0);
1117 	}
1118 
1119 	/* if in layer2 mode, program unicast address. */
1120 	if (vswp->mh != NULL) {
1121 		/* Init a mac client and program addresses */
1122 		rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
1123 		if (rv != 0) {
1124 			cmn_err(CE_NOTE,
1125 			    "!vsw%d: failed to program interface "
1126 			    "unicast address\n", vswp->instance);
1127 		}
1128 	}
1129 
1130 	RW_EXIT(&vswp->if_lockrw);
1131 
1132 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1133 	return (0);
1134 }
1135 
1136 /*
1137  * Change the local interface address.
1138  *
1139  * Note: we don't support this entry point. The local
1140  * mac address of the switch can only be changed via its
1141  * MD node properties.
1142  */
1143 static int
1144 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1145 {
1146 	_NOTE(ARGUNUSED(arg, macaddr))
1147 
1148 	return (DDI_FAILURE);
1149 }
1150 
1151 static int
1152 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1153 {
1154 	vsw_t		*vswp = (vsw_t *)arg;
1155 	mcst_addr_t	*mcst_p = NULL;
1156 	uint64_t	addr = 0x0;
1157 	int		i, ret = 0;
1158 
1159 	D1(vswp, "%s: enter", __func__);
1160 
1161 	/*
1162 	 * Convert address into form that can be used
1163 	 * as hash table key.
1164 	 */
1165 	for (i = 0; i < ETHERADDRL; i++) {
1166 		addr = (addr << 8) | mca[i];
1167 	}
1168 
1169 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1170 
1171 	if (add) {
1172 		D2(vswp, "%s: adding multicast", __func__);
1173 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1174 			/*
1175 			 * Update the list of multicast addresses
1176 			 * contained within the vsw_t structure to
1177 			 * include this new one.
1178 			 */
1179 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1180 			if (mcst_p == NULL) {
1181 				DERR(vswp, "%s unable to alloc mem", __func__);
1182 				(void) vsw_del_mcst(vswp,
1183 				    VSW_LOCALDEV, addr, NULL);
1184 				return (1);
1185 			}
1186 			mcst_p->addr = addr;
1187 			ether_copy(mca, &mcst_p->mca);
1188 
1189 			/*
1190 			 * Call into the underlying driver to program the
1191 			 * address into HW.
1192 			 */
1193 			ret = vsw_mac_multicast_add(vswp, NULL, mcst_p,
1194 			    VSW_LOCALDEV);
1195 			if (ret != 0) {
1196 				(void) vsw_del_mcst(vswp,
1197 				    VSW_LOCALDEV, addr, NULL);
1198 				kmem_free(mcst_p, sizeof (*mcst_p));
1199 				return (ret);
1200 			}
1201 
1202 			mutex_enter(&vswp->mca_lock);
1203 			mcst_p->nextp = vswp->mcap;
1204 			vswp->mcap = mcst_p;
1205 			mutex_exit(&vswp->mca_lock);
1206 		} else {
1207 			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
1208 			    "address", vswp->instance);
1209 		}
1210 		return (ret);
1211 	}
1212 
1213 	D2(vswp, "%s: removing multicast", __func__);
1214 	/*
1215 	 * Remove the address from the hash table..
1216 	 */
1217 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1218 
1219 		/*
1220 		 * ..and then from the list maintained in the
1221 		 * vsw_t structure.
1222 		 */
1223 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1224 		ASSERT(mcst_p != NULL);
1225 
1226 		vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV);
1227 		kmem_free(mcst_p, sizeof (*mcst_p));
1228 	}
1229 
1230 	D1(vswp, "%s: exit", __func__);
1231 
1232 	return (0);
1233 }
1234 
1235 static int
1236 vsw_m_promisc(void *arg, boolean_t on)
1237 {
1238 	vsw_t		*vswp = (vsw_t *)arg;
1239 
1240 	D1(vswp, "%s: enter", __func__);
1241 
1242 	WRITE_ENTER(&vswp->if_lockrw);
1243 	if (on)
1244 		vswp->if_state |= VSW_IF_PROMISC;
1245 	else
1246 		vswp->if_state &= ~VSW_IF_PROMISC;
1247 	RW_EXIT(&vswp->if_lockrw);
1248 
1249 	D1(vswp, "%s: exit", __func__);
1250 
1251 	return (0);
1252 }
1253 
1254 static mblk_t *
1255 vsw_m_tx(void *arg, mblk_t *mp)
1256 {
1257 	vsw_t		*vswp = (vsw_t *)arg;
1258 
1259 	D1(vswp, "%s: enter", __func__);
1260 
1261 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1262 
1263 	if (mp == NULL) {
1264 		return (NULL);
1265 	}
1266 
1267 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1268 
1269 	D1(vswp, "%s: exit", __func__);
1270 
1271 	return (NULL);
1272 }
1273 
1274 /*
1275  * Register for machine description (MD) updates.
1276  *
1277  * Returns 0 on success, 1 on failure.
1278  */
1279 static int
1280 vsw_mdeg_register(vsw_t *vswp)
1281 {
1282 	mdeg_prop_spec_t	*pspecp;
1283 	mdeg_node_spec_t	*inst_specp;
1284 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1285 	size_t			templatesz;
1286 	int			rv;
1287 
1288 	D1(vswp, "%s: enter", __func__);
1289 
1290 	/*
1291 	 * Allocate and initialize a per-instance copy
1292 	 * of the global property spec array that will
1293 	 * uniquely identify this vsw instance.
1294 	 */
1295 	templatesz = sizeof (vsw_prop_template);
1296 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1297 
1298 	bcopy(vsw_prop_template, pspecp, templatesz);
1299 
1300 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1301 
1302 	/* initialize the complete prop spec structure */
1303 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1304 	inst_specp->namep = "virtual-device";
1305 	inst_specp->specp = pspecp;
1306 
1307 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1308 	    vswp->regprop);
1309 	/*
1310 	 * Register an interest in 'virtual-device' nodes with a
1311 	 * 'name' property of 'virtual-network-switch'
1312 	 */
1313 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1314 	    (void *)vswp, &mdeg_hdl);
1315 	if (rv != MDEG_SUCCESS) {
1316 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1317 		    __func__, rv);
1318 		goto mdeg_reg_fail;
1319 	}
1320 
1321 	/*
1322 	 * Register an interest in 'vsw-port' nodes.
1323 	 */
1324 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1325 	    (void *)vswp, &mdeg_port_hdl);
1326 	if (rv != MDEG_SUCCESS) {
1327 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1328 		(void) mdeg_unregister(mdeg_hdl);
1329 		goto mdeg_reg_fail;
1330 	}
1331 
1332 	/* save off data that will be needed later */
1333 	vswp->inst_spec = inst_specp;
1334 	vswp->mdeg_hdl = mdeg_hdl;
1335 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1336 
1337 	D1(vswp, "%s: exit", __func__);
1338 	return (0);
1339 
1340 mdeg_reg_fail:
1341 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1342 	    vswp->instance);
1343 	kmem_free(pspecp, templatesz);
1344 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1345 
1346 	vswp->mdeg_hdl = NULL;
1347 	vswp->mdeg_port_hdl = NULL;
1348 
1349 	return (1);
1350 }
1351 
1352 static void
1353 vsw_mdeg_unregister(vsw_t *vswp)
1354 {
1355 	D1(vswp, "vsw_mdeg_unregister: enter");
1356 
1357 	if (vswp->mdeg_hdl != NULL)
1358 		(void) mdeg_unregister(vswp->mdeg_hdl);
1359 
1360 	if (vswp->mdeg_port_hdl != NULL)
1361 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1362 
1363 	if (vswp->inst_spec != NULL) {
1364 		if (vswp->inst_spec->specp != NULL) {
1365 			(void) kmem_free(vswp->inst_spec->specp,
1366 			    sizeof (vsw_prop_template));
1367 			vswp->inst_spec->specp = NULL;
1368 		}
1369 
1370 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1371 		vswp->inst_spec = NULL;
1372 	}
1373 
1374 	D1(vswp, "vsw_mdeg_unregister: exit");
1375 }
1376 
1377 /*
1378  * Mdeg callback invoked for the vsw node itself.
1379  */
1380 static int
1381 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1382 {
1383 	vsw_t		*vswp;
1384 	md_t		*mdp;
1385 	mde_cookie_t	node;
1386 	uint64_t	inst;
1387 	char		*node_name = NULL;
1388 
1389 	if (resp == NULL)
1390 		return (MDEG_FAILURE);
1391 
1392 	vswp = (vsw_t *)cb_argp;
1393 
1394 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1395 	    " : prev matched %d", __func__, resp->added.nelem,
1396 	    resp->removed.nelem, resp->match_curr.nelem,
1397 	    resp->match_prev.nelem);
1398 
1399 	/*
1400 	 * We get an initial callback for this node as 'added'
1401 	 * after registering with mdeg. Note that we would have
1402 	 * already gathered information about this vsw node by
1403 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1404 	 * So, there is a window where the properties of this
1405 	 * node might have changed when we get this initial 'added'
1406 	 * callback. We handle this as if an update occured
1407 	 * and invoke the same function which handles updates to
1408 	 * the properties of this vsw-node if any.
1409 	 *
1410 	 * A non-zero 'match' value indicates that the MD has been
1411 	 * updated and that a virtual-network-switch node is
1412 	 * present which may or may not have been updated. It is
1413 	 * up to the clients to examine their own nodes and
1414 	 * determine if they have changed.
1415 	 */
1416 	if (resp->added.nelem != 0) {
1417 
1418 		if (resp->added.nelem != 1) {
1419 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1420 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1421 			return (MDEG_FAILURE);
1422 		}
1423 
1424 		mdp = resp->added.mdp;
1425 		node = resp->added.mdep[0];
1426 
1427 	} else if (resp->match_curr.nelem != 0) {
1428 
1429 		if (resp->match_curr.nelem != 1) {
1430 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1431 			    "invalid: %d\n", vswp->instance,
1432 			    resp->match_curr.nelem);
1433 			return (MDEG_FAILURE);
1434 		}
1435 
1436 		mdp = resp->match_curr.mdp;
1437 		node = resp->match_curr.mdep[0];
1438 
1439 	} else {
1440 		return (MDEG_FAILURE);
1441 	}
1442 
1443 	/* Validate name and instance */
1444 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1445 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1446 		return (MDEG_FAILURE);
1447 	}
1448 
1449 	/* is this a virtual-network-switch? */
1450 	if (strcmp(node_name, vsw_propname) != 0) {
1451 		DERR(vswp, "%s: Invalid node name: %s\n",
1452 		    __func__, node_name);
1453 		return (MDEG_FAILURE);
1454 	}
1455 
1456 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1457 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1458 		    __func__);
1459 		return (MDEG_FAILURE);
1460 	}
1461 
1462 	/* is this the right instance of vsw? */
1463 	if (inst != vswp->regprop) {
1464 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1465 		    __func__, inst);
1466 		return (MDEG_FAILURE);
1467 	}
1468 
1469 	vsw_update_md_prop(vswp, mdp, node);
1470 
1471 	return (MDEG_SUCCESS);
1472 }
1473 
1474 /*
1475  * Mdeg callback invoked for changes to the vsw-port nodes
1476  * under the vsw node.
1477  */
1478 static int
1479 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1480 {
1481 	vsw_t		*vswp;
1482 	int		idx;
1483 	md_t		*mdp;
1484 	mde_cookie_t	node;
1485 	uint64_t	inst;
1486 	int		rv;
1487 
1488 	if ((resp == NULL) || (cb_argp == NULL))
1489 		return (MDEG_FAILURE);
1490 
1491 	vswp = (vsw_t *)cb_argp;
1492 
1493 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1494 	    " : prev matched %d", __func__, resp->added.nelem,
1495 	    resp->removed.nelem, resp->match_curr.nelem,
1496 	    resp->match_prev.nelem);
1497 
1498 	/* process added ports */
1499 	for (idx = 0; idx < resp->added.nelem; idx++) {
1500 		mdp = resp->added.mdp;
1501 		node = resp->added.mdep[idx];
1502 
1503 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1504 
1505 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1506 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1507 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1508 		}
1509 	}
1510 
1511 	/* process removed ports */
1512 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1513 		mdp = resp->removed.mdp;
1514 		node = resp->removed.mdep[idx];
1515 
1516 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1517 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1518 			    __func__, id_propname, idx);
1519 			continue;
1520 		}
1521 
1522 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1523 
1524 		if (vsw_port_detach(vswp, inst) != 0) {
1525 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1526 			    vswp->instance, inst);
1527 		}
1528 	}
1529 
1530 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1531 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1532 		    resp->match_curr.mdep[idx],
1533 		    resp->match_prev.mdp,
1534 		    resp->match_prev.mdep[idx]);
1535 	}
1536 
1537 	D1(vswp, "%s: exit", __func__);
1538 
1539 	return (MDEG_SUCCESS);
1540 }
1541 
1542 /*
1543  * Scan the machine description for this instance of vsw
1544  * and read its properties. Called only from vsw_attach().
1545  * Returns: 0 on success, 1 on failure.
1546  */
1547 static int
1548 vsw_read_mdprops(vsw_t *vswp)
1549 {
1550 	md_t		*mdp = NULL;
1551 	mde_cookie_t	rootnode;
1552 	mde_cookie_t	*listp = NULL;
1553 	uint64_t	inst;
1554 	uint64_t	cfgh;
1555 	char		*name;
1556 	int		rv = 1;
1557 	int		num_nodes = 0;
1558 	int		num_devs = 0;
1559 	int		listsz = 0;
1560 	int		i;
1561 
1562 	/*
1563 	 * In each 'virtual-device' node in the MD there is a
1564 	 * 'cfg-handle' property which is the MD's concept of
1565 	 * an instance number (this may be completely different from
1566 	 * the device drivers instance #). OBP reads that value and
1567 	 * stores it in the 'reg' property of the appropriate node in
1568 	 * the device tree. We first read this reg property and use this
1569 	 * to compare against the 'cfg-handle' property of vsw nodes
1570 	 * in MD to get to this specific vsw instance and then read
1571 	 * other properties that we are interested in.
1572 	 * We also cache the value of 'reg' property and use it later
1573 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1574 	 */
1575 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1576 	    DDI_PROP_DONTPASS, reg_propname, -1);
1577 	if (inst == -1) {
1578 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1579 		    "OBP device tree", vswp->instance, reg_propname);
1580 		return (rv);
1581 	}
1582 
1583 	vswp->regprop = inst;
1584 
1585 	if ((mdp = md_get_handle()) == NULL) {
1586 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1587 		return (rv);
1588 	}
1589 
1590 	num_nodes = md_node_count(mdp);
1591 	ASSERT(num_nodes > 0);
1592 
1593 	listsz = num_nodes * sizeof (mde_cookie_t);
1594 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1595 
1596 	rootnode = md_root_node(mdp);
1597 
1598 	/* search for all "virtual_device" nodes */
1599 	num_devs = md_scan_dag(mdp, rootnode,
1600 	    md_find_name(mdp, vdev_propname),
1601 	    md_find_name(mdp, "fwd"), listp);
1602 	if (num_devs <= 0) {
1603 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1604 		goto vsw_readmd_exit;
1605 	}
1606 
1607 	/*
1608 	 * Now loop through the list of virtual-devices looking for
1609 	 * devices with name "virtual-network-switch" and for each
1610 	 * such device compare its instance with what we have from
1611 	 * the 'reg' property to find the right node in MD and then
1612 	 * read all its properties.
1613 	 */
1614 	for (i = 0; i < num_devs; i++) {
1615 
1616 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1617 			DWARN(vswp, "%s: name property not found\n",
1618 			    __func__);
1619 			goto vsw_readmd_exit;
1620 		}
1621 
1622 		/* is this a virtual-network-switch? */
1623 		if (strcmp(name, vsw_propname) != 0)
1624 			continue;
1625 
1626 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1627 			DWARN(vswp, "%s: cfg-handle property not found\n",
1628 			    __func__);
1629 			goto vsw_readmd_exit;
1630 		}
1631 
1632 		/* is this the required instance of vsw? */
1633 		if (inst != cfgh)
1634 			continue;
1635 
1636 		/* now read all properties of this vsw instance */
1637 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1638 		break;
1639 	}
1640 
1641 vsw_readmd_exit:
1642 
1643 	kmem_free(listp, listsz);
1644 	(void) md_fini_handle(mdp);
1645 	return (rv);
1646 }
1647 
1648 /*
1649  * Read the initial start-of-day values from the specified MD node.
1650  */
1651 static int
1652 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1653 {
1654 	uint64_t	macaddr = 0;
1655 
1656 	D1(vswp, "%s: enter", __func__);
1657 
1658 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1659 		return (1);
1660 	}
1661 
1662 	/* mac address for vswitch device itself */
1663 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1664 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1665 		    vswp->instance);
1666 		return (1);
1667 	}
1668 
1669 	vsw_save_lmacaddr(vswp, macaddr);
1670 
1671 	if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) {
1672 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1673 		    "defaulting to 'switched' mode",
1674 		    __func__, smode_propname);
1675 
1676 		vswp->smode = VSW_LAYER2;
1677 	}
1678 
1679 	/* read mtu */
1680 	vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
1681 	if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
1682 		vswp->mtu = ETHERMTU;
1683 	}
1684 	vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
1685 	    VLAN_TAGSZ;
1686 
1687 	/* read vlan id properties of this vsw instance */
1688 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1689 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1690 
1691 	/* read priority-ether-types */
1692 	vsw_read_pri_eth_types(vswp, mdp, node);
1693 
1694 	D1(vswp, "%s: exit", __func__);
1695 	return (0);
1696 }
1697 
1698 /*
1699  * Read vlan id properties of the given MD node.
1700  * Arguments:
1701  *   arg:          device argument(vsw device or a port)
1702  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1703  *   mdp:          machine description
1704  *   node:         md node cookie
1705  *
1706  * Returns:
1707  *   pvidp:        port-vlan-id of the node
1708  *   vidspp:       list of vlan-ids of the node
1709  *   nvidsp:       # of vlan-ids in the list
1710  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1711  */
1712 static void
1713 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1714 	uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp,
1715 	uint16_t *default_idp)
1716 {
1717 	vsw_t		*vswp;
1718 	vsw_port_t	*portp;
1719 	char		*pvid_propname;
1720 	char		*vid_propname;
1721 	uint_t		nvids = 0;
1722 	uint32_t	vids_size;
1723 	int		rv;
1724 	int		i;
1725 	uint64_t	*data;
1726 	uint64_t	val;
1727 	int		size;
1728 	int		inst;
1729 
1730 	if (type == VSW_LOCALDEV) {
1731 
1732 		vswp = (vsw_t *)arg;
1733 		pvid_propname = vsw_pvid_propname;
1734 		vid_propname = vsw_vid_propname;
1735 		inst = vswp->instance;
1736 
1737 	} else if (type == VSW_VNETPORT) {
1738 
1739 		portp = (vsw_port_t *)arg;
1740 		vswp = portp->p_vswp;
1741 		pvid_propname = port_pvid_propname;
1742 		vid_propname = port_vid_propname;
1743 		inst = portp->p_instance;
1744 
1745 	} else {
1746 		return;
1747 	}
1748 
1749 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1750 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1751 		if (rv != 0) {
1752 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1753 			    vsw_dvid_propname);
1754 
1755 			*default_idp = vsw_default_vlan_id;
1756 		} else {
1757 			*default_idp = val & 0xFFF;
1758 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1759 			    vsw_dvid_propname, inst, *default_idp);
1760 		}
1761 	}
1762 
1763 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1764 	if (rv != 0) {
1765 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1766 		*pvidp = vsw_default_vlan_id;
1767 	} else {
1768 
1769 		*pvidp = val & 0xFFF;
1770 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1771 		    pvid_propname, inst, *pvidp);
1772 	}
1773 
1774 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1775 	    &size);
1776 	if (rv != 0) {
1777 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1778 		size = 0;
1779 	} else {
1780 		size /= sizeof (uint64_t);
1781 	}
1782 	nvids = size;
1783 
1784 	if (nvids != 0) {
1785 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1786 		vids_size = sizeof (vsw_vlanid_t) * nvids;
1787 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1788 		for (i = 0; i < nvids; i++) {
1789 			(*vidspp)[i].vl_vid = data[i] & 0xFFFF;
1790 			(*vidspp)[i].vl_set = B_FALSE;
1791 			D2(vswp, " %d ", (*vidspp)[i].vl_vid);
1792 		}
1793 		D2(vswp, "\n");
1794 	}
1795 
1796 	*nvidsp = nvids;
1797 }
1798 
1799 /*
1800  * This function reads "priority-ether-types" property from md. This property
1801  * is used to enable support for priority frames. Applications which need
1802  * guaranteed and timely delivery of certain high priority frames to/from
1803  * a vnet or vsw within ldoms, should configure this property by providing
1804  * the ether type(s) for which the priority facility is needed.
1805  * Normal data frames are delivered over a ldc channel using the descriptor
1806  * ring mechanism which is constrained by factors such as descriptor ring size,
1807  * the rate at which the ring is processed at the peer ldc end point, etc.
1808  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1809  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1810  * descriptor ring path and enables a more reliable and timely delivery of
1811  * frames to the peer.
1812  */
1813 static void
1814 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1815 {
1816 	int		rv;
1817 	uint16_t	*types;
1818 	uint64_t	*data;
1819 	int		size;
1820 	int		i;
1821 	size_t		mblk_sz;
1822 
1823 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1824 	    (uint8_t **)&data, &size);
1825 	if (rv != 0) {
1826 		/*
1827 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1828 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1829 		 */
1830 		if (vsw_pri_eth_type != 0) {
1831 			size = sizeof (vsw_pri_eth_type);
1832 			data = &vsw_pri_eth_type;
1833 		} else {
1834 			D3(vswp, "%s: prop(%s) not found", __func__,
1835 			    pri_types_propname);
1836 			size = 0;
1837 		}
1838 	}
1839 
1840 	if (size == 0) {
1841 		vswp->pri_num_types = 0;
1842 		return;
1843 	}
1844 
1845 	/*
1846 	 * we have some priority-ether-types defined;
1847 	 * allocate a table of these types and also
1848 	 * allocate a pool of mblks to transmit these
1849 	 * priority packets.
1850 	 */
1851 	size /= sizeof (uint64_t);
1852 	vswp->pri_num_types = size;
1853 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1854 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1855 		types[i] = data[i] & 0xFFFF;
1856 	}
1857 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1858 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1859 }
1860 
1861 static void
1862 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1863 {
1864 	int		rv;
1865 	int		inst;
1866 	uint64_t	val;
1867 	char		*mtu_propname;
1868 
1869 	mtu_propname = vsw_mtu_propname;
1870 	inst = vswp->instance;
1871 
1872 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1873 	if (rv != 0) {
1874 		D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
1875 		*mtu = vsw_ethermtu;
1876 	} else {
1877 
1878 		*mtu = val & 0xFFFF;
1879 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1880 		    mtu_propname, inst, *mtu);
1881 	}
1882 }
1883 
1884 /*
1885  * Update the mtu of the vsw device. We first check if the device has been
1886  * plumbed and if so fail the mtu update. Otherwise, we continue to update the
1887  * new mtu and reset all ports to initiate handshake re-negotiation with peers
1888  * using the new mtu.
1889  */
1890 static int
1891 vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
1892 {
1893 	int	rv;
1894 
1895 	WRITE_ENTER(&vswp->if_lockrw);
1896 
1897 	if (vswp->if_state & VSW_IF_UP) {
1898 
1899 		RW_EXIT(&vswp->if_lockrw);
1900 
1901 		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
1902 		    " as the device is plumbed\n", vswp->instance);
1903 		return (EBUSY);
1904 
1905 	} else {
1906 
1907 		D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
1908 		    __func__, vswp->mtu, mtu);
1909 
1910 		vswp->mtu = mtu;
1911 		vswp->max_frame_size = vswp->mtu +
1912 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1913 
1914 		rv = mac_maxsdu_update(vswp->if_mh, mtu);
1915 		if (rv != 0) {
1916 			cmn_err(CE_NOTE,
1917 			    "!vsw%d: Unable to update mtu with mac"
1918 			    " layer\n", vswp->instance);
1919 		}
1920 
1921 		RW_EXIT(&vswp->if_lockrw);
1922 
1923 		/* Reset ports to renegotiate with the new mtu */
1924 		vsw_reset_ports(vswp);
1925 
1926 	}
1927 
1928 	return (0);
1929 }
1930 
1931 /*
1932  * Check to see if the relevant properties in the specified node have
1933  * changed, and if so take the appropriate action.
1934  *
1935  * If any of the properties are missing or invalid we don't take
1936  * any action, as this function should only be invoked when modifications
1937  * have been made to what we assume is a working configuration, which
1938  * we leave active.
1939  *
1940  * Note it is legal for this routine to be invoked even if none of the
1941  * properties in the port node within the MD have actually changed.
1942  */
1943 static void
1944 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1945 {
1946 	char		physname[LIFNAMSIZ];
1947 	char		drv[LIFNAMSIZ];
1948 	uint_t		ddi_instance;
1949 	uint8_t		new_smode;
1950 	int		i;
1951 	uint64_t 	macaddr = 0;
1952 	enum		{MD_init = 0x1,
1953 				MD_physname = 0x2,
1954 				MD_macaddr = 0x4,
1955 				MD_smode = 0x8,
1956 				MD_vlans = 0x10,
1957 				MD_mtu = 0x20} updated;
1958 	int		rv;
1959 	uint16_t	pvid;
1960 	vsw_vlanid_t	*vids;
1961 	uint16_t	nvids;
1962 	uint32_t	mtu;
1963 
1964 	updated = MD_init;
1965 
1966 	D1(vswp, "%s: enter", __func__);
1967 
1968 	/*
1969 	 * Check if name of physical device in MD has changed.
1970 	 */
1971 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
1972 		/*
1973 		 * Do basic sanity check on new device name/instance,
1974 		 * if its non NULL. It is valid for the device name to
1975 		 * have changed from a non NULL to a NULL value, i.e.
1976 		 * the vsw is being changed to 'routed' mode.
1977 		 */
1978 		if ((strlen(physname) != 0) &&
1979 		    (ddi_parse(physname, drv,
1980 		    &ddi_instance) != DDI_SUCCESS)) {
1981 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
1982 			    " a valid device name/instance",
1983 			    vswp->instance, physname);
1984 			goto fail_reconf;
1985 		}
1986 
1987 		if (strcmp(physname, vswp->physname)) {
1988 			D2(vswp, "%s: device name changed from %s to %s",
1989 			    __func__, vswp->physname, physname);
1990 
1991 			updated |= MD_physname;
1992 		} else {
1993 			D2(vswp, "%s: device name unchanged at %s",
1994 			    __func__, vswp->physname);
1995 		}
1996 	} else {
1997 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
1998 		    "device from updated MD.", vswp->instance);
1999 		goto fail_reconf;
2000 	}
2001 
2002 	/*
2003 	 * Check if MAC address has changed.
2004 	 */
2005 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
2006 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
2007 		    vswp->instance);
2008 		goto fail_reconf;
2009 	} else {
2010 		uint64_t maddr = macaddr;
2011 		READ_ENTER(&vswp->if_lockrw);
2012 		for (i = ETHERADDRL - 1; i >= 0; i--) {
2013 			if (vswp->if_addr.ether_addr_octet[i]
2014 			    != (macaddr & 0xFF)) {
2015 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
2016 				    __func__, i,
2017 				    vswp->if_addr.ether_addr_octet[i],
2018 				    (macaddr & 0xFF));
2019 				updated |= MD_macaddr;
2020 				macaddr = maddr;
2021 				break;
2022 			}
2023 			macaddr >>= 8;
2024 		}
2025 		RW_EXIT(&vswp->if_lockrw);
2026 		if (updated & MD_macaddr) {
2027 			vsw_save_lmacaddr(vswp, macaddr);
2028 		}
2029 	}
2030 
2031 	/*
2032 	 * Check if switching modes have changed.
2033 	 */
2034 	if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) {
2035 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2036 		    vswp->instance, smode_propname);
2037 		goto fail_reconf;
2038 	} else {
2039 		if (new_smode != vswp->smode) {
2040 			D2(vswp, "%s: switching mode changed from %d to %d",
2041 			    __func__, vswp->smode, new_smode);
2042 
2043 			updated |= MD_smode;
2044 		}
2045 	}
2046 
2047 	/* Read the vlan ids */
2048 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2049 	    &nvids, NULL);
2050 
2051 	/* Determine if there are any vlan id updates */
2052 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2053 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2054 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2055 	    !vsw_cmp_vids(vids, vswp->vids, nvids))) {
2056 		updated |= MD_vlans;
2057 	}
2058 
2059 	/* Read mtu */
2060 	vsw_mtu_read(vswp, mdp, node, &mtu);
2061 	if (mtu != vswp->mtu) {
2062 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2063 			updated |= MD_mtu;
2064 		} else {
2065 			cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2066 			    " as the specified value:%d is invalid\n",
2067 			    vswp->instance, mtu);
2068 		}
2069 	}
2070 
2071 	/*
2072 	 * Now make any changes which are needed...
2073 	 */
2074 
2075 	if (updated & (MD_physname | MD_smode | MD_mtu)) {
2076 
2077 		/*
2078 		 * Stop any pending thread to setup switching mode.
2079 		 */
2080 		vsw_setup_switching_stop(vswp);
2081 
2082 		/* Cleanup HybridIO */
2083 		vsw_hio_cleanup(vswp);
2084 
2085 		/*
2086 		 * Remove unicst, mcst addrs of vsw interface
2087 		 * and ports from the physdev. This also closes
2088 		 * the corresponding mac clients.
2089 		 */
2090 		vsw_unset_addrs(vswp);
2091 
2092 		/*
2093 		 * Stop, detach and close the old device..
2094 		 */
2095 		mutex_enter(&vswp->mac_lock);
2096 		vsw_mac_close(vswp);
2097 		mutex_exit(&vswp->mac_lock);
2098 
2099 		/*
2100 		 * Update phys name.
2101 		 */
2102 		if (updated & MD_physname) {
2103 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2104 			    vswp->instance, vswp->physname, physname);
2105 			(void) strncpy(vswp->physname,
2106 			    physname, strlen(physname) + 1);
2107 		}
2108 
2109 		/*
2110 		 * Update array with the new switch mode values.
2111 		 */
2112 		if (updated & MD_smode) {
2113 			vswp->smode = new_smode;
2114 		}
2115 
2116 		/* Update mtu */
2117 		if (updated & MD_mtu) {
2118 			rv = vsw_mtu_update(vswp, mtu);
2119 			if (rv != 0) {
2120 				goto fail_update;
2121 			}
2122 		}
2123 
2124 		/*
2125 		 * ..and attach, start the new device.
2126 		 */
2127 		rv = vsw_setup_switching(vswp);
2128 		if (rv == EAGAIN) {
2129 			/*
2130 			 * Unable to setup switching mode.
2131 			 * As the error is EAGAIN, schedule a thread to retry
2132 			 * and return. Programming addresses of ports and
2133 			 * vsw interface will be done by the thread when the
2134 			 * switching setup completes successfully.
2135 			 */
2136 			if (vsw_setup_switching_start(vswp) != 0) {
2137 				goto fail_update;
2138 			}
2139 			return;
2140 
2141 		} else if (rv) {
2142 			goto fail_update;
2143 		}
2144 
2145 		vsw_setup_layer2_post_process(vswp);
2146 	} else if (updated & MD_macaddr) {
2147 		/*
2148 		 * We enter here if only MD_macaddr is exclusively updated.
2149 		 * If MD_physname and/or MD_smode are also updated, then
2150 		 * as part of that, we would have implicitly processed
2151 		 * MD_macaddr update (above).
2152 		 */
2153 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2154 		    vswp->instance, macaddr);
2155 
2156 		READ_ENTER(&vswp->if_lockrw);
2157 		if (vswp->if_state & VSW_IF_UP) {
2158 			/* reconfigure with new address */
2159 			vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0);
2160 
2161 			/*
2162 			 * Notify the MAC layer of the changed address.
2163 			 */
2164 			mac_unicst_update(vswp->if_mh,
2165 			    (uint8_t *)&vswp->if_addr);
2166 
2167 		}
2168 		RW_EXIT(&vswp->if_lockrw);
2169 
2170 	}
2171 
2172 	if (updated & MD_vlans) {
2173 		/* Remove existing vlan ids from the hash table. */
2174 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2175 
2176 		if (vswp->if_state & VSW_IF_UP) {
2177 			vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids);
2178 		} else {
2179 			if (vswp->nvids != 0) {
2180 				kmem_free(vswp->vids,
2181 				    sizeof (vsw_vlanid_t) * vswp->nvids);
2182 			}
2183 			vswp->vids = vids;
2184 			vswp->nvids = nvids;
2185 			vswp->pvid = pvid;
2186 		}
2187 
2188 		/* add these new vlan ids into hash table */
2189 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2190 	} else {
2191 		if (nvids != 0) {
2192 			kmem_free(vids, sizeof (vsw_vlanid_t) * nvids);
2193 		}
2194 	}
2195 
2196 	return;
2197 
2198 fail_reconf:
2199 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2200 	return;
2201 
2202 fail_update:
2203 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2204 	    vswp->instance);
2205 }
2206 
2207 /*
2208  * Read the port's md properties.
2209  */
2210 static int
2211 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2212 	md_t *mdp, mde_cookie_t *node)
2213 {
2214 	uint64_t		ldc_id;
2215 	uint8_t			*addrp;
2216 	int			i, addrsz;
2217 	int			num_nodes = 0, nchan = 0;
2218 	int			listsz = 0;
2219 	mde_cookie_t		*listp = NULL;
2220 	struct ether_addr	ea;
2221 	uint64_t		macaddr;
2222 	uint64_t		inst = 0;
2223 	uint64_t		val;
2224 
2225 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2226 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2227 		    id_propname);
2228 		return (1);
2229 	}
2230 
2231 	/*
2232 	 * Find the channel endpoint node(s) (which should be under this
2233 	 * port node) which contain the channel id(s).
2234 	 */
2235 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2236 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2237 		    __func__, num_nodes);
2238 		return (1);
2239 	}
2240 
2241 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2242 
2243 	/* allocate enough space for node list */
2244 	listsz = num_nodes * sizeof (mde_cookie_t);
2245 	listp = kmem_zalloc(listsz, KM_SLEEP);
2246 
2247 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2248 	    md_find_name(mdp, "fwd"), listp);
2249 
2250 	if (nchan <= 0) {
2251 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2252 		kmem_free(listp, listsz);
2253 		return (1);
2254 	}
2255 
2256 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2257 
2258 	/* use property from first node found */
2259 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2260 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2261 		    id_propname);
2262 		kmem_free(listp, listsz);
2263 		return (1);
2264 	}
2265 
2266 	/* don't need list any more */
2267 	kmem_free(listp, listsz);
2268 
2269 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2270 
2271 	/* read mac-address property */
2272 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2273 	    &addrp, &addrsz)) {
2274 		DWARN(vswp, "%s: prop(%s) not found",
2275 		    __func__, remaddr_propname);
2276 		return (1);
2277 	}
2278 
2279 	if (addrsz < ETHERADDRL) {
2280 		DWARN(vswp, "%s: invalid address size", __func__);
2281 		return (1);
2282 	}
2283 
2284 	macaddr = *((uint64_t *)addrp);
2285 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2286 
2287 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2288 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2289 		macaddr >>= 8;
2290 	}
2291 
2292 	/* now update all properties into the port */
2293 	portp->p_vswp = vswp;
2294 	portp->p_instance = inst;
2295 	portp->addr_set = B_FALSE;
2296 	ether_copy(&ea, &portp->p_macaddr);
2297 	if (nchan > VSW_PORT_MAX_LDCS) {
2298 		D2(vswp, "%s: using first of %d ldc ids",
2299 		    __func__, nchan);
2300 		nchan = VSW_PORT_MAX_LDCS;
2301 	}
2302 	portp->num_ldcs = nchan;
2303 	portp->ldc_ids =
2304 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2305 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2306 
2307 	/* read vlan id properties of this port node */
2308 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2309 	    &portp->vids, &portp->nvids, NULL);
2310 
2311 	/* Check if hybrid property is present */
2312 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2313 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2314 		portp->p_hio_enabled = B_TRUE;
2315 	} else {
2316 		portp->p_hio_enabled = B_FALSE;
2317 	}
2318 	/*
2319 	 * Port hio capability determined after version
2320 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2321 	 */
2322 	portp->p_hio_capable = B_FALSE;
2323 	return (0);
2324 }
2325 
2326 /*
2327  * Add a new port to the system.
2328  *
2329  * Returns 0 on success, 1 on failure.
2330  */
2331 int
2332 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2333 {
2334 	vsw_port_t	*portp;
2335 	int		rv;
2336 
2337 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2338 
2339 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2340 	if (rv != 0) {
2341 		kmem_free(portp, sizeof (*portp));
2342 		return (1);
2343 	}
2344 
2345 	rv = vsw_port_attach(portp);
2346 	if (rv != 0) {
2347 		DERR(vswp, "%s: failed to attach port", __func__);
2348 		return (1);
2349 	}
2350 
2351 	return (0);
2352 }
2353 
2354 static int
2355 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2356 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2357 {
2358 	uint64_t	cport_num;
2359 	uint64_t	pport_num;
2360 	vsw_port_list_t	*plistp;
2361 	vsw_port_t	*portp;
2362 	boolean_t	updated_vlans = B_FALSE;
2363 	uint16_t	pvid;
2364 	vsw_vlanid_t	*vids;
2365 	uint16_t	nvids;
2366 	uint64_t	val;
2367 	boolean_t	hio_enabled = B_FALSE;
2368 
2369 	/*
2370 	 * For now, we get port updates only if vlan ids changed.
2371 	 * We read the port num and do some sanity check.
2372 	 */
2373 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2374 		return (1);
2375 	}
2376 
2377 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2378 		return (1);
2379 	}
2380 	if (cport_num != pport_num)
2381 		return (1);
2382 
2383 	plistp = &(vswp->plist);
2384 
2385 	READ_ENTER(&plistp->lockrw);
2386 
2387 	portp = vsw_lookup_port(vswp, cport_num);
2388 	if (portp == NULL) {
2389 		RW_EXIT(&plistp->lockrw);
2390 		return (1);
2391 	}
2392 
2393 	/* Read the vlan ids */
2394 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2395 	    &vids, &nvids, NULL);
2396 
2397 	/* Determine if there are any vlan id updates */
2398 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2399 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2400 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2401 	    !vsw_cmp_vids(vids, portp->vids, nvids))) {
2402 		updated_vlans = B_TRUE;
2403 	}
2404 
2405 	if (updated_vlans == B_TRUE) {
2406 
2407 		/* Remove existing vlan ids from the hash table. */
2408 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2409 
2410 		/* Reconfigure vlans with network device */
2411 		vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids);
2412 
2413 		/* add these new vlan ids into hash table */
2414 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2415 
2416 		/* reset the port if it is vlan unaware (ver < 1.3) */
2417 		vsw_vlan_unaware_port_reset(portp);
2418 	}
2419 
2420 	/* Check if hybrid property is present */
2421 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2422 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2423 		hio_enabled = B_TRUE;
2424 	}
2425 
2426 	if (portp->p_hio_enabled != hio_enabled) {
2427 		vsw_hio_port_update(portp, hio_enabled);
2428 	}
2429 
2430 	RW_EXIT(&plistp->lockrw);
2431 
2432 	return (0);
2433 }
2434 
2435 /*
2436  * vsw_mac_rx -- A common function to send packets to the interface.
2437  * By default this function check if the interface is UP or not, the
2438  * rest of the behaviour depends on the flags as below:
2439  *
2440  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2441  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2442  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2443  */
2444 void
2445 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2446     mblk_t *mp, vsw_macrx_flags_t flags)
2447 {
2448 	mblk_t		*mpt;
2449 
2450 	D1(vswp, "%s:enter\n", __func__);
2451 	READ_ENTER(&vswp->if_lockrw);
2452 	/* Check if the interface is up */
2453 	if (!(vswp->if_state & VSW_IF_UP)) {
2454 		RW_EXIT(&vswp->if_lockrw);
2455 		/* Free messages only if FREEMSG flag specified */
2456 		if (flags & VSW_MACRX_FREEMSG) {
2457 			freemsgchain(mp);
2458 		}
2459 		D1(vswp, "%s:exit\n", __func__);
2460 		return;
2461 	}
2462 	/*
2463 	 * If PROMISC flag is passed, then check if
2464 	 * the interface is in the PROMISC mode.
2465 	 * If not, drop the messages.
2466 	 */
2467 	if (flags & VSW_MACRX_PROMISC) {
2468 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2469 			RW_EXIT(&vswp->if_lockrw);
2470 			/* Free messages only if FREEMSG flag specified */
2471 			if (flags & VSW_MACRX_FREEMSG) {
2472 				freemsgchain(mp);
2473 			}
2474 			D1(vswp, "%s:exit\n", __func__);
2475 			return;
2476 		}
2477 	}
2478 	RW_EXIT(&vswp->if_lockrw);
2479 	/*
2480 	 * If COPYMSG flag is passed, then make a copy
2481 	 * of the message chain and send up the copy.
2482 	 */
2483 	if (flags & VSW_MACRX_COPYMSG) {
2484 		mp = copymsgchain(mp);
2485 		if (mp == NULL) {
2486 			D1(vswp, "%s:exit\n", __func__);
2487 			return;
2488 		}
2489 	}
2490 
2491 	D2(vswp, "%s: sending up stack", __func__);
2492 
2493 	mpt = NULL;
2494 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2495 	if (mp != NULL) {
2496 		mac_rx(vswp->if_mh, mrh, mp);
2497 	}
2498 	D1(vswp, "%s:exit\n", __func__);
2499 }
2500 
2501 /* copy mac address of vsw into soft state structure */
2502 static void
2503 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2504 {
2505 	int	i;
2506 
2507 	WRITE_ENTER(&vswp->if_lockrw);
2508 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2509 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2510 		macaddr >>= 8;
2511 	}
2512 	RW_EXIT(&vswp->if_lockrw);
2513 }
2514 
2515 /* Compare VLAN ids, array size expected to be same. */
2516 static boolean_t
2517 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids)
2518 {
2519 	int i, j;
2520 	uint16_t vid;
2521 
2522 	for (i = 0; i < nvids; i++) {
2523 		vid = vids1[i].vl_vid;
2524 		for (j = 0; j < nvids; j++) {
2525 			if (vid == vids2[i].vl_vid)
2526 				break;
2527 		}
2528 		if (j == nvids) {
2529 			return (B_FALSE);
2530 		}
2531 	}
2532 	return (B_TRUE);
2533 }
2534