xref: /titanic_41/usr/src/uts/sun4v/io/vsw.c (revision 22fe2c8844be88ebae6478ca1b0b92c8ec2aef54)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <sys/varargs.h>
53 #include <sys/machsystm.h>
54 #include <sys/modctl.h>
55 #include <sys/modhash.h>
56 #include <sys/mac_provider.h>
57 #include <sys/mac_ether.h>
58 #include <sys/taskq.h>
59 #include <sys/note.h>
60 #include <sys/mach_descrip.h>
61 #include <sys/mac_provider.h>
62 #include <sys/mdeg.h>
63 #include <sys/ldc.h>
64 #include <sys/vsw_fdb.h>
65 #include <sys/vsw.h>
66 #include <sys/vio_mailbox.h>
67 #include <sys/vnet_mailbox.h>
68 #include <sys/vnet_common.h>
69 #include <sys/vio_util.h>
70 #include <sys/sdt.h>
71 #include <sys/atomic.h>
72 #include <sys/callb.h>
73 #include <sys/vlan.h>
74 
75 /*
76  * Function prototypes.
77  */
78 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
79 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
80 static	int vsw_unattach(vsw_t *vswp);
81 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
82 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);
83 void vsw_destroy_rxpools(void *);
84 
85 /* MDEG routines */
86 static	int vsw_mdeg_register(vsw_t *vswp);
87 static	void vsw_mdeg_unregister(vsw_t *vswp);
88 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
89 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
90 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
91 static	int vsw_read_mdprops(vsw_t *vswp);
92 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
93 	mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
94 	uint16_t *nvidsp, uint16_t *default_idp);
95 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
96 	md_t *mdp, mde_cookie_t *node);
97 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
98 	mde_cookie_t node);
99 static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
100 	uint32_t *mtu);
101 static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
102 static	void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
103 	boolean_t *pls);
104 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
105 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
106 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
107 	vsw_vlanid_t *vids2, int nvids);
108 
109 /* Mac driver related routines */
110 static int vsw_mac_register(vsw_t *);
111 static int vsw_mac_unregister(vsw_t *);
112 static int vsw_m_stat(void *, uint_t, uint64_t *);
113 static void vsw_m_stop(void *arg);
114 static int vsw_m_start(void *arg);
115 static int vsw_m_unicst(void *arg, const uint8_t *);
116 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
117 static int vsw_m_promisc(void *arg, boolean_t);
118 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
119 void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state);
120 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
121     mblk_t *mp, vsw_macrx_flags_t flags);
122 void vsw_physlink_state_update(vsw_t *vswp);
123 
124 /*
125  * Functions imported from other files.
126  */
127 extern void vsw_setup_switching_thread(void *arg);
128 extern int vsw_setup_switching_start(vsw_t *vswp);
129 extern void vsw_setup_switching_stop(vsw_t *vswp);
130 extern int vsw_setup_switching(vsw_t *);
131 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
132     vsw_port_t *port, mac_resource_handle_t mrh);
133 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
134 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
135 extern void vsw_del_mcst_vsw(vsw_t *);
136 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
137 extern void vsw_detach_ports(vsw_t *vswp);
138 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
139 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
140 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
141 	md_t *prev_mdp, mde_cookie_t prev_mdex);
142 extern	int vsw_port_attach(vsw_port_t *port);
143 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
144 extern int vsw_mac_open(vsw_t *vswp);
145 extern void vsw_mac_close(vsw_t *vswp);
146 extern void vsw_mac_cleanup_ports(vsw_t *vswp);
147 extern void vsw_unset_addrs(vsw_t *vswp);
148 extern void vsw_setup_switching_post_process(vsw_t *vswp);
149 extern void vsw_create_vlans(void *arg, int type);
150 extern void vsw_destroy_vlans(void *arg, int type);
151 extern void vsw_vlan_add_ids(void *arg, int type);
152 extern void vsw_vlan_remove_ids(void *arg, int type);
153 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
154 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
155 	mblk_t **npt);
156 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
157 extern void vsw_hio_cleanup(vsw_t *vswp);
158 extern void vsw_hio_start_ports(vsw_t *vswp);
159 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
160 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
161 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
162 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
163     vsw_vlanid_t *new_vids, int new_nvids);
164 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
165 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
166 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
167     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
168 extern void vsw_reset_ports(vsw_t *vswp);
169 extern void vsw_port_reset(vsw_port_t *portp);
170 extern void vsw_physlink_update_ports(vsw_t *vswp);
171 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
172 
173 /*
174  * Internal tunables.
175  */
176 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
177 int	vsw_wretries = 100;		/* # of write attempts */
178 int	vsw_desc_delay = 0;		/* delay in us */
179 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
180 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
181 int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
182 					/* 300*3 = 900sec(15min) of max tmout */
183 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
184 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
185 int	vsw_ldc_retries = 5;		/* # of ldc_close() retries */
186 int	vsw_ldc_delay = 1000;		/* 1 ms delay for ldc_close() */
187 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
188 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
189 int	vsw_rxpool_cleanup_delay = 100000;	/* 100ms */
190 
191 
192 uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
193 uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
194 uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
195 
196 /* delay in usec to wait for all references on a fdb entry to be dropped */
197 uint32_t vsw_fdbe_refcnt_delay = 10;
198 
199 /*
200  * Default vlan id. This is only used internally when the "default-vlan-id"
201  * property is not present in the MD device node. Therefore, this should not be
202  * used as a tunable; if this value is changed, the corresponding variable
203  * should be updated to the same value in all vnets connected to this vsw.
204  */
205 uint16_t	vsw_default_vlan_id = 1;
206 
207 /*
208  * Workaround for a version handshake bug in obp's vnet.
209  * If vsw initiates version negotiation starting from the highest version,
 * obp sends a nack and terminates version handshake. To work around
 * this, we do not initiate version handshake when the channel comes up.
212  * Instead, we wait for the peer to send its version info msg and go through
213  * the version protocol exchange. If we successfully negotiate a version,
214  * before sending the ack, we send our version info msg to the peer
215  * using the <major,minor> version that we are about to ack.
216  */
217 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
218 
219 /*
220  * In the absence of "priority-ether-types" property in MD, the following
221  * internal tunable can be set to specify a single priority ethertype.
222  */
223 uint64_t vsw_pri_eth_type = 0;
224 
225 /*
226  * Number of transmit priority buffers that are preallocated per device.
227  * This number is chosen to be a small value to throttle transmission
228  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
229  */
230 uint32_t vsw_pri_tx_nmblks = 64;
231 
232 /*
233  * Number of RARP packets sent to announce macaddr to the physical switch,
234  * after vsw's physical device is changed dynamically or after a guest (client
235  * vnet) is live migrated in.
236  */
237 uint32_t vsw_publish_macaddr_count = 3;
238 
boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanup */
int vsw_hio_cleanup_delay = 10000;	/* 10ms */
242 
243 /* Number of transmit descriptors -  must be power of 2 */
244 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
245 
246 /*
247  * Max number of mblks received in one receive operation.
248  */
249 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
250 
251 /*
252  * Internal tunables for receive buffer pools, that is,  the size and number of
253  * mblks for each pool. At least 3 sizes must be specified if these are used.
254  * The sizes must be specified in increasing order. Non-zero value of the first
255  * size will be used as a hint to use these values instead of the algorithm
256  * that determines the sizes based on MTU.
257  */
258 uint32_t vsw_mblk_size1 = 0;
259 uint32_t vsw_mblk_size2 = 0;
260 uint32_t vsw_mblk_size3 = 0;
261 uint32_t vsw_mblk_size4 = 0;
262 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
263 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
264 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
265 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */
266 
267 /*
268  * Set this to non-zero to enable additional internal receive buffer pools
269  * based on the MTU of the device for better performance at the cost of more
270  * memory consumption. This is turned off by default, to use allocb(9F) for
271  * receive buffer allocations of sizes > 2K.
272  */
273 boolean_t vsw_jumbo_rxpools = B_FALSE;
274 
275 /*
276  * vsw_max_tx_qcount is the maximum # of packets that can be queued
277  * before the tx worker thread begins processing the queue. Its value
278  * is chosen to be 4x the default length of tx descriptor ring.
279  */
280 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
281 
282 /*
283  * MAC callbacks
284  */
/*
 * Positional initializer for the callback vector that vsw hands to the MAC
 * layer. The handlers listed are the vsw_m_*() routines declared at the top
 * of this file; the slot each one occupies is dictated by the layout of
 * mac_callbacks_t, which is not visible here.
 * NOTE(review): confirm the meaning of the leading 0 and of the three
 * trailing NULL slots against <sys/mac_provider.h>.
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	NULL,
	NULL,
	NULL
};
298 
/*
 * Character/block entry points for the vsw device node. open and close are
 * nulldev, property operations go through ddi_prop_op, polling uses
 * nochpoll, and every other entry point is nodev. The table is referenced
 * from vsw_ops below.
 */
static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};
319 
/*
 * Device operations for the vsw driver. Attach and detach are handled by
 * vsw_attach() and vsw_detach(); the character/block entry points come from
 * vsw_cb_ops. This structure is also what _init()/_fini() pass to
 * mac_init_ops()/mac_fini_ops().
 */
static	struct	dev_ops	vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};
333 
/*
 * Driver linkage record: ties the generic driver module operations
 * (mod_driverops) to this driver's dev_ops (vsw_ops) under the description
 * string "sun4v Virtual Switch". It is referenced from modlinkage.
 */
extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch",
	&vsw_ops,
};
340 
/*
 * Take (LDC_ENTER_LOCK) or drop (LDC_EXIT_LOCK) all three per-channel
 * mutexes. They are acquired in the order ldc_cblock, ldc_rxlock,
 * ldc_txlock and released in the reverse order.
 *
 * NOTE(review): each macro expands to three separate statements and already
 * ends in a semicolon, so it must not be used as the unbraced body of an
 * if/else/loop; only the first mutex call would be governed by the
 * condition.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->ldc_cblock));\
				mutex_enter(&((ldcp)->ldc_rxlock));\
				mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
				mutex_exit(&((ldcp)->ldc_txlock));\
				mutex_exit(&((ldcp)->ldc_rxlock));\
				mutex_exit(&((ldcp)->ldc_cblock));
349 
350 /* Driver soft state ptr  */
351 static void	*vsw_state;
352 
353 /*
354  * Linked list of "vsw_t" structures - one per instance.
355  */
356 vsw_t		*vsw_head = NULL;
357 krwlock_t	vsw_rw;
358 
359 /*
360  * Property names
361  */
362 static char vdev_propname[] = "virtual-device";
363 static char vsw_propname[] = "virtual-network-switch";
364 static char physdev_propname[] = "vsw-phys-dev";
365 static char smode_propname[] = "vsw-switch-mode";
366 static char macaddr_propname[] = "local-mac-address";
367 static char remaddr_propname[] = "remote-mac-address";
368 static char ldcids_propname[] = "ldc-ids";
369 static char chan_propname[] = "channel-endpoint";
370 static char id_propname[] = "id";
371 static char reg_propname[] = "reg";
372 static char pri_types_propname[] = "priority-ether-types";
373 static char vsw_pvid_propname[] = "port-vlan-id";
374 static char vsw_vid_propname[] = "vlan-id";
375 static char vsw_dvid_propname[] = "default-vlan-id";
376 static char port_pvid_propname[] = "remote-port-vlan-id";
377 static char port_vid_propname[] = "remote-vlan-id";
378 static char hybrid_propname[] = "hybrid";
379 static char vsw_mtu_propname[] = "mtu";
380 static char vsw_linkprop_propname[] = "linkprop";
381 
382 /*
383  * Matching criteria passed to the MDEG to register interest
384  * in changes to 'virtual-device-port' nodes identified by their
385  * 'id' property.
386  */
387 static md_prop_match_t vport_prop_match[] = {
388 	{ MDET_PROP_VAL,    "id"   },
389 	{ MDET_LIST_END,    NULL    }
390 };
391 
392 static mdeg_node_match_t vport_match = { "virtual-device-port",
393 						vport_prop_match };
394 
395 /*
396  * Matching criteria passed to the MDEG to register interest
397  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
398  * by their 'name' and 'cfg-handle' properties.
399  */
400 static md_prop_match_t vdev_prop_match[] = {
401 	{ MDET_PROP_STR,    "name"   },
402 	{ MDET_PROP_VAL,    "cfg-handle" },
403 	{ MDET_LIST_END,    NULL    }
404 };
405 
406 static mdeg_node_match_t vdev_match = { "virtual-device",
407 						vdev_prop_match };
408 
409 
410 /*
411  * Specification of an MD node passed to the MDEG to filter any
412  * 'vport' nodes that do not belong to the specified node. This
413  * template is copied for each vsw instance and filled in with
414  * the appropriate 'cfg-handle' value before being passed to the MDEG.
415  */
416 static mdeg_prop_spec_t vsw_prop_template[] = {
417 	{ MDET_PROP_STR,    "name",		vsw_propname },
418 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
419 	{ MDET_LIST_END,    NULL,		NULL	}
420 };
421 
/*
 * Store 'val' in the ps_val member of the second entry (index 1) of an
 * mdeg_prop_spec_t array; in vsw_prop_template that entry is the
 * "cfg-handle" property.
 * NOTE(review): the expansion carries its own trailing semicolon, so an
 * invocation followed by ';' yields an extra empty statement.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);
423 
424 #ifdef	DEBUG
425 /*
426  * Print debug messages - set to 0x1f to enable all msgs
427  * or 0x0 to turn all off.
428  */
429 int vswdbg = 0x0;
430 
431 /*
432  * debug levels:
433  * 0x01:	Function entry/exit tracing
434  * 0x02:	Internal function messages
435  * 0x04:	Verbose internal messages
436  * 0x08:	Warning messages
437  * 0x10:	Error messages
438  */
439 
440 void
441 vswdebug(vsw_t *vswp, const char *fmt, ...)
442 {
443 	char buf[512];
444 	va_list ap;
445 
446 	va_start(ap, fmt);
447 	(void) vsprintf(buf, fmt, ap);
448 	va_end(ap);
449 
450 	if (vswp == NULL)
451 		cmn_err(CE_CONT, "%s\n", buf);
452 	else
453 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
454 }
455 
456 #endif	/* DEBUG */
457 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info(). It lists a single linkage element, the
 * driver record vswmodldrv, followed by the terminating NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};
463 
464 int
465 _init(void)
466 {
467 	int status;
468 
469 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
470 
471 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
472 	if (status != 0) {
473 		return (status);
474 	}
475 
476 	mac_init_ops(&vsw_ops, DRV_NAME);
477 	status = mod_install(&modlinkage);
478 	if (status != 0) {
479 		ddi_soft_state_fini(&vsw_state);
480 	}
481 	return (status);
482 }
483 
484 int
485 _fini(void)
486 {
487 	int status;
488 
489 	status = mod_remove(&modlinkage);
490 	if (status != 0)
491 		return (status);
492 	mac_fini_ops(&vsw_ops);
493 	ddi_soft_state_fini(&vsw_state);
494 
495 	rw_destroy(&vsw_rw);
496 
497 	return (status);
498 }
499 
500 int
501 _info(struct modinfo *modinfop)
502 {
503 	return (mod_info(&modlinkage, modinfop));
504 }
505 
506 static int
507 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
508 {
509 	vsw_t			*vswp;
510 	int			instance;
511 	char			hashname[MAXNAMELEN];
512 	char			qname[TASKQ_NAMELEN];
513 	vsw_attach_progress_t	progress = PROG_init;
514 	int			rv;
515 
516 	switch (cmd) {
517 	case DDI_ATTACH:
518 		break;
519 	case DDI_RESUME:
520 		/* nothing to do for this non-device */
521 		return (DDI_SUCCESS);
522 	case DDI_PM_RESUME:
523 	default:
524 		return (DDI_FAILURE);
525 	}
526 
527 	instance = ddi_get_instance(dip);
528 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
529 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
530 		return (DDI_FAILURE);
531 	}
532 	vswp = ddi_get_soft_state(vsw_state, instance);
533 
534 	if (vswp == NULL) {
535 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
536 		goto vsw_attach_fail;
537 	}
538 
539 	vswp->dip = dip;
540 	vswp->instance = instance;
541 	vswp->phys_link_state = LINK_STATE_UNKNOWN;
542 	ddi_set_driver_private(dip, (caddr_t)vswp);
543 
544 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
545 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
546 	mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL);
547 	cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL);
548 	rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL);
549 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
550 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
551 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
552 
553 	progress |= PROG_locks;
554 
555 	rv = vsw_read_mdprops(vswp);
556 	if (rv != 0)
557 		goto vsw_attach_fail;
558 
559 	progress |= PROG_readmd;
560 
561 	/* setup the unicast forwarding database  */
562 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
563 	    vswp->instance);
564 	D2(vswp, "creating unicast hash table (%s)...", hashname);
565 	vswp->fdb_nchains = vsw_fdb_nchains;
566 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
567 	    mod_hash_null_valdtor, sizeof (void *));
568 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
569 	progress |= PROG_fdb;
570 
571 	/* setup the multicast fowarding database */
572 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
573 	    vswp->instance);
574 	D2(vswp, "creating multicast hash table %s)...", hashname);
575 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
576 	    mod_hash_null_valdtor, sizeof (void *));
577 
578 	progress |= PROG_mfdb;
579 
580 	/*
581 	 * Create the taskq which will process all the VIO
582 	 * control messages.
583 	 */
584 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
585 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
586 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
587 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
588 		    vswp->instance);
589 		goto vsw_attach_fail;
590 	}
591 
592 	progress |= PROG_taskq;
593 
594 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_rxp_taskq%d",
595 	    vswp->instance);
596 	if ((vswp->rxp_taskq = ddi_taskq_create(vswp->dip, qname, 1,
597 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
598 		cmn_err(CE_WARN, "!vsw%d: Unable to create rxp task queue",
599 		    vswp->instance);
600 		goto vsw_attach_fail;
601 	}
602 
603 	progress |= PROG_rxp_taskq;
604 
605 	/* prevent auto-detaching */
606 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
607 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
608 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
609 		    "instance %u", DDI_NO_AUTODETACH, instance);
610 	}
611 
612 	/*
613 	 * The null switching function is set to avoid panic until
614 	 * switch mode is setup.
615 	 */
616 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
617 
618 	/*
619 	 * Setup the required switching mode, based on the mdprops that we read
620 	 * earlier. We start a thread to do this, to avoid calling mac_open()
621 	 * directly from attach().
622 	 */
623 	rv = vsw_setup_switching_start(vswp);
624 	if (rv != 0) {
625 		goto vsw_attach_fail;
626 	}
627 
628 	progress |= PROG_swmode;
629 
630 	/* Register with mac layer as a provider */
631 	rv = vsw_mac_register(vswp);
632 	if (rv != 0)
633 		goto vsw_attach_fail;
634 
635 	progress |= PROG_macreg;
636 
637 	/*
638 	 * Now we have everything setup, register an interest in
639 	 * specific MD nodes.
640 	 *
641 	 * The callback is invoked in 2 cases, firstly if upon mdeg
642 	 * registration there are existing nodes which match our specified
643 	 * criteria, and secondly if the MD is changed (and again, there
644 	 * are nodes which we are interested in present within it. Note
645 	 * that our callback will be invoked even if our specified nodes
646 	 * have not actually changed).
647 	 *
648 	 */
649 	rv = vsw_mdeg_register(vswp);
650 	if (rv != 0)
651 		goto vsw_attach_fail;
652 
653 	progress |= PROG_mdreg;
654 
655 	vswp->attach_progress = progress;
656 
657 	WRITE_ENTER(&vsw_rw);
658 	vswp->next = vsw_head;
659 	vsw_head = vswp;
660 	RW_EXIT(&vsw_rw);
661 
662 	ddi_report_dev(vswp->dip);
663 	return (DDI_SUCCESS);
664 
665 vsw_attach_fail:
666 	DERR(NULL, "vsw_attach: failed");
667 
668 	vswp->attach_progress = progress;
669 	(void) vsw_unattach(vswp);
670 	ddi_soft_state_free(vsw_state, instance);
671 	return (DDI_FAILURE);
672 }
673 
674 static int
675 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
676 {
677 	vsw_t			**vswpp, *vswp;
678 	int 			instance;
679 
680 	instance = ddi_get_instance(dip);
681 	vswp = ddi_get_soft_state(vsw_state, instance);
682 
683 	if (vswp == NULL) {
684 		return (DDI_FAILURE);
685 	}
686 
687 	switch (cmd) {
688 	case DDI_DETACH:
689 		break;
690 	case DDI_SUSPEND:
691 	case DDI_PM_SUSPEND:
692 	default:
693 		return (DDI_FAILURE);
694 	}
695 
696 	D2(vswp, "detaching instance %d", instance);
697 
698 	if (vsw_unattach(vswp) != 0) {
699 		return (DDI_FAILURE);
700 	}
701 
702 	ddi_remove_minor_node(dip, NULL);
703 
704 	WRITE_ENTER(&vsw_rw);
705 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
706 		if (*vswpp == vswp) {
707 			*vswpp = vswp->next;
708 			break;
709 		}
710 	}
711 	RW_EXIT(&vsw_rw);
712 
713 	ddi_soft_state_free(vsw_state, instance);
714 
715 	return (DDI_SUCCESS);
716 }
717 
718 /*
719  * Common routine to handle vsw_attach() failure and vsw_detach(). Note that
720  * the only reason this function could fail is if mac_unregister() fails.
721  * Otherwise, this function must ensure that all resources are freed and return
722  * success.
723  */
724 static int
725 vsw_unattach(vsw_t *vswp)
726 {
727 	vsw_attach_progress_t	progress;
728 
729 	progress = vswp->attach_progress;
730 
731 	/*
732 	 * Unregister from the gldv3 subsystem. This can fail, in particular
733 	 * if there are still any open references to this mac device; in which
734 	 * case we just return failure without continuing to detach further.
735 	 */
736 	if (progress & PROG_macreg) {
737 		if (vsw_mac_unregister(vswp) != 0) {
738 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
739 			    "MAC layer", vswp->instance);
740 			return (1);
741 		}
742 		progress &= ~PROG_macreg;
743 	}
744 
745 	/*
746 	 * Now that we have unregistered from gldv3, we must finish all other
747 	 * steps and successfully return from this function; otherwise we will
748 	 * end up leaving the device in a broken/unusable state.
749 	 *
750 	 * If we have registered with mdeg, unregister now to stop further
751 	 * callbacks to this vsw device and/or its ports. Then, detach any
752 	 * existing ports.
753 	 */
754 	if (progress & PROG_mdreg) {
755 		vsw_mdeg_unregister(vswp);
756 		vsw_detach_ports(vswp);
757 		progress &= ~PROG_mdreg;
758 	}
759 
760 	/*
761 	 * If we have started a thread to setup the switching mode, stop it, if
762 	 * it is still running. If it has finished setting up the switching
763 	 * mode, then we need to clean up some additional things if we are
764 	 * running in L2 mode: first free up any hybrid resources; then stop
765 	 * and close the underlying physical device. Note that we would have
766 	 * already released all per mac_client resources (ucast, mcast addrs,
767 	 * hio-shares etc) as all the ports are detached and if the vsw device
768 	 * itself was in use as an interface, it has been unplumbed (otherwise
769 	 * mac_unregister() above would fail).
770 	 */
771 	if (progress & PROG_swmode) {
772 
773 		vsw_setup_switching_stop(vswp);
774 
775 		if (vswp->hio_capable == B_TRUE) {
776 			vsw_hio_cleanup(vswp);
777 			vswp->hio_capable = B_FALSE;
778 		}
779 
780 		mutex_enter(&vswp->mac_lock);
781 		vsw_mac_close(vswp);
782 		mutex_exit(&vswp->mac_lock);
783 
784 		progress &= ~PROG_swmode;
785 	}
786 
787 	/*
788 	 * We now destroy the taskq used to clean up rx mblk pools that
789 	 * couldn't be destroyed when the ports/channels were detached.
790 	 * We implicitly wait for those tasks to complete in
791 	 * ddi_taskq_destroy().
792 	 */
793 	if (progress & PROG_rxp_taskq) {
794 		ddi_taskq_destroy(vswp->rxp_taskq);
795 		progress &= ~PROG_rxp_taskq;
796 	}
797 
798 	/*
799 	 * By now any pending tasks have finished and the underlying
800 	 * ldc's have been destroyed, so its safe to delete the control
801 	 * message taskq.
802 	 */
803 	if (progress & PROG_taskq) {
804 		ddi_taskq_destroy(vswp->taskq_p);
805 		progress &= ~PROG_taskq;
806 	}
807 
808 	/* Destroy the multicast hash table */
809 	if (progress & PROG_mfdb) {
810 		mod_hash_destroy_hash(vswp->mfdb);
811 		progress &= ~PROG_mfdb;
812 	}
813 
814 	/* Destroy the vlan hash table and fdb */
815 	if (progress & PROG_fdb) {
816 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
817 		mod_hash_destroy_hash(vswp->fdb_hashp);
818 		progress &= ~PROG_fdb;
819 	}
820 
821 	if (progress & PROG_readmd) {
822 		if (VSW_PRI_ETH_DEFINED(vswp)) {
823 			kmem_free(vswp->pri_types,
824 			    sizeof (uint16_t) * vswp->pri_num_types);
825 			(void) vio_destroy_mblks(vswp->pri_tx_vmp);
826 		}
827 		progress &= ~PROG_readmd;
828 	}
829 
830 	if (progress & PROG_locks) {
831 		rw_destroy(&vswp->plist.lockrw);
832 		rw_destroy(&vswp->mfdbrw);
833 		rw_destroy(&vswp->if_lockrw);
834 		rw_destroy(&vswp->maccl_rwlock);
835 		cv_destroy(&vswp->sw_thr_cv);
836 		mutex_destroy(&vswp->sw_thr_lock);
837 		mutex_destroy(&vswp->mca_lock);
838 		mutex_destroy(&vswp->mac_lock);
839 		progress &= ~PROG_locks;
840 	}
841 
842 	vswp->attach_progress = progress;
843 
844 	return (0);
845 }
846 
847 void
848 vsw_destroy_rxpools(void *arg)
849 {
850 	vio_mblk_pool_t	*poolp = (vio_mblk_pool_t *)arg;
851 	vio_mblk_pool_t	*npoolp;
852 
853 	while (poolp != NULL) {
854 		npoolp =  poolp->nextp;
855 		while (vio_destroy_mblks(poolp) != 0) {
856 			drv_usecwait(vsw_rxpool_cleanup_delay);
857 		}
858 		poolp = npoolp;
859 	}
860 }
861 
862 /*
863  * Get the value of the "vsw-phys-dev" property in the specified
864  * node. This property is the name of the physical device that
865  * the virtual switch will use to talk to the outside world.
866  *
867  * Note it is valid for this property to be NULL (but the property
868  * itself must exist). Callers of this routine should verify that
869  * the value returned is what they expected (i.e. either NULL or non NULL).
870  *
871  * On success returns value of the property in region pointed to by
872  * the 'name' argument, and with return value of 0. Otherwise returns 1.
873  */
874 static int
875 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
876 {
877 	int		len = 0;
878 	int		instance;
879 	char		*physname = NULL;
880 	char		*dev;
881 	const char	*dev_name;
882 	char		myname[MAXNAMELEN];
883 
884 	dev_name = ddi_driver_name(vswp->dip);
885 	instance = ddi_get_instance(vswp->dip);
886 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
887 
888 	if (md_get_prop_data(mdp, node, physdev_propname,
889 	    (uint8_t **)(&physname), &len) != 0) {
890 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
891 		    "device(s) from MD", vswp->instance);
892 		return (1);
893 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
894 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
895 		    vswp->instance, physname);
896 		return (1);
897 	} else if (strcmp(myname, physname) == 0) {
898 		/*
899 		 * Prevent the vswitch from opening itself as the
900 		 * network device.
901 		 */
902 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
903 		    vswp->instance, physname);
904 		return (1);
905 	} else {
906 		(void) strncpy(name, physname, strlen(physname) + 1);
907 		D2(vswp, "%s: using first device specified (%s)",
908 		    __func__, physname);
909 	}
910 
911 #ifdef DEBUG
912 	/*
913 	 * As a temporary measure to aid testing we check to see if there
914 	 * is a vsw.conf file present. If there is we use the value of the
915 	 * vsw_physname property in the file as the name of the physical
916 	 * device, overriding the value from the MD.
917 	 *
918 	 * There may be multiple devices listed, but for the moment
919 	 * we just use the first one.
920 	 */
921 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
922 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
923 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
924 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
925 			    vswp->instance, dev);
926 			ddi_prop_free(dev);
927 			return (1);
928 		} else {
929 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
930 			    "config file", vswp->instance, dev);
931 
932 			(void) strncpy(name, dev, strlen(dev) + 1);
933 		}
934 
935 		ddi_prop_free(dev);
936 	}
937 #endif
938 
939 	return (0);
940 }
941 
942 /*
943  * Read the 'vsw-switch-mode' property from the specified MD node.
944  *
945  * Returns 0 on success, otherwise returns 1.
946  */
947 static int
948 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode)
949 {
950 	int		len = 0;
951 	char		*smode = NULL;
952 	char		*curr_mode = NULL;
953 
954 	D1(vswp, "%s: enter", __func__);
955 
956 	/*
957 	 * Get the switch-mode property. The modes are listed in
958 	 * decreasing order of preference, i.e. prefered mode is
959 	 * first item in list.
960 	 */
961 	len = 0;
962 	if (md_get_prop_data(mdp, node, smode_propname,
963 	    (uint8_t **)(&smode), &len) != 0) {
964 		/*
965 		 * Unable to get switch-mode property from MD, nothing
966 		 * more we can do.
967 		 */
968 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
969 		    " from the MD", vswp->instance);
970 		return (1);
971 	}
972 
973 	curr_mode = smode;
974 	/*
975 	 * Modes of operation:
976 	 * 'switched'	 - layer 2 switching, underlying HW in
977 	 *			programmed mode.
978 	 * 'promiscuous' - layer 2 switching, underlying HW in
979 	 *			promiscuous mode.
980 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
981 	 *			in non-promiscuous mode.
982 	 */
983 	while (curr_mode < (smode + len)) {
984 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
985 		if (strcmp(curr_mode, "switched") == 0) {
986 			*mode = VSW_LAYER2;
987 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
988 			*mode = VSW_LAYER2 | VSW_LAYER2_PROMISC;
989 		} else if (strcmp(curr_mode, "routed") == 0) {
990 			*mode = VSW_LAYER3;
991 		} else {
992 			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
993 			    "setting to default switched mode",
994 			    vswp->instance, curr_mode);
995 			*mode = VSW_LAYER2;
996 		}
997 		curr_mode += strlen(curr_mode) + 1;
998 	}
999 
1000 	D2(vswp, "%s: %d mode", __func__, *mode);
1001 
1002 	D1(vswp, "%s: exit", __func__);
1003 
1004 	return (0);
1005 }
1006 
1007 /*
1008  * Register with the MAC layer as a network device, so we
1009  * can be plumbed if necessary.
1010  */
1011 static int
1012 vsw_mac_register(vsw_t *vswp)
1013 {
1014 	mac_register_t	*macp;
1015 	int		rv;
1016 
1017 	D1(vswp, "%s: enter", __func__);
1018 
1019 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1020 		return (EINVAL);
1021 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1022 	macp->m_driver = vswp;
1023 	macp->m_dip = vswp->dip;
1024 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1025 	macp->m_callbacks = &vsw_m_callbacks;
1026 	macp->m_min_sdu = 0;
1027 	macp->m_max_sdu = vswp->mtu;
1028 	macp->m_margin = VLAN_TAGSZ;
1029 	rv = mac_register(macp, &vswp->if_mh);
1030 	mac_free(macp);
1031 	if (rv != 0) {
1032 		/*
1033 		 * Treat this as a non-fatal error as we may be
1034 		 * able to operate in some other mode.
1035 		 */
1036 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1037 		    "a provider with MAC layer", vswp->instance);
1038 		return (rv);
1039 	}
1040 
1041 	vswp->if_state |= VSW_IF_REG;
1042 
1043 	D1(vswp, "%s: exit", __func__);
1044 
1045 	return (rv);
1046 }
1047 
1048 static int
1049 vsw_mac_unregister(vsw_t *vswp)
1050 {
1051 	int		rv = 0;
1052 
1053 	D1(vswp, "%s: enter", __func__);
1054 
1055 	WRITE_ENTER(&vswp->if_lockrw);
1056 
1057 	if (vswp->if_state & VSW_IF_REG) {
1058 		rv = mac_unregister(vswp->if_mh);
1059 		if (rv != 0) {
1060 			DWARN(vswp, "%s: unable to unregister from MAC "
1061 			    "framework", __func__);
1062 
1063 			RW_EXIT(&vswp->if_lockrw);
1064 			D1(vswp, "%s: fail exit", __func__);
1065 			return (rv);
1066 		}
1067 
1068 		/* mark i/f as down and unregistered */
1069 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1070 	}
1071 	RW_EXIT(&vswp->if_lockrw);
1072 
1073 	D1(vswp, "%s: exit", __func__);
1074 
1075 	return (rv);
1076 }
1077 
1078 static int
1079 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1080 {
1081 	vsw_t			*vswp = (vsw_t *)arg;
1082 
1083 	D1(vswp, "%s: enter", __func__);
1084 
1085 	mutex_enter(&vswp->mac_lock);
1086 	if (vswp->mh == NULL) {
1087 		mutex_exit(&vswp->mac_lock);
1088 		return (EINVAL);
1089 	}
1090 
1091 	/* return stats from underlying device */
1092 	*val = mac_stat_get(vswp->mh, stat);
1093 
1094 	mutex_exit(&vswp->mac_lock);
1095 
1096 	return (0);
1097 }
1098 
1099 static void
1100 vsw_m_stop(void *arg)
1101 {
1102 	vsw_t	*vswp = (vsw_t *)arg;
1103 
1104 	D1(vswp, "%s: enter", __func__);
1105 
1106 	WRITE_ENTER(&vswp->if_lockrw);
1107 	vswp->if_state &= ~VSW_IF_UP;
1108 	RW_EXIT(&vswp->if_lockrw);
1109 
1110 	/* Cleanup and close the mac client */
1111 	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
1112 
1113 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1114 }
1115 
1116 static int
1117 vsw_m_start(void *arg)
1118 {
1119 	int		rv;
1120 	vsw_t		*vswp = (vsw_t *)arg;
1121 
1122 	D1(vswp, "%s: enter", __func__);
1123 
1124 	WRITE_ENTER(&vswp->if_lockrw);
1125 
1126 	vswp->if_state |= VSW_IF_UP;
1127 
1128 	if (vswp->switching_setup_done == B_FALSE) {
1129 		/*
1130 		 * If the switching mode has not been setup yet, just
1131 		 * return. The unicast address will be programmed
1132 		 * after the physical device is successfully setup by the
1133 		 * timeout handler.
1134 		 */
1135 		RW_EXIT(&vswp->if_lockrw);
1136 		return (0);
1137 	}
1138 
1139 	/* if in layer2 mode, program unicast address. */
1140 	if (vswp->mh != NULL) {
1141 		/* Init a mac client and program addresses */
1142 		rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
1143 		if (rv != 0) {
1144 			cmn_err(CE_NOTE,
1145 			    "!vsw%d: failed to program interface "
1146 			    "unicast address\n", vswp->instance);
1147 		}
1148 	}
1149 
1150 	RW_EXIT(&vswp->if_lockrw);
1151 
1152 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1153 	return (0);
1154 }
1155 
1156 /*
1157  * Change the local interface address.
1158  *
1159  * Note: we don't support this entry point. The local
1160  * mac address of the switch can only be changed via its
1161  * MD node properties.
1162  */
1163 static int
1164 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1165 {
1166 	_NOTE(ARGUNUSED(arg, macaddr))
1167 
1168 	return (DDI_FAILURE);
1169 }
1170 
1171 static int
1172 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1173 {
1174 	vsw_t		*vswp = (vsw_t *)arg;
1175 	mcst_addr_t	*mcst_p = NULL;
1176 	uint64_t	addr = 0x0;
1177 	int		i, ret = 0;
1178 
1179 	D1(vswp, "%s: enter", __func__);
1180 
1181 	/*
1182 	 * Convert address into form that can be used
1183 	 * as hash table key.
1184 	 */
1185 	for (i = 0; i < ETHERADDRL; i++) {
1186 		addr = (addr << 8) | mca[i];
1187 	}
1188 
1189 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1190 
1191 	if (add) {
1192 		D2(vswp, "%s: adding multicast", __func__);
1193 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1194 			/*
1195 			 * Update the list of multicast addresses
1196 			 * contained within the vsw_t structure to
1197 			 * include this new one.
1198 			 */
1199 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1200 			if (mcst_p == NULL) {
1201 				DERR(vswp, "%s unable to alloc mem", __func__);
1202 				(void) vsw_del_mcst(vswp,
1203 				    VSW_LOCALDEV, addr, NULL);
1204 				return (1);
1205 			}
1206 			mcst_p->addr = addr;
1207 			ether_copy(mca, &mcst_p->mca);
1208 
1209 			/*
1210 			 * Call into the underlying driver to program the
1211 			 * address into HW.
1212 			 */
1213 			ret = vsw_mac_multicast_add(vswp, NULL, mcst_p,
1214 			    VSW_LOCALDEV);
1215 			if (ret != 0) {
1216 				(void) vsw_del_mcst(vswp,
1217 				    VSW_LOCALDEV, addr, NULL);
1218 				kmem_free(mcst_p, sizeof (*mcst_p));
1219 				return (ret);
1220 			}
1221 
1222 			mutex_enter(&vswp->mca_lock);
1223 			mcst_p->nextp = vswp->mcap;
1224 			vswp->mcap = mcst_p;
1225 			mutex_exit(&vswp->mca_lock);
1226 		} else {
1227 			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
1228 			    "address", vswp->instance);
1229 		}
1230 		return (ret);
1231 	}
1232 
1233 	D2(vswp, "%s: removing multicast", __func__);
1234 	/*
1235 	 * Remove the address from the hash table..
1236 	 */
1237 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1238 
1239 		/*
1240 		 * ..and then from the list maintained in the
1241 		 * vsw_t structure.
1242 		 */
1243 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1244 		ASSERT(mcst_p != NULL);
1245 
1246 		vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV);
1247 		kmem_free(mcst_p, sizeof (*mcst_p));
1248 	}
1249 
1250 	D1(vswp, "%s: exit", __func__);
1251 
1252 	return (0);
1253 }
1254 
1255 static int
1256 vsw_m_promisc(void *arg, boolean_t on)
1257 {
1258 	vsw_t		*vswp = (vsw_t *)arg;
1259 
1260 	D1(vswp, "%s: enter", __func__);
1261 
1262 	WRITE_ENTER(&vswp->if_lockrw);
1263 	if (on)
1264 		vswp->if_state |= VSW_IF_PROMISC;
1265 	else
1266 		vswp->if_state &= ~VSW_IF_PROMISC;
1267 	RW_EXIT(&vswp->if_lockrw);
1268 
1269 	D1(vswp, "%s: exit", __func__);
1270 
1271 	return (0);
1272 }
1273 
1274 static mblk_t *
1275 vsw_m_tx(void *arg, mblk_t *mp)
1276 {
1277 	vsw_t		*vswp = (vsw_t *)arg;
1278 
1279 	D1(vswp, "%s: enter", __func__);
1280 
1281 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1282 
1283 	if (mp == NULL) {
1284 		return (NULL);
1285 	}
1286 
1287 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1288 
1289 	D1(vswp, "%s: exit", __func__);
1290 
1291 	return (NULL);
1292 }
1293 
1294 /*
1295  * Register for machine description (MD) updates.
1296  *
1297  * Returns 0 on success, 1 on failure.
1298  */
1299 static int
1300 vsw_mdeg_register(vsw_t *vswp)
1301 {
1302 	mdeg_prop_spec_t	*pspecp;
1303 	mdeg_node_spec_t	*inst_specp;
1304 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1305 	size_t			templatesz;
1306 	int			rv;
1307 
1308 	D1(vswp, "%s: enter", __func__);
1309 
1310 	/*
1311 	 * Allocate and initialize a per-instance copy
1312 	 * of the global property spec array that will
1313 	 * uniquely identify this vsw instance.
1314 	 */
1315 	templatesz = sizeof (vsw_prop_template);
1316 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1317 
1318 	bcopy(vsw_prop_template, pspecp, templatesz);
1319 
1320 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1321 
1322 	/* initialize the complete prop spec structure */
1323 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1324 	inst_specp->namep = "virtual-device";
1325 	inst_specp->specp = pspecp;
1326 
1327 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1328 	    vswp->regprop);
1329 	/*
1330 	 * Register an interest in 'virtual-device' nodes with a
1331 	 * 'name' property of 'virtual-network-switch'
1332 	 */
1333 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1334 	    (void *)vswp, &mdeg_hdl);
1335 	if (rv != MDEG_SUCCESS) {
1336 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1337 		    __func__, rv);
1338 		goto mdeg_reg_fail;
1339 	}
1340 
1341 	/*
1342 	 * Register an interest in 'vsw-port' nodes.
1343 	 */
1344 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1345 	    (void *)vswp, &mdeg_port_hdl);
1346 	if (rv != MDEG_SUCCESS) {
1347 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1348 		(void) mdeg_unregister(mdeg_hdl);
1349 		goto mdeg_reg_fail;
1350 	}
1351 
1352 	/* save off data that will be needed later */
1353 	vswp->inst_spec = inst_specp;
1354 	vswp->mdeg_hdl = mdeg_hdl;
1355 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1356 
1357 	D1(vswp, "%s: exit", __func__);
1358 	return (0);
1359 
1360 mdeg_reg_fail:
1361 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1362 	    vswp->instance);
1363 	kmem_free(pspecp, templatesz);
1364 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1365 
1366 	vswp->mdeg_hdl = NULL;
1367 	vswp->mdeg_port_hdl = NULL;
1368 
1369 	return (1);
1370 }
1371 
1372 static void
1373 vsw_mdeg_unregister(vsw_t *vswp)
1374 {
1375 	D1(vswp, "vsw_mdeg_unregister: enter");
1376 
1377 	if (vswp->mdeg_hdl != NULL)
1378 		(void) mdeg_unregister(vswp->mdeg_hdl);
1379 
1380 	if (vswp->mdeg_port_hdl != NULL)
1381 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1382 
1383 	if (vswp->inst_spec != NULL) {
1384 		if (vswp->inst_spec->specp != NULL) {
1385 			(void) kmem_free(vswp->inst_spec->specp,
1386 			    sizeof (vsw_prop_template));
1387 			vswp->inst_spec->specp = NULL;
1388 		}
1389 
1390 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1391 		vswp->inst_spec = NULL;
1392 	}
1393 
1394 	D1(vswp, "vsw_mdeg_unregister: exit");
1395 }
1396 
1397 /*
1398  * Mdeg callback invoked for the vsw node itself.
1399  */
1400 static int
1401 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1402 {
1403 	vsw_t		*vswp;
1404 	md_t		*mdp;
1405 	mde_cookie_t	node;
1406 	uint64_t	inst;
1407 	char		*node_name = NULL;
1408 
1409 	if (resp == NULL)
1410 		return (MDEG_FAILURE);
1411 
1412 	vswp = (vsw_t *)cb_argp;
1413 
1414 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1415 	    " : prev matched %d", __func__, resp->added.nelem,
1416 	    resp->removed.nelem, resp->match_curr.nelem,
1417 	    resp->match_prev.nelem);
1418 
1419 	/*
1420 	 * We get an initial callback for this node as 'added'
1421 	 * after registering with mdeg. Note that we would have
1422 	 * already gathered information about this vsw node by
1423 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1424 	 * So, there is a window where the properties of this
1425 	 * node might have changed when we get this initial 'added'
1426 	 * callback. We handle this as if an update occured
1427 	 * and invoke the same function which handles updates to
1428 	 * the properties of this vsw-node if any.
1429 	 *
1430 	 * A non-zero 'match' value indicates that the MD has been
1431 	 * updated and that a virtual-network-switch node is
1432 	 * present which may or may not have been updated. It is
1433 	 * up to the clients to examine their own nodes and
1434 	 * determine if they have changed.
1435 	 */
1436 	if (resp->added.nelem != 0) {
1437 
1438 		if (resp->added.nelem != 1) {
1439 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1440 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1441 			return (MDEG_FAILURE);
1442 		}
1443 
1444 		mdp = resp->added.mdp;
1445 		node = resp->added.mdep[0];
1446 
1447 	} else if (resp->match_curr.nelem != 0) {
1448 
1449 		if (resp->match_curr.nelem != 1) {
1450 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1451 			    "invalid: %d\n", vswp->instance,
1452 			    resp->match_curr.nelem);
1453 			return (MDEG_FAILURE);
1454 		}
1455 
1456 		mdp = resp->match_curr.mdp;
1457 		node = resp->match_curr.mdep[0];
1458 
1459 	} else {
1460 		return (MDEG_FAILURE);
1461 	}
1462 
1463 	/* Validate name and instance */
1464 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1465 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1466 		return (MDEG_FAILURE);
1467 	}
1468 
1469 	/* is this a virtual-network-switch? */
1470 	if (strcmp(node_name, vsw_propname) != 0) {
1471 		DERR(vswp, "%s: Invalid node name: %s\n",
1472 		    __func__, node_name);
1473 		return (MDEG_FAILURE);
1474 	}
1475 
1476 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1477 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1478 		    __func__);
1479 		return (MDEG_FAILURE);
1480 	}
1481 
1482 	/* is this the right instance of vsw? */
1483 	if (inst != vswp->regprop) {
1484 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1485 		    __func__, inst);
1486 		return (MDEG_FAILURE);
1487 	}
1488 
1489 	vsw_update_md_prop(vswp, mdp, node);
1490 
1491 	return (MDEG_SUCCESS);
1492 }
1493 
1494 /*
1495  * Mdeg callback invoked for changes to the vsw-port nodes
1496  * under the vsw node.
1497  */
1498 static int
1499 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1500 {
1501 	vsw_t		*vswp;
1502 	int		idx;
1503 	md_t		*mdp;
1504 	mde_cookie_t	node;
1505 	uint64_t	inst;
1506 	int		rv;
1507 
1508 	if ((resp == NULL) || (cb_argp == NULL))
1509 		return (MDEG_FAILURE);
1510 
1511 	vswp = (vsw_t *)cb_argp;
1512 
1513 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1514 	    " : prev matched %d", __func__, resp->added.nelem,
1515 	    resp->removed.nelem, resp->match_curr.nelem,
1516 	    resp->match_prev.nelem);
1517 
1518 	/* process added ports */
1519 	for (idx = 0; idx < resp->added.nelem; idx++) {
1520 		mdp = resp->added.mdp;
1521 		node = resp->added.mdep[idx];
1522 
1523 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1524 
1525 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1526 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1527 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1528 		}
1529 	}
1530 
1531 	/* process removed ports */
1532 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1533 		mdp = resp->removed.mdp;
1534 		node = resp->removed.mdep[idx];
1535 
1536 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1537 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1538 			    __func__, id_propname, idx);
1539 			continue;
1540 		}
1541 
1542 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1543 
1544 		if (vsw_port_detach(vswp, inst) != 0) {
1545 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1546 			    vswp->instance, inst);
1547 		}
1548 	}
1549 
1550 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1551 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1552 		    resp->match_curr.mdep[idx],
1553 		    resp->match_prev.mdp,
1554 		    resp->match_prev.mdep[idx]);
1555 	}
1556 
1557 	D1(vswp, "%s: exit", __func__);
1558 
1559 	return (MDEG_SUCCESS);
1560 }
1561 
1562 /*
1563  * Scan the machine description for this instance of vsw
1564  * and read its properties. Called only from vsw_attach().
1565  * Returns: 0 on success, 1 on failure.
1566  */
1567 static int
1568 vsw_read_mdprops(vsw_t *vswp)
1569 {
1570 	md_t		*mdp = NULL;
1571 	mde_cookie_t	rootnode;
1572 	mde_cookie_t	*listp = NULL;
1573 	uint64_t	inst;
1574 	uint64_t	cfgh;
1575 	char		*name;
1576 	int		rv = 1;
1577 	int		num_nodes = 0;
1578 	int		num_devs = 0;
1579 	int		listsz = 0;
1580 	int		i;
1581 
1582 	/*
1583 	 * In each 'virtual-device' node in the MD there is a
1584 	 * 'cfg-handle' property which is the MD's concept of
1585 	 * an instance number (this may be completely different from
1586 	 * the device drivers instance #). OBP reads that value and
1587 	 * stores it in the 'reg' property of the appropriate node in
1588 	 * the device tree. We first read this reg property and use this
1589 	 * to compare against the 'cfg-handle' property of vsw nodes
1590 	 * in MD to get to this specific vsw instance and then read
1591 	 * other properties that we are interested in.
1592 	 * We also cache the value of 'reg' property and use it later
1593 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1594 	 */
1595 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1596 	    DDI_PROP_DONTPASS, reg_propname, -1);
1597 	if (inst == -1) {
1598 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1599 		    "OBP device tree", vswp->instance, reg_propname);
1600 		return (rv);
1601 	}
1602 
1603 	vswp->regprop = inst;
1604 
1605 	if ((mdp = md_get_handle()) == NULL) {
1606 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1607 		return (rv);
1608 	}
1609 
1610 	num_nodes = md_node_count(mdp);
1611 	ASSERT(num_nodes > 0);
1612 
1613 	listsz = num_nodes * sizeof (mde_cookie_t);
1614 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1615 
1616 	rootnode = md_root_node(mdp);
1617 
1618 	/* search for all "virtual_device" nodes */
1619 	num_devs = md_scan_dag(mdp, rootnode,
1620 	    md_find_name(mdp, vdev_propname),
1621 	    md_find_name(mdp, "fwd"), listp);
1622 	if (num_devs <= 0) {
1623 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1624 		goto vsw_readmd_exit;
1625 	}
1626 
1627 	/*
1628 	 * Now loop through the list of virtual-devices looking for
1629 	 * devices with name "virtual-network-switch" and for each
1630 	 * such device compare its instance with what we have from
1631 	 * the 'reg' property to find the right node in MD and then
1632 	 * read all its properties.
1633 	 */
1634 	for (i = 0; i < num_devs; i++) {
1635 
1636 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1637 			DWARN(vswp, "%s: name property not found\n",
1638 			    __func__);
1639 			goto vsw_readmd_exit;
1640 		}
1641 
1642 		/* is this a virtual-network-switch? */
1643 		if (strcmp(name, vsw_propname) != 0)
1644 			continue;
1645 
1646 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1647 			DWARN(vswp, "%s: cfg-handle property not found\n",
1648 			    __func__);
1649 			goto vsw_readmd_exit;
1650 		}
1651 
1652 		/* is this the required instance of vsw? */
1653 		if (inst != cfgh)
1654 			continue;
1655 
1656 		/* now read all properties of this vsw instance */
1657 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1658 		break;
1659 	}
1660 
1661 vsw_readmd_exit:
1662 
1663 	kmem_free(listp, listsz);
1664 	(void) md_fini_handle(mdp);
1665 	return (rv);
1666 }
1667 
1668 /*
1669  * Read the initial start-of-day values from the specified MD node.
1670  */
1671 static int
1672 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1673 {
1674 	uint64_t	macaddr = 0;
1675 
1676 	D1(vswp, "%s: enter", __func__);
1677 
1678 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1679 		return (1);
1680 	}
1681 
1682 	/* mac address for vswitch device itself */
1683 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1684 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1685 		    vswp->instance);
1686 		return (1);
1687 	}
1688 
1689 	vsw_save_lmacaddr(vswp, macaddr);
1690 
1691 	if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) {
1692 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1693 		    "defaulting to 'switched' mode",
1694 		    __func__, smode_propname);
1695 
1696 		vswp->smode = VSW_LAYER2;
1697 	}
1698 
1699 	/*
1700 	 * Read the 'linkprop' property to know if this
1701 	 * vsw device wants to get physical link updates.
1702 	 */
1703 	vsw_linkprop_read(vswp, mdp, node, &vswp->pls_update);
1704 
1705 	/* read mtu */
1706 	vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
1707 	if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
1708 		vswp->mtu = ETHERMTU;
1709 	}
1710 	vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
1711 	    VLAN_TAGSZ;
1712 
1713 	/* read vlan id properties of this vsw instance */
1714 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1715 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1716 
1717 	/* read priority-ether-types */
1718 	vsw_read_pri_eth_types(vswp, mdp, node);
1719 
1720 	D1(vswp, "%s: exit", __func__);
1721 	return (0);
1722 }
1723 
1724 /*
1725  * Read vlan id properties of the given MD node.
1726  * Arguments:
1727  *   arg:          device argument(vsw device or a port)
1728  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1729  *   mdp:          machine description
1730  *   node:         md node cookie
1731  *
1732  * Returns:
1733  *   pvidp:        port-vlan-id of the node
1734  *   vidspp:       list of vlan-ids of the node
1735  *   nvidsp:       # of vlan-ids in the list
1736  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1737  */
1738 static void
1739 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1740 	uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp,
1741 	uint16_t *default_idp)
1742 {
1743 	vsw_t		*vswp;
1744 	vsw_port_t	*portp;
1745 	char		*pvid_propname;
1746 	char		*vid_propname;
1747 	uint_t		nvids = 0;
1748 	uint32_t	vids_size;
1749 	int		rv;
1750 	int		i;
1751 	uint64_t	*data;
1752 	uint64_t	val;
1753 	int		size;
1754 	int		inst;
1755 
1756 	if (type == VSW_LOCALDEV) {
1757 
1758 		vswp = (vsw_t *)arg;
1759 		pvid_propname = vsw_pvid_propname;
1760 		vid_propname = vsw_vid_propname;
1761 		inst = vswp->instance;
1762 
1763 	} else if (type == VSW_VNETPORT) {
1764 
1765 		portp = (vsw_port_t *)arg;
1766 		vswp = portp->p_vswp;
1767 		pvid_propname = port_pvid_propname;
1768 		vid_propname = port_vid_propname;
1769 		inst = portp->p_instance;
1770 
1771 	} else {
1772 		return;
1773 	}
1774 
1775 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1776 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1777 		if (rv != 0) {
1778 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1779 			    vsw_dvid_propname);
1780 
1781 			*default_idp = vsw_default_vlan_id;
1782 		} else {
1783 			*default_idp = val & 0xFFF;
1784 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1785 			    vsw_dvid_propname, inst, *default_idp);
1786 		}
1787 	}
1788 
1789 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1790 	if (rv != 0) {
1791 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1792 		*pvidp = vsw_default_vlan_id;
1793 	} else {
1794 
1795 		*pvidp = val & 0xFFF;
1796 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1797 		    pvid_propname, inst, *pvidp);
1798 	}
1799 
1800 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1801 	    &size);
1802 	if (rv != 0) {
1803 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1804 		size = 0;
1805 	} else {
1806 		size /= sizeof (uint64_t);
1807 	}
1808 	nvids = size;
1809 
1810 	if (nvids != 0) {
1811 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1812 		vids_size = sizeof (vsw_vlanid_t) * nvids;
1813 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1814 		for (i = 0; i < nvids; i++) {
1815 			(*vidspp)[i].vl_vid = data[i] & 0xFFFF;
1816 			(*vidspp)[i].vl_set = B_FALSE;
1817 			D2(vswp, " %d ", (*vidspp)[i].vl_vid);
1818 		}
1819 		D2(vswp, "\n");
1820 	}
1821 
1822 	*nvidsp = nvids;
1823 }
1824 
1825 /*
1826  * This function reads "priority-ether-types" property from md. This property
1827  * is used to enable support for priority frames. Applications which need
1828  * guaranteed and timely delivery of certain high priority frames to/from
1829  * a vnet or vsw within ldoms, should configure this property by providing
1830  * the ether type(s) for which the priority facility is needed.
1831  * Normal data frames are delivered over a ldc channel using the descriptor
1832  * ring mechanism which is constrained by factors such as descriptor ring size,
1833  * the rate at which the ring is processed at the peer ldc end point, etc.
1834  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1835  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1836  * descriptor ring path and enables a more reliable and timely delivery of
1837  * frames to the peer.
1838  */
1839 static void
1840 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1841 {
1842 	int		rv;
1843 	uint16_t	*types;
1844 	uint64_t	*data;
1845 	int		size;
1846 	int		i;
1847 	size_t		mblk_sz;
1848 
1849 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1850 	    (uint8_t **)&data, &size);
1851 	if (rv != 0) {
1852 		/*
1853 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1854 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1855 		 */
1856 		if (vsw_pri_eth_type != 0) {
1857 			size = sizeof (vsw_pri_eth_type);
1858 			data = &vsw_pri_eth_type;
1859 		} else {
1860 			D3(vswp, "%s: prop(%s) not found", __func__,
1861 			    pri_types_propname);
1862 			size = 0;
1863 		}
1864 	}
1865 
1866 	if (size == 0) {
1867 		vswp->pri_num_types = 0;
1868 		return;
1869 	}
1870 
1871 	/*
1872 	 * we have some priority-ether-types defined;
1873 	 * allocate a table of these types and also
1874 	 * allocate a pool of mblks to transmit these
1875 	 * priority packets.
1876 	 */
1877 	size /= sizeof (uint64_t);
1878 	vswp->pri_num_types = size;
1879 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1880 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1881 		types[i] = data[i] & 0xFFFF;
1882 	}
1883 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1884 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1885 }
1886 
1887 static void
1888 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1889 {
1890 	int		rv;
1891 	int		inst;
1892 	uint64_t	val;
1893 	char		*mtu_propname;
1894 
1895 	mtu_propname = vsw_mtu_propname;
1896 	inst = vswp->instance;
1897 
1898 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1899 	if (rv != 0) {
1900 		D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
1901 		*mtu = vsw_ethermtu;
1902 	} else {
1903 
1904 		*mtu = val & 0xFFFF;
1905 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1906 		    mtu_propname, inst, *mtu);
1907 	}
1908 }
1909 
1910 /*
1911  * Update the mtu of the vsw device. We first check if the device has been
1912  * plumbed and if so fail the mtu update. Otherwise, we continue to update the
1913  * new mtu and reset all ports to initiate handshake re-negotiation with peers
1914  * using the new mtu.
1915  */
1916 static int
1917 vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
1918 {
1919 	int	rv;
1920 
1921 	WRITE_ENTER(&vswp->if_lockrw);
1922 
1923 	if (vswp->if_state & VSW_IF_UP) {
1924 
1925 		RW_EXIT(&vswp->if_lockrw);
1926 
1927 		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
1928 		    " as the device is plumbed\n", vswp->instance);
1929 		return (EBUSY);
1930 
1931 	} else {
1932 
1933 		D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
1934 		    __func__, vswp->mtu, mtu);
1935 
1936 		vswp->mtu = mtu;
1937 		vswp->max_frame_size = vswp->mtu +
1938 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1939 
1940 		rv = mac_maxsdu_update(vswp->if_mh, mtu);
1941 		if (rv != 0) {
1942 			cmn_err(CE_NOTE,
1943 			    "!vsw%d: Unable to update mtu with mac"
1944 			    " layer\n", vswp->instance);
1945 		}
1946 
1947 		RW_EXIT(&vswp->if_lockrw);
1948 
1949 		/* Reset ports to renegotiate with the new mtu */
1950 		vsw_reset_ports(vswp);
1951 
1952 	}
1953 
1954 	return (0);
1955 }
1956 
1957 static void
1958 vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
1959 	boolean_t *pls)
1960 {
1961 	int		rv;
1962 	uint64_t	val;
1963 	char		*linkpropname;
1964 
1965 	linkpropname = vsw_linkprop_propname;
1966 
1967 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
1968 	if (rv != 0) {
1969 		D3(vswp, "%s: prop(%s) not found", __func__, linkpropname);
1970 		*pls = B_FALSE;
1971 	} else {
1972 
1973 		*pls = (val & 0x1) ? B_TRUE : B_FALSE;
1974 		D2(vswp, "%s: %s(%d): (%d)\n", __func__, linkpropname,
1975 		    vswp->instance, *pls);
1976 	}
1977 }
1978 
1979 void
1980 vsw_mac_link_update(vsw_t *vswp, link_state_t link_state)
1981 {
1982 	READ_ENTER(&vswp->if_lockrw);
1983 
1984 	if (vswp->if_state & VSW_IF_REG) {
1985 		mac_link_update(vswp->if_mh, link_state);
1986 	}
1987 
1988 	RW_EXIT(&vswp->if_lockrw);
1989 }
1990 
1991 void
1992 vsw_physlink_state_update(vsw_t *vswp)
1993 {
1994 	if (vswp->pls_update == B_TRUE) {
1995 		vsw_mac_link_update(vswp, vswp->phys_link_state);
1996 	}
1997 	vsw_physlink_update_ports(vswp);
1998 }
1999 
2000 /*
2001  * Check to see if the relevant properties in the specified node have
2002  * changed, and if so take the appropriate action.
2003  *
2004  * If any of the properties are missing or invalid we don't take
2005  * any action, as this function should only be invoked when modifications
2006  * have been made to what we assume is a working configuration, which
2007  * we leave active.
2008  *
2009  * Note it is legal for this routine to be invoked even if none of the
2010  * properties in the port node within the MD have actually changed.
2011  */
2012 static void
2013 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
2014 {
2015 	char		physname[LIFNAMSIZ];
2016 	char		drv[LIFNAMSIZ];
2017 	uint_t		ddi_instance;
2018 	uint8_t		new_smode;
2019 	int		i;
2020 	uint64_t 	macaddr = 0;
2021 	enum		{MD_init = 0x1,
2022 				MD_physname = 0x2,
2023 				MD_macaddr = 0x4,
2024 				MD_smode = 0x8,
2025 				MD_vlans = 0x10,
2026 				MD_mtu = 0x20,
2027 				MD_pls = 0x40} updated;
2028 	int		rv;
2029 	uint16_t	pvid;
2030 	vsw_vlanid_t	*vids;
2031 	uint16_t	nvids;
2032 	uint32_t	mtu;
2033 	boolean_t	pls_update;
2034 
2035 	updated = MD_init;
2036 
2037 	D1(vswp, "%s: enter", __func__);
2038 
2039 	/*
2040 	 * Check if name of physical device in MD has changed.
2041 	 */
2042 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
2043 		/*
2044 		 * Do basic sanity check on new device name/instance,
2045 		 * if its non NULL. It is valid for the device name to
2046 		 * have changed from a non NULL to a NULL value, i.e.
2047 		 * the vsw is being changed to 'routed' mode.
2048 		 */
2049 		if ((strlen(physname) != 0) &&
2050 		    (ddi_parse(physname, drv,
2051 		    &ddi_instance) != DDI_SUCCESS)) {
2052 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
2053 			    " a valid device name/instance",
2054 			    vswp->instance, physname);
2055 			goto fail_reconf;
2056 		}
2057 
2058 		if (strcmp(physname, vswp->physname)) {
2059 			D2(vswp, "%s: device name changed from %s to %s",
2060 			    __func__, vswp->physname, physname);
2061 
2062 			updated |= MD_physname;
2063 		} else {
2064 			D2(vswp, "%s: device name unchanged at %s",
2065 			    __func__, vswp->physname);
2066 		}
2067 	} else {
2068 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
2069 		    "device from updated MD.", vswp->instance);
2070 		goto fail_reconf;
2071 	}
2072 
2073 	/*
2074 	 * Check if MAC address has changed.
2075 	 */
2076 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
2077 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
2078 		    vswp->instance);
2079 		goto fail_reconf;
2080 	} else {
2081 		uint64_t maddr = macaddr;
2082 		READ_ENTER(&vswp->if_lockrw);
2083 		for (i = ETHERADDRL - 1; i >= 0; i--) {
2084 			if (vswp->if_addr.ether_addr_octet[i]
2085 			    != (macaddr & 0xFF)) {
2086 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
2087 				    __func__, i,
2088 				    vswp->if_addr.ether_addr_octet[i],
2089 				    (macaddr & 0xFF));
2090 				updated |= MD_macaddr;
2091 				macaddr = maddr;
2092 				break;
2093 			}
2094 			macaddr >>= 8;
2095 		}
2096 		RW_EXIT(&vswp->if_lockrw);
2097 		if (updated & MD_macaddr) {
2098 			vsw_save_lmacaddr(vswp, macaddr);
2099 		}
2100 	}
2101 
2102 	/*
2103 	 * Check if switching modes have changed.
2104 	 */
2105 	if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) {
2106 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2107 		    vswp->instance, smode_propname);
2108 		goto fail_reconf;
2109 	} else {
2110 		if (new_smode != vswp->smode) {
2111 			D2(vswp, "%s: switching mode changed from %d to %d",
2112 			    __func__, vswp->smode, new_smode);
2113 
2114 			updated |= MD_smode;
2115 		}
2116 	}
2117 
2118 	/* Read the vlan ids */
2119 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2120 	    &nvids, NULL);
2121 
2122 	/* Determine if there are any vlan id updates */
2123 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2124 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2125 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2126 	    !vsw_cmp_vids(vids, vswp->vids, nvids))) {
2127 		updated |= MD_vlans;
2128 	}
2129 
2130 	/* Read mtu */
2131 	vsw_mtu_read(vswp, mdp, node, &mtu);
2132 	if (mtu != vswp->mtu) {
2133 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2134 			updated |= MD_mtu;
2135 		} else {
2136 			cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2137 			    " as the specified value:%d is invalid\n",
2138 			    vswp->instance, mtu);
2139 		}
2140 	}
2141 
2142 	/*
2143 	 * Read the 'linkprop' property.
2144 	 */
2145 	vsw_linkprop_read(vswp, mdp, node, &pls_update);
2146 	if (pls_update != vswp->pls_update) {
2147 		updated |= MD_pls;
2148 	}
2149 
2150 	/*
2151 	 * Now make any changes which are needed...
2152 	 */
2153 	if (updated & MD_pls) {
2154 
2155 		/* save the updated property. */
2156 		vswp->pls_update = pls_update;
2157 
2158 		if (pls_update == B_FALSE) {
2159 			/*
2160 			 * Phys link state update is now disabled for this vsw
2161 			 * interface. If we had previously reported a link-down
2162 			 * to the stack, undo that by sending a link-up.
2163 			 */
2164 			if (vswp->phys_link_state == LINK_STATE_DOWN) {
2165 				vsw_mac_link_update(vswp, LINK_STATE_UP);
2166 			}
2167 		} else {
2168 			/*
2169 			 * Phys link state update is now enabled. Send up an
2170 			 * update based on the current phys link state.
2171 			 */
2172 			if (vswp->smode & VSW_LAYER2) {
2173 				vsw_mac_link_update(vswp,
2174 				    vswp->phys_link_state);
2175 			}
2176 		}
2177 
2178 	}
2179 
2180 	if (updated & (MD_physname | MD_smode | MD_mtu)) {
2181 
2182 		/*
2183 		 * Stop any pending thread to setup switching mode.
2184 		 */
2185 		vsw_setup_switching_stop(vswp);
2186 
2187 		/* Cleanup HybridIO */
2188 		vsw_hio_cleanup(vswp);
2189 
2190 		/*
2191 		 * Remove unicst, mcst addrs of vsw interface
2192 		 * and ports from the physdev. This also closes
2193 		 * the corresponding mac clients.
2194 		 */
2195 		vsw_unset_addrs(vswp);
2196 
2197 		/*
2198 		 * Stop, detach and close the old device..
2199 		 */
2200 		mutex_enter(&vswp->mac_lock);
2201 		vsw_mac_close(vswp);
2202 		mutex_exit(&vswp->mac_lock);
2203 
2204 		/*
2205 		 * Update phys name.
2206 		 */
2207 		if (updated & MD_physname) {
2208 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2209 			    vswp->instance, vswp->physname, physname);
2210 			(void) strncpy(vswp->physname,
2211 			    physname, strlen(physname) + 1);
2212 		}
2213 
2214 		/*
2215 		 * Update array with the new switch mode values.
2216 		 */
2217 		if (updated & MD_smode) {
2218 			vswp->smode = new_smode;
2219 		}
2220 
2221 		/* Update mtu */
2222 		if (updated & MD_mtu) {
2223 			rv = vsw_mtu_update(vswp, mtu);
2224 			if (rv != 0) {
2225 				goto fail_update;
2226 			}
2227 		}
2228 
2229 		/*
2230 		 * ..and attach, start the new device.
2231 		 */
2232 		rv = vsw_setup_switching(vswp);
2233 		if (rv == EAGAIN) {
2234 			/*
2235 			 * Unable to setup switching mode.
2236 			 * As the error is EAGAIN, schedule a thread to retry
2237 			 * and return. Programming addresses of ports and
2238 			 * vsw interface will be done by the thread when the
2239 			 * switching setup completes successfully.
2240 			 */
2241 			if (vsw_setup_switching_start(vswp) != 0) {
2242 				goto fail_update;
2243 			}
2244 			return;
2245 
2246 		} else if (rv) {
2247 			goto fail_update;
2248 		}
2249 
2250 		vsw_setup_switching_post_process(vswp);
2251 	} else if (updated & MD_macaddr) {
2252 		/*
2253 		 * We enter here if only MD_macaddr is exclusively updated.
2254 		 * If MD_physname and/or MD_smode are also updated, then
2255 		 * as part of that, we would have implicitly processed
2256 		 * MD_macaddr update (above).
2257 		 */
2258 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2259 		    vswp->instance, macaddr);
2260 
2261 		READ_ENTER(&vswp->if_lockrw);
2262 		if (vswp->if_state & VSW_IF_UP) {
2263 			/* reconfigure with new address */
2264 			vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0);
2265 
2266 			/*
2267 			 * Notify the MAC layer of the changed address.
2268 			 */
2269 			mac_unicst_update(vswp->if_mh,
2270 			    (uint8_t *)&vswp->if_addr);
2271 
2272 		}
2273 		RW_EXIT(&vswp->if_lockrw);
2274 
2275 	}
2276 
2277 	if (updated & MD_vlans) {
2278 		/* Remove existing vlan ids from the hash table. */
2279 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2280 
2281 		if (vswp->if_state & VSW_IF_UP) {
2282 			vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids);
2283 		} else {
2284 			if (vswp->nvids != 0) {
2285 				kmem_free(vswp->vids,
2286 				    sizeof (vsw_vlanid_t) * vswp->nvids);
2287 			}
2288 			vswp->vids = vids;
2289 			vswp->nvids = nvids;
2290 			vswp->pvid = pvid;
2291 		}
2292 
2293 		/* add these new vlan ids into hash table */
2294 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2295 	} else {
2296 		if (nvids != 0) {
2297 			kmem_free(vids, sizeof (vsw_vlanid_t) * nvids);
2298 		}
2299 	}
2300 
2301 	return;
2302 
2303 fail_reconf:
2304 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2305 	return;
2306 
2307 fail_update:
2308 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2309 	    vswp->instance);
2310 }
2311 
2312 /*
2313  * Read the port's md properties.
2314  */
2315 static int
2316 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2317 	md_t *mdp, mde_cookie_t *node)
2318 {
2319 	uint64_t		ldc_id;
2320 	uint8_t			*addrp;
2321 	int			i, addrsz;
2322 	int			num_nodes = 0, nchan = 0;
2323 	int			listsz = 0;
2324 	mde_cookie_t		*listp = NULL;
2325 	struct ether_addr	ea;
2326 	uint64_t		macaddr;
2327 	uint64_t		inst = 0;
2328 	uint64_t		val;
2329 
2330 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2331 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2332 		    id_propname);
2333 		return (1);
2334 	}
2335 
2336 	/*
2337 	 * Find the channel endpoint node(s) (which should be under this
2338 	 * port node) which contain the channel id(s).
2339 	 */
2340 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2341 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2342 		    __func__, num_nodes);
2343 		return (1);
2344 	}
2345 
2346 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2347 
2348 	/* allocate enough space for node list */
2349 	listsz = num_nodes * sizeof (mde_cookie_t);
2350 	listp = kmem_zalloc(listsz, KM_SLEEP);
2351 
2352 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2353 	    md_find_name(mdp, "fwd"), listp);
2354 
2355 	if (nchan <= 0) {
2356 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2357 		kmem_free(listp, listsz);
2358 		return (1);
2359 	}
2360 
2361 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2362 
2363 	/* use property from first node found */
2364 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2365 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2366 		    id_propname);
2367 		kmem_free(listp, listsz);
2368 		return (1);
2369 	}
2370 
2371 	/* don't need list any more */
2372 	kmem_free(listp, listsz);
2373 
2374 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2375 
2376 	/* read mac-address property */
2377 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2378 	    &addrp, &addrsz)) {
2379 		DWARN(vswp, "%s: prop(%s) not found",
2380 		    __func__, remaddr_propname);
2381 		return (1);
2382 	}
2383 
2384 	if (addrsz < ETHERADDRL) {
2385 		DWARN(vswp, "%s: invalid address size", __func__);
2386 		return (1);
2387 	}
2388 
2389 	macaddr = *((uint64_t *)addrp);
2390 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2391 
2392 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2393 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2394 		macaddr >>= 8;
2395 	}
2396 
2397 	/* now update all properties into the port */
2398 	portp->p_vswp = vswp;
2399 	portp->p_instance = inst;
2400 	portp->addr_set = B_FALSE;
2401 	ether_copy(&ea, &portp->p_macaddr);
2402 	if (nchan > VSW_PORT_MAX_LDCS) {
2403 		D2(vswp, "%s: using first of %d ldc ids",
2404 		    __func__, nchan);
2405 		nchan = VSW_PORT_MAX_LDCS;
2406 	}
2407 	portp->num_ldcs = nchan;
2408 	portp->ldc_ids =
2409 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2410 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2411 
2412 	/* read vlan id properties of this port node */
2413 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2414 	    &portp->vids, &portp->nvids, NULL);
2415 
2416 	/* Check if hybrid property is present */
2417 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2418 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2419 		portp->p_hio_enabled = B_TRUE;
2420 	} else {
2421 		portp->p_hio_enabled = B_FALSE;
2422 	}
2423 	/*
2424 	 * Port hio capability determined after version
2425 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2426 	 */
2427 	portp->p_hio_capable = B_FALSE;
2428 	return (0);
2429 }
2430 
2431 /*
2432  * Add a new port to the system.
2433  *
2434  * Returns 0 on success, 1 on failure.
2435  */
2436 int
2437 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2438 {
2439 	vsw_port_t	*portp;
2440 	int		rv;
2441 
2442 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2443 
2444 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2445 	if (rv != 0) {
2446 		kmem_free(portp, sizeof (*portp));
2447 		return (1);
2448 	}
2449 
2450 	rv = vsw_port_attach(portp);
2451 	if (rv != 0) {
2452 		DERR(vswp, "%s: failed to attach port", __func__);
2453 		return (1);
2454 	}
2455 
2456 	return (0);
2457 }
2458 
2459 static int
2460 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2461 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2462 {
2463 	uint64_t	cport_num;
2464 	uint64_t	pport_num;
2465 	vsw_port_list_t	*plistp;
2466 	vsw_port_t	*portp;
2467 	boolean_t	updated_vlans = B_FALSE;
2468 	uint16_t	pvid;
2469 	vsw_vlanid_t	*vids;
2470 	uint16_t	nvids;
2471 	uint64_t	val;
2472 	boolean_t	hio_enabled = B_FALSE;
2473 
2474 	/*
2475 	 * For now, we get port updates only if vlan ids changed.
2476 	 * We read the port num and do some sanity check.
2477 	 */
2478 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2479 		return (1);
2480 	}
2481 
2482 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2483 		return (1);
2484 	}
2485 	if (cport_num != pport_num)
2486 		return (1);
2487 
2488 	plistp = &(vswp->plist);
2489 
2490 	READ_ENTER(&plistp->lockrw);
2491 
2492 	portp = vsw_lookup_port(vswp, cport_num);
2493 	if (portp == NULL) {
2494 		RW_EXIT(&plistp->lockrw);
2495 		return (1);
2496 	}
2497 
2498 	/* Read the vlan ids */
2499 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2500 	    &vids, &nvids, NULL);
2501 
2502 	/* Determine if there are any vlan id updates */
2503 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2504 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2505 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2506 	    !vsw_cmp_vids(vids, portp->vids, nvids))) {
2507 		updated_vlans = B_TRUE;
2508 	}
2509 
2510 	if (updated_vlans == B_TRUE) {
2511 
2512 		/* Remove existing vlan ids from the hash table. */
2513 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2514 
2515 		/* Reconfigure vlans with network device */
2516 		vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids);
2517 
2518 		/* add these new vlan ids into hash table */
2519 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2520 
2521 		/* reset the port if it is vlan unaware (ver < 1.3) */
2522 		vsw_vlan_unaware_port_reset(portp);
2523 	}
2524 
2525 	/* Check if hybrid property is present */
2526 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2527 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2528 		hio_enabled = B_TRUE;
2529 	}
2530 
2531 	if (portp->p_hio_enabled != hio_enabled) {
2532 		vsw_hio_port_update(portp, hio_enabled);
2533 	}
2534 
2535 	RW_EXIT(&plistp->lockrw);
2536 
2537 	return (0);
2538 }
2539 
2540 /*
2541  * vsw_mac_rx -- A common function to send packets to the interface.
2542  * By default this function check if the interface is UP or not, the
2543  * rest of the behaviour depends on the flags as below:
2544  *
2545  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2546  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2547  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2548  */
2549 void
2550 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2551     mblk_t *mp, vsw_macrx_flags_t flags)
2552 {
2553 	mblk_t		*mpt;
2554 
2555 	D1(vswp, "%s:enter\n", __func__);
2556 	READ_ENTER(&vswp->if_lockrw);
2557 	/* Check if the interface is up */
2558 	if (!(vswp->if_state & VSW_IF_UP)) {
2559 		RW_EXIT(&vswp->if_lockrw);
2560 		/* Free messages only if FREEMSG flag specified */
2561 		if (flags & VSW_MACRX_FREEMSG) {
2562 			freemsgchain(mp);
2563 		}
2564 		D1(vswp, "%s:exit\n", __func__);
2565 		return;
2566 	}
2567 	/*
2568 	 * If PROMISC flag is passed, then check if
2569 	 * the interface is in the PROMISC mode.
2570 	 * If not, drop the messages.
2571 	 */
2572 	if (flags & VSW_MACRX_PROMISC) {
2573 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2574 			RW_EXIT(&vswp->if_lockrw);
2575 			/* Free messages only if FREEMSG flag specified */
2576 			if (flags & VSW_MACRX_FREEMSG) {
2577 				freemsgchain(mp);
2578 			}
2579 			D1(vswp, "%s:exit\n", __func__);
2580 			return;
2581 		}
2582 	}
2583 	RW_EXIT(&vswp->if_lockrw);
2584 	/*
2585 	 * If COPYMSG flag is passed, then make a copy
2586 	 * of the message chain and send up the copy.
2587 	 */
2588 	if (flags & VSW_MACRX_COPYMSG) {
2589 		mp = copymsgchain(mp);
2590 		if (mp == NULL) {
2591 			D1(vswp, "%s:exit\n", __func__);
2592 			return;
2593 		}
2594 	}
2595 
2596 	D2(vswp, "%s: sending up stack", __func__);
2597 
2598 	mpt = NULL;
2599 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2600 	if (mp != NULL) {
2601 		mac_rx(vswp->if_mh, mrh, mp);
2602 	}
2603 	D1(vswp, "%s:exit\n", __func__);
2604 }
2605 
2606 /* copy mac address of vsw into soft state structure */
2607 static void
2608 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2609 {
2610 	int	i;
2611 
2612 	WRITE_ENTER(&vswp->if_lockrw);
2613 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2614 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2615 		macaddr >>= 8;
2616 	}
2617 	RW_EXIT(&vswp->if_lockrw);
2618 }
2619 
2620 /* Compare VLAN ids, array size expected to be same. */
2621 static boolean_t
2622 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids)
2623 {
2624 	int i, j;
2625 	uint16_t vid;
2626 
2627 	for (i = 0; i < nvids; i++) {
2628 		vid = vids1[i].vl_vid;
2629 		for (j = 0; j < nvids; j++) {
2630 			if (vid == vids2[i].vl_vid)
2631 				break;
2632 		}
2633 		if (j == nvids) {
2634 			return (B_FALSE);
2635 		}
2636 	}
2637 	return (B_TRUE);
2638 }
2639