xref: /titanic_52/usr/src/uts/sun4v/io/vsw.c (revision 88e8a7f2124e61277dc5f6bf9718881bfb8b724e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <sys/varargs.h>
53 #include <sys/machsystm.h>
54 #include <sys/modctl.h>
55 #include <sys/modhash.h>
56 #include <sys/mac_provider.h>
57 #include <sys/mac_ether.h>
58 #include <sys/taskq.h>
59 #include <sys/note.h>
60 #include <sys/mach_descrip.h>
61 #include <sys/mac_provider.h>
62 #include <sys/mdeg.h>
63 #include <sys/ldc.h>
64 #include <sys/vsw_fdb.h>
65 #include <sys/vsw.h>
66 #include <sys/vio_mailbox.h>
67 #include <sys/vnet_mailbox.h>
68 #include <sys/vnet_common.h>
69 #include <sys/vio_util.h>
70 #include <sys/sdt.h>
71 #include <sys/atomic.h>
72 #include <sys/callb.h>
73 #include <sys/vlan.h>
74 
75 /*
76  * Function prototypes.
77  */
78 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
79 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
80 static	int vsw_unattach(vsw_t *vswp);
81 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
82 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);
83 void vsw_destroy_rxpools(void *);
84 
85 /* MDEG routines */
86 static	int vsw_mdeg_register(vsw_t *vswp);
87 static	void vsw_mdeg_unregister(vsw_t *vswp);
88 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
89 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
90 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
91 static	int vsw_read_mdprops(vsw_t *vswp);
92 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
93 	mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
94 	uint16_t *nvidsp, uint16_t *default_idp);
95 static	void vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp,
96 	mde_cookie_t node, uint64_t *bw);
97 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
98 	md_t *mdp, mde_cookie_t *node);
99 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
100 	mde_cookie_t node);
101 static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
102 	uint32_t *mtu);
103 static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
104 static	void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
105 	boolean_t *pls);
106 static	void vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
107 	uint64_t *bw);
108 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
109 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
110 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
111 	vsw_vlanid_t *vids2, int nvids);
112 
113 /* Mac driver related routines */
114 static int vsw_mac_register(vsw_t *);
115 static int vsw_mac_unregister(vsw_t *);
116 static int vsw_m_stat(void *, uint_t, uint64_t *);
117 static void vsw_m_stop(void *arg);
118 static int vsw_m_start(void *arg);
119 static int vsw_m_unicst(void *arg, const uint8_t *);
120 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
121 static int vsw_m_promisc(void *arg, boolean_t);
122 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
123 void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state);
124 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
125     mblk_t *mp, vsw_macrx_flags_t flags);
126 void vsw_physlink_state_update(vsw_t *vswp);
127 
128 /*
129  * Functions imported from other files.
130  */
131 extern void vsw_setup_switching_thread(void *arg);
132 extern int vsw_setup_switching_start(vsw_t *vswp);
133 extern void vsw_setup_switching_stop(vsw_t *vswp);
134 extern int vsw_setup_switching(vsw_t *);
135 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
136     vsw_port_t *port, mac_resource_handle_t mrh);
137 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
138 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
139 extern void vsw_del_mcst_vsw(vsw_t *);
140 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
141 extern void vsw_detach_ports(vsw_t *vswp);
142 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
143 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
144 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
145 	md_t *prev_mdp, mde_cookie_t prev_mdex);
146 extern	int vsw_port_attach(vsw_port_t *port);
147 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
148 extern int vsw_mac_open(vsw_t *vswp);
149 extern void vsw_mac_close(vsw_t *vswp);
150 extern void vsw_mac_cleanup_ports(vsw_t *vswp);
151 extern void vsw_unset_addrs(vsw_t *vswp);
152 extern void vsw_setup_switching_post_process(vsw_t *vswp);
153 extern void vsw_create_vlans(void *arg, int type);
154 extern void vsw_destroy_vlans(void *arg, int type);
155 extern void vsw_vlan_add_ids(void *arg, int type);
156 extern void vsw_vlan_remove_ids(void *arg, int type);
157 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
158 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
159 	mblk_t **npt);
160 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
161 extern void vsw_hio_cleanup(vsw_t *vswp);
162 extern void vsw_hio_start_ports(vsw_t *vswp);
163 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
164 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
165 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
166 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
167     vsw_vlanid_t *new_vids, int new_nvids);
168 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
169 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
170 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
171     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
172 extern void vsw_reset_ports(vsw_t *vswp);
173 extern void vsw_port_reset(vsw_port_t *portp);
174 extern void vsw_physlink_update_ports(vsw_t *vswp);
175 extern void vsw_update_bandwidth(vsw_t *vswp, vsw_port_t *port, int type,
176     uint64_t maxbw);
177 
178 /*
179  * Internal tunables.
180  */
181 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
182 int	vsw_wretries = 100;		/* # of write attempts */
183 int	vsw_desc_delay = 0;		/* delay in us */
184 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
185 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
186 int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
187 					/* 300*3 = 900sec(15min) of max tmout */
188 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
189 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
190 int	vsw_ldc_retries = 5;		/* # of ldc_close() retries */
191 int	vsw_ldc_delay = 1000;		/* 1 ms delay for ldc_close() */
192 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
193 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
194 int	vsw_rxpool_cleanup_delay = 100000;	/* 100ms */
195 
196 
197 uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
198 uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
199 uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
200 
201 /* delay in usec to wait for all references on a fdb entry to be dropped */
202 uint32_t vsw_fdbe_refcnt_delay = 10;
203 
204 /*
205  * Default vlan id. This is only used internally when the "default-vlan-id"
206  * property is not present in the MD device node. Therefore, this should not be
207  * used as a tunable; if this value is changed, the corresponding variable
208  * should be updated to the same value in all vnets connected to this vsw.
209  */
210 uint16_t	vsw_default_vlan_id = 1;
211 
212 /*
213  * Workaround for a version handshake bug in obp's vnet.
214  * If vsw initiates version negotiation starting from the highest version,
215  * obp sends a nack and terminates version handshake. To workaround
216  * this, we do not initiate version handshake when the channel comes up.
217  * Instead, we wait for the peer to send its version info msg and go through
218  * the version protocol exchange. If we successfully negotiate a version,
219  * before sending the ack, we send our version info msg to the peer
220  * using the <major,minor> version that we are about to ack.
221  */
222 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
223 
224 /*
225  * In the absence of "priority-ether-types" property in MD, the following
226  * internal tunable can be set to specify a single priority ethertype.
227  */
228 uint64_t vsw_pri_eth_type = 0;
229 
230 /*
231  * Number of transmit priority buffers that are preallocated per device.
232  * This number is chosen to be a small value to throttle transmission
233  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
234  */
235 uint32_t vsw_pri_tx_nmblks = 64;
236 
237 /*
238  * Number of RARP packets sent to announce macaddr to the physical switch,
239  * after vsw's physical device is changed dynamically or after a guest (client
240  * vnet) is live migrated in.
241  */
242 uint32_t vsw_publish_macaddr_count = 3;
243 
boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanup */
int vsw_hio_cleanup_delay = 10000;	/* 10ms */
247 
248 /* Number of transmit descriptors -  must be power of 2 */
249 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
250 
251 /*
252  * Max number of mblks received in one receive operation.
253  */
254 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
255 
256 /*
257  * Internal tunables for receive buffer pools, that is,  the size and number of
258  * mblks for each pool. At least 3 sizes must be specified if these are used.
259  * The sizes must be specified in increasing order. Non-zero value of the first
260  * size will be used as a hint to use these values instead of the algorithm
261  * that determines the sizes based on MTU.
262  */
263 uint32_t vsw_mblk_size1 = 0;
264 uint32_t vsw_mblk_size2 = 0;
265 uint32_t vsw_mblk_size3 = 0;
266 uint32_t vsw_mblk_size4 = 0;
267 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
268 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
269 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
270 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */
271 
272 /*
273  * Set this to non-zero to enable additional internal receive buffer pools
274  * based on the MTU of the device for better performance at the cost of more
275  * memory consumption. This is turned off by default, to use allocb(9F) for
276  * receive buffer allocations of sizes > 2K.
277  */
278 boolean_t vsw_jumbo_rxpools = B_FALSE;
279 
280 /*
281  * vsw_max_tx_qcount is the maximum # of packets that can be queued
282  * before the tx worker thread begins processing the queue. Its value
283  * is chosen to be 4x the default length of tx descriptor ring.
284  */
285 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
286 
/*
 * MAC callbacks
 *
 * The initializer is positional, so the order of the entries below must
 * follow the member order of mac_callbacks_t (declared outside this file).
 * The per-slot notes are derived from the handler names and prototypes
 * declared earlier in this file.
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,		/* NOTE(review): presumably the flags word - confirm */
	vsw_m_stat,	/* vsw_m_stat(void *, uint_t, uint64_t *) */
	vsw_m_start,	/* vsw_m_start(void *arg) */
	vsw_m_stop,	/* vsw_m_stop(void *arg) */
	vsw_m_promisc,	/* vsw_m_promisc(void *arg, boolean_t) */
	vsw_m_multicst,	/* vsw_m_multicst(void *, boolean_t, const uint8_t *) */
	vsw_m_unicst,	/* vsw_m_unicst(void *arg, const uint8_t *) */
	vsw_m_tx	/* vsw_m_tx(void *arg, mblk_t *) */
};
300 
301 static	struct	cb_ops	vsw_cb_ops = {
302 	nulldev,			/* cb_open */
303 	nulldev,			/* cb_close */
304 	nodev,				/* cb_strategy */
305 	nodev,				/* cb_print */
306 	nodev,				/* cb_dump */
307 	nodev,				/* cb_read */
308 	nodev,				/* cb_write */
309 	nodev,				/* cb_ioctl */
310 	nodev,				/* cb_devmap */
311 	nodev,				/* cb_mmap */
312 	nodev,				/* cb_segmap */
313 	nochpoll,			/* cb_chpoll */
314 	ddi_prop_op,			/* cb_prop_op */
315 	NULL,				/* cb_stream */
316 	D_MP,				/* cb_flag */
317 	CB_REV,				/* rev */
318 	nodev,				/* int (*cb_aread)() */
319 	nodev				/* int (*cb_awrite)() */
320 };
321 
322 static	struct	dev_ops	vsw_ops = {
323 	DEVO_REV,		/* devo_rev */
324 	0,			/* devo_refcnt */
325 	NULL,			/* devo_getinfo */
326 	nulldev,		/* devo_identify */
327 	nulldev,		/* devo_probe */
328 	vsw_attach,		/* devo_attach */
329 	vsw_detach,		/* devo_detach */
330 	nodev,			/* devo_reset */
331 	&vsw_cb_ops,		/* devo_cb_ops */
332 	(struct bus_ops *)NULL,	/* devo_bus_ops */
333 	ddi_power		/* devo_power */
334 };
335 
336 extern	struct	mod_ops	mod_driverops;
337 static struct modldrv vswmodldrv = {
338 	&mod_driverops,
339 	"sun4v Virtual Switch",
340 	&vsw_ops,
341 };
342 
/*
 * Take (LDC_ENTER_LOCK) or drop (LDC_EXIT_LOCK) the three locks of a channel
 * as a unit. The locks are acquired in the order ldc_cblock, ldc_rxlock,
 * ldc_txlock and released in the reverse order.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement: it can then be used as the body of an unbraced if/else
 * without only the first lock operation being made conditional. The caller
 * supplies the terminating semicolon. Note that 'ldcp' is evaluated three
 * times, so it must not have side effects.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	/* CONSTCOND */					\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	/* CONSTCOND */					\
	} while (0)
351 
352 /* Driver soft state ptr  */
353 static void	*vsw_state;
354 
355 /*
356  * Linked list of "vsw_t" structures - one per instance.
357  */
358 vsw_t		*vsw_head = NULL;
359 krwlock_t	vsw_rw;
360 
361 /*
362  * Property names
363  */
364 static char vdev_propname[] = "virtual-device";
365 static char vsw_propname[] = "virtual-network-switch";
366 static char physdev_propname[] = "vsw-phys-dev";
367 static char smode_propname[] = "vsw-switch-mode";
368 static char macaddr_propname[] = "local-mac-address";
369 static char remaddr_propname[] = "remote-mac-address";
370 static char ldcids_propname[] = "ldc-ids";
371 static char chan_propname[] = "channel-endpoint";
372 static char id_propname[] = "id";
373 static char reg_propname[] = "reg";
374 static char pri_types_propname[] = "priority-ether-types";
375 static char vsw_pvid_propname[] = "port-vlan-id";
376 static char vsw_vid_propname[] = "vlan-id";
377 static char vsw_dvid_propname[] = "default-vlan-id";
378 static char port_pvid_propname[] = "remote-port-vlan-id";
379 static char port_vid_propname[] = "remote-vlan-id";
380 static char hybrid_propname[] = "hybrid";
381 static char vsw_mtu_propname[] = "mtu";
382 static char vsw_linkprop_propname[] = "linkprop";
383 static char vsw_maxbw_propname[] = "maxbw";
384 static char port_maxbw_propname[] = "maxbw";
385 
386 /*
387  * Matching criteria passed to the MDEG to register interest
388  * in changes to 'virtual-device-port' nodes identified by their
389  * 'id' property.
390  */
391 static md_prop_match_t vport_prop_match[] = {
392 	{ MDET_PROP_VAL,    "id"   },
393 	{ MDET_LIST_END,    NULL    }
394 };
395 
396 static mdeg_node_match_t vport_match = { "virtual-device-port",
397 						vport_prop_match };
398 
399 /*
400  * Matching criteria passed to the MDEG to register interest
401  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
402  * by their 'name' and 'cfg-handle' properties.
403  */
404 static md_prop_match_t vdev_prop_match[] = {
405 	{ MDET_PROP_STR,    "name"   },
406 	{ MDET_PROP_VAL,    "cfg-handle" },
407 	{ MDET_LIST_END,    NULL    }
408 };
409 
410 static mdeg_node_match_t vdev_match = { "virtual-device",
411 						vdev_prop_match };
412 
413 
414 /*
415  * Specification of an MD node passed to the MDEG to filter any
416  * 'vport' nodes that do not belong to the specified node. This
417  * template is copied for each vsw instance and filled in with
418  * the appropriate 'cfg-handle' value before being passed to the MDEG.
419  */
420 static mdeg_prop_spec_t vsw_prop_template[] = {
421 	{ MDET_PROP_STR,    "name",		vsw_propname },
422 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
423 	{ MDET_LIST_END,    NULL,		NULL	}
424 };
425 
/*
 * Store 'val' in the ps_val member of the second entry of a property spec
 * array (the "cfg-handle" slot in vsw_prop_template). The macro expands to
 * a single parenthesized expression with no trailing semicolon, so it is
 * safe in unbraced if/else bodies; the caller supplies the semicolon.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
427 
428 #ifdef	DEBUG
429 /*
430  * Print debug messages - set to 0x1f to enable all msgs
431  * or 0x0 to turn all off.
432  */
433 int vswdbg = 0x0;
434 
435 /*
436  * debug levels:
437  * 0x01:	Function entry/exit tracing
438  * 0x02:	Internal function messages
439  * 0x04:	Verbose internal messages
440  * 0x08:	Warning messages
441  * 0x10:	Error messages
442  */
443 
444 void
445 vswdebug(vsw_t *vswp, const char *fmt, ...)
446 {
447 	char buf[512];
448 	va_list ap;
449 
450 	va_start(ap, fmt);
451 	(void) vsprintf(buf, fmt, ap);
452 	va_end(ap);
453 
454 	if (vswp == NULL)
455 		cmn_err(CE_CONT, "%s\n", buf);
456 	else
457 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
458 }
459 
460 #endif	/* DEBUG */
461 
462 static struct modlinkage modlinkage = {
463 	MODREV_1,
464 	&vswmodldrv,
465 	NULL
466 };
467 
468 int
469 _init(void)
470 {
471 	int status;
472 
473 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
474 
475 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
476 	if (status != 0) {
477 		return (status);
478 	}
479 
480 	mac_init_ops(&vsw_ops, DRV_NAME);
481 	status = mod_install(&modlinkage);
482 	if (status != 0) {
483 		ddi_soft_state_fini(&vsw_state);
484 	}
485 	return (status);
486 }
487 
488 int
489 _fini(void)
490 {
491 	int status;
492 
493 	status = mod_remove(&modlinkage);
494 	if (status != 0)
495 		return (status);
496 	mac_fini_ops(&vsw_ops);
497 	ddi_soft_state_fini(&vsw_state);
498 
499 	rw_destroy(&vsw_rw);
500 
501 	return (status);
502 }
503 
504 int
505 _info(struct modinfo *modinfop)
506 {
507 	return (mod_info(&modlinkage, modinfop));
508 }
509 
510 static int
511 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
512 {
513 	vsw_t			*vswp;
514 	int			instance;
515 	char			hashname[MAXNAMELEN];
516 	char			qname[TASKQ_NAMELEN];
517 	vsw_attach_progress_t	progress = PROG_init;
518 	int			rv;
519 
520 	switch (cmd) {
521 	case DDI_ATTACH:
522 		break;
523 	case DDI_RESUME:
524 		/* nothing to do for this non-device */
525 		return (DDI_SUCCESS);
526 	case DDI_PM_RESUME:
527 	default:
528 		return (DDI_FAILURE);
529 	}
530 
531 	instance = ddi_get_instance(dip);
532 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
533 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
534 		return (DDI_FAILURE);
535 	}
536 	vswp = ddi_get_soft_state(vsw_state, instance);
537 
538 	if (vswp == NULL) {
539 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
540 		goto vsw_attach_fail;
541 	}
542 
543 	vswp->dip = dip;
544 	vswp->instance = instance;
545 	vswp->phys_link_state = LINK_STATE_UNKNOWN;
546 	ddi_set_driver_private(dip, (caddr_t)vswp);
547 
548 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
549 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
550 	mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL);
551 	cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL);
552 	rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL);
553 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
554 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
555 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
556 
557 	progress |= PROG_locks;
558 
559 	rv = vsw_read_mdprops(vswp);
560 	if (rv != 0)
561 		goto vsw_attach_fail;
562 
563 	progress |= PROG_readmd;
564 
565 	/* setup the unicast forwarding database  */
566 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
567 	    vswp->instance);
568 	D2(vswp, "creating unicast hash table (%s)...", hashname);
569 	vswp->fdb_nchains = vsw_fdb_nchains;
570 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
571 	    mod_hash_null_valdtor, sizeof (void *));
572 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
573 	progress |= PROG_fdb;
574 
575 	/* setup the multicast fowarding database */
576 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
577 	    vswp->instance);
578 	D2(vswp, "creating multicast hash table %s)...", hashname);
579 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
580 	    mod_hash_null_valdtor, sizeof (void *));
581 
582 	progress |= PROG_mfdb;
583 
584 	/*
585 	 * Create the taskq which will process all the VIO
586 	 * control messages.
587 	 */
588 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
589 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
590 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
591 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
592 		    vswp->instance);
593 		goto vsw_attach_fail;
594 	}
595 
596 	progress |= PROG_taskq;
597 
598 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_rxp_taskq%d",
599 	    vswp->instance);
600 	if ((vswp->rxp_taskq = ddi_taskq_create(vswp->dip, qname, 1,
601 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
602 		cmn_err(CE_WARN, "!vsw%d: Unable to create rxp task queue",
603 		    vswp->instance);
604 		goto vsw_attach_fail;
605 	}
606 
607 	progress |= PROG_rxp_taskq;
608 
609 	/* prevent auto-detaching */
610 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
611 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
612 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
613 		    "instance %u", DDI_NO_AUTODETACH, instance);
614 	}
615 
616 	/*
617 	 * The null switching function is set to avoid panic until
618 	 * switch mode is setup.
619 	 */
620 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
621 
622 	/*
623 	 * Setup the required switching mode, based on the mdprops that we read
624 	 * earlier. We start a thread to do this, to avoid calling mac_open()
625 	 * directly from attach().
626 	 */
627 	rv = vsw_setup_switching_start(vswp);
628 	if (rv != 0) {
629 		goto vsw_attach_fail;
630 	}
631 
632 	progress |= PROG_swmode;
633 
634 	/* Register with mac layer as a provider */
635 	rv = vsw_mac_register(vswp);
636 	if (rv != 0)
637 		goto vsw_attach_fail;
638 
639 	progress |= PROG_macreg;
640 
641 	/*
642 	 * Now we have everything setup, register an interest in
643 	 * specific MD nodes.
644 	 *
645 	 * The callback is invoked in 2 cases, firstly if upon mdeg
646 	 * registration there are existing nodes which match our specified
647 	 * criteria, and secondly if the MD is changed (and again, there
648 	 * are nodes which we are interested in present within it. Note
649 	 * that our callback will be invoked even if our specified nodes
650 	 * have not actually changed).
651 	 *
652 	 */
653 	rv = vsw_mdeg_register(vswp);
654 	if (rv != 0)
655 		goto vsw_attach_fail;
656 
657 	progress |= PROG_mdreg;
658 
659 	vswp->attach_progress = progress;
660 
661 	WRITE_ENTER(&vsw_rw);
662 	vswp->next = vsw_head;
663 	vsw_head = vswp;
664 	RW_EXIT(&vsw_rw);
665 
666 	ddi_report_dev(vswp->dip);
667 	return (DDI_SUCCESS);
668 
669 vsw_attach_fail:
670 	DERR(NULL, "vsw_attach: failed");
671 
672 	vswp->attach_progress = progress;
673 	(void) vsw_unattach(vswp);
674 	ddi_soft_state_free(vsw_state, instance);
675 	return (DDI_FAILURE);
676 }
677 
678 static int
679 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
680 {
681 	vsw_t			**vswpp, *vswp;
682 	int 			instance;
683 
684 	instance = ddi_get_instance(dip);
685 	vswp = ddi_get_soft_state(vsw_state, instance);
686 
687 	if (vswp == NULL) {
688 		return (DDI_FAILURE);
689 	}
690 
691 	switch (cmd) {
692 	case DDI_DETACH:
693 		break;
694 	case DDI_SUSPEND:
695 	case DDI_PM_SUSPEND:
696 	default:
697 		return (DDI_FAILURE);
698 	}
699 
700 	D2(vswp, "detaching instance %d", instance);
701 
702 	if (vsw_unattach(vswp) != 0) {
703 		return (DDI_FAILURE);
704 	}
705 
706 	ddi_remove_minor_node(dip, NULL);
707 
708 	WRITE_ENTER(&vsw_rw);
709 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
710 		if (*vswpp == vswp) {
711 			*vswpp = vswp->next;
712 			break;
713 		}
714 	}
715 	RW_EXIT(&vsw_rw);
716 
717 	ddi_soft_state_free(vsw_state, instance);
718 
719 	return (DDI_SUCCESS);
720 }
721 
722 /*
723  * Common routine to handle vsw_attach() failure and vsw_detach(). Note that
724  * the only reason this function could fail is if mac_unregister() fails.
725  * Otherwise, this function must ensure that all resources are freed and return
726  * success.
727  */
728 static int
729 vsw_unattach(vsw_t *vswp)
730 {
731 	vsw_attach_progress_t	progress;
732 
733 	progress = vswp->attach_progress;
734 
735 	/*
736 	 * Unregister from the gldv3 subsystem. This can fail, in particular
737 	 * if there are still any open references to this mac device; in which
738 	 * case we just return failure without continuing to detach further.
739 	 */
740 	if (progress & PROG_macreg) {
741 		if (vsw_mac_unregister(vswp) != 0) {
742 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
743 			    "MAC layer", vswp->instance);
744 			return (1);
745 		}
746 		progress &= ~PROG_macreg;
747 	}
748 
749 	/*
750 	 * Now that we have unregistered from gldv3, we must finish all other
751 	 * steps and successfully return from this function; otherwise we will
752 	 * end up leaving the device in a broken/unusable state.
753 	 *
754 	 * If we have registered with mdeg, unregister now to stop further
755 	 * callbacks to this vsw device and/or its ports. Then, detach any
756 	 * existing ports.
757 	 */
758 	if (progress & PROG_mdreg) {
759 		vsw_mdeg_unregister(vswp);
760 		vsw_detach_ports(vswp);
761 		progress &= ~PROG_mdreg;
762 	}
763 
764 	/*
765 	 * If we have started a thread to setup the switching mode, stop it, if
766 	 * it is still running. If it has finished setting up the switching
767 	 * mode, then we need to clean up some additional things if we are
768 	 * running in L2 mode: first free up any hybrid resources; then stop
769 	 * and close the underlying physical device. Note that we would have
770 	 * already released all per mac_client resources (ucast, mcast addrs,
771 	 * hio-shares etc) as all the ports are detached and if the vsw device
772 	 * itself was in use as an interface, it has been unplumbed (otherwise
773 	 * mac_unregister() above would fail).
774 	 */
775 	if (progress & PROG_swmode) {
776 
777 		vsw_setup_switching_stop(vswp);
778 
779 		if (vswp->hio_capable == B_TRUE) {
780 			vsw_hio_cleanup(vswp);
781 			vswp->hio_capable = B_FALSE;
782 		}
783 
784 		mutex_enter(&vswp->mac_lock);
785 		vsw_mac_close(vswp);
786 		mutex_exit(&vswp->mac_lock);
787 
788 		progress &= ~PROG_swmode;
789 	}
790 
791 	/*
792 	 * We now destroy the taskq used to clean up rx mblk pools that
793 	 * couldn't be destroyed when the ports/channels were detached.
794 	 * We implicitly wait for those tasks to complete in
795 	 * ddi_taskq_destroy().
796 	 */
797 	if (progress & PROG_rxp_taskq) {
798 		ddi_taskq_destroy(vswp->rxp_taskq);
799 		progress &= ~PROG_rxp_taskq;
800 	}
801 
802 	/*
803 	 * By now any pending tasks have finished and the underlying
804 	 * ldc's have been destroyed, so its safe to delete the control
805 	 * message taskq.
806 	 */
807 	if (progress & PROG_taskq) {
808 		ddi_taskq_destroy(vswp->taskq_p);
809 		progress &= ~PROG_taskq;
810 	}
811 
812 	/* Destroy the multicast hash table */
813 	if (progress & PROG_mfdb) {
814 		mod_hash_destroy_hash(vswp->mfdb);
815 		progress &= ~PROG_mfdb;
816 	}
817 
818 	/* Destroy the vlan hash table and fdb */
819 	if (progress & PROG_fdb) {
820 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
821 		mod_hash_destroy_hash(vswp->fdb_hashp);
822 		progress &= ~PROG_fdb;
823 	}
824 
825 	if (progress & PROG_readmd) {
826 		if (VSW_PRI_ETH_DEFINED(vswp)) {
827 			kmem_free(vswp->pri_types,
828 			    sizeof (uint16_t) * vswp->pri_num_types);
829 			(void) vio_destroy_mblks(vswp->pri_tx_vmp);
830 		}
831 		progress &= ~PROG_readmd;
832 	}
833 
834 	if (progress & PROG_locks) {
835 		rw_destroy(&vswp->plist.lockrw);
836 		rw_destroy(&vswp->mfdbrw);
837 		rw_destroy(&vswp->if_lockrw);
838 		rw_destroy(&vswp->maccl_rwlock);
839 		cv_destroy(&vswp->sw_thr_cv);
840 		mutex_destroy(&vswp->sw_thr_lock);
841 		mutex_destroy(&vswp->mca_lock);
842 		mutex_destroy(&vswp->mac_lock);
843 		progress &= ~PROG_locks;
844 	}
845 
846 	vswp->attach_progress = progress;
847 
848 	return (0);
849 }
850 
851 void
852 vsw_destroy_rxpools(void *arg)
853 {
854 	vio_mblk_pool_t	*poolp = (vio_mblk_pool_t *)arg;
855 	vio_mblk_pool_t	*npoolp;
856 
857 	while (poolp != NULL) {
858 		npoolp =  poolp->nextp;
859 		while (vio_destroy_mblks(poolp) != 0) {
860 			drv_usecwait(vsw_rxpool_cleanup_delay);
861 		}
862 		poolp = npoolp;
863 	}
864 }
865 
866 /*
867  * Get the value of the "vsw-phys-dev" property in the specified
868  * node. This property is the name of the physical device that
869  * the virtual switch will use to talk to the outside world.
870  *
871  * Note it is valid for this property to be NULL (but the property
872  * itself must exist). Callers of this routine should verify that
873  * the value returned is what they expected (i.e. either NULL or non NULL).
874  *
875  * On success returns value of the property in region pointed to by
876  * the 'name' argument, and with return value of 0. Otherwise returns 1.
877  */
878 static int
879 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
880 {
881 	int		len = 0;
882 	int		instance;
883 	char		*physname = NULL;
884 	char		*dev;
885 	const char	*dev_name;
886 	char		myname[MAXNAMELEN];
887 
888 	dev_name = ddi_driver_name(vswp->dip);
889 	instance = ddi_get_instance(vswp->dip);
890 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
891 
892 	if (md_get_prop_data(mdp, node, physdev_propname,
893 	    (uint8_t **)(&physname), &len) != 0) {
894 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
895 		    "device(s) from MD", vswp->instance);
896 		return (1);
897 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
898 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
899 		    vswp->instance, physname);
900 		return (1);
901 	} else if (strcmp(myname, physname) == 0) {
902 		/*
903 		 * Prevent the vswitch from opening itself as the
904 		 * network device.
905 		 */
906 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
907 		    vswp->instance, physname);
908 		return (1);
909 	} else {
910 		(void) strncpy(name, physname, strlen(physname) + 1);
911 		D2(vswp, "%s: using first device specified (%s)",
912 		    __func__, physname);
913 	}
914 
915 #ifdef DEBUG
916 	/*
917 	 * As a temporary measure to aid testing we check to see if there
918 	 * is a vsw.conf file present. If there is we use the value of the
919 	 * vsw_physname property in the file as the name of the physical
920 	 * device, overriding the value from the MD.
921 	 *
922 	 * There may be multiple devices listed, but for the moment
923 	 * we just use the first one.
924 	 */
925 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
926 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
927 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
928 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
929 			    vswp->instance, dev);
930 			ddi_prop_free(dev);
931 			return (1);
932 		} else {
933 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
934 			    "config file", vswp->instance, dev);
935 
936 			(void) strncpy(name, dev, strlen(dev) + 1);
937 		}
938 
939 		ddi_prop_free(dev);
940 	}
941 #endif
942 
943 	return (0);
944 }
945 
946 /*
947  * Read the 'vsw-switch-mode' property from the specified MD node.
948  *
949  * Returns 0 on success, otherwise returns 1.
950  */
951 static int
952 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode)
953 {
954 	int		len = 0;
955 	char		*smode = NULL;
956 	char		*curr_mode = NULL;
957 
958 	D1(vswp, "%s: enter", __func__);
959 
960 	/*
961 	 * Get the switch-mode property. The modes are listed in
962 	 * decreasing order of preference, i.e. prefered mode is
963 	 * first item in list.
964 	 */
965 	len = 0;
966 	if (md_get_prop_data(mdp, node, smode_propname,
967 	    (uint8_t **)(&smode), &len) != 0) {
968 		/*
969 		 * Unable to get switch-mode property from MD, nothing
970 		 * more we can do.
971 		 */
972 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
973 		    " from the MD", vswp->instance);
974 		return (1);
975 	}
976 
977 	curr_mode = smode;
978 	/*
979 	 * Modes of operation:
980 	 * 'switched'	 - layer 2 switching, underlying HW in
981 	 *			programmed mode.
982 	 * 'promiscuous' - layer 2 switching, underlying HW in
983 	 *			promiscuous mode.
984 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
985 	 *			in non-promiscuous mode.
986 	 */
987 	while (curr_mode < (smode + len)) {
988 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
989 		if (strcmp(curr_mode, "switched") == 0) {
990 			*mode = VSW_LAYER2;
991 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
992 			*mode = VSW_LAYER2 | VSW_LAYER2_PROMISC;
993 		} else if (strcmp(curr_mode, "routed") == 0) {
994 			*mode = VSW_LAYER3;
995 		} else {
996 			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
997 			    "setting to default switched mode",
998 			    vswp->instance, curr_mode);
999 			*mode = VSW_LAYER2;
1000 		}
1001 		curr_mode += strlen(curr_mode) + 1;
1002 	}
1003 
1004 	D2(vswp, "%s: %d mode", __func__, *mode);
1005 
1006 	D1(vswp, "%s: exit", __func__);
1007 
1008 	return (0);
1009 }
1010 
1011 /*
1012  * Register with the MAC layer as a network device, so we
1013  * can be plumbed if necessary.
1014  */
1015 static int
1016 vsw_mac_register(vsw_t *vswp)
1017 {
1018 	mac_register_t	*macp;
1019 	int		rv;
1020 
1021 	D1(vswp, "%s: enter", __func__);
1022 
1023 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1024 		return (EINVAL);
1025 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1026 	macp->m_driver = vswp;
1027 	macp->m_dip = vswp->dip;
1028 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1029 	macp->m_callbacks = &vsw_m_callbacks;
1030 	macp->m_min_sdu = 0;
1031 	macp->m_max_sdu = vswp->mtu;
1032 	macp->m_margin = VLAN_TAGSZ;
1033 	rv = mac_register(macp, &vswp->if_mh);
1034 	mac_free(macp);
1035 	if (rv != 0) {
1036 		/*
1037 		 * Treat this as a non-fatal error as we may be
1038 		 * able to operate in some other mode.
1039 		 */
1040 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1041 		    "a provider with MAC layer", vswp->instance);
1042 		return (rv);
1043 	}
1044 
1045 	vswp->if_state |= VSW_IF_REG;
1046 
1047 	D1(vswp, "%s: exit", __func__);
1048 
1049 	return (rv);
1050 }
1051 
1052 static int
1053 vsw_mac_unregister(vsw_t *vswp)
1054 {
1055 	int		rv = 0;
1056 
1057 	D1(vswp, "%s: enter", __func__);
1058 
1059 	WRITE_ENTER(&vswp->if_lockrw);
1060 
1061 	if (vswp->if_state & VSW_IF_REG) {
1062 		rv = mac_unregister(vswp->if_mh);
1063 		if (rv != 0) {
1064 			DWARN(vswp, "%s: unable to unregister from MAC "
1065 			    "framework", __func__);
1066 
1067 			RW_EXIT(&vswp->if_lockrw);
1068 			D1(vswp, "%s: fail exit", __func__);
1069 			return (rv);
1070 		}
1071 
1072 		/* mark i/f as down and unregistered */
1073 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1074 	}
1075 	RW_EXIT(&vswp->if_lockrw);
1076 
1077 	D1(vswp, "%s: exit", __func__);
1078 
1079 	return (rv);
1080 }
1081 
1082 static int
1083 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1084 {
1085 	vsw_t			*vswp = (vsw_t *)arg;
1086 
1087 	D1(vswp, "%s: enter", __func__);
1088 
1089 	mutex_enter(&vswp->mac_lock);
1090 	if (vswp->mh == NULL) {
1091 		mutex_exit(&vswp->mac_lock);
1092 		return (EINVAL);
1093 	}
1094 
1095 	/* return stats from underlying device */
1096 	*val = mac_stat_get(vswp->mh, stat);
1097 
1098 	mutex_exit(&vswp->mac_lock);
1099 
1100 	return (0);
1101 }
1102 
1103 static void
1104 vsw_m_stop(void *arg)
1105 {
1106 	vsw_t	*vswp = (vsw_t *)arg;
1107 
1108 	D1(vswp, "%s: enter", __func__);
1109 
1110 	WRITE_ENTER(&vswp->if_lockrw);
1111 	vswp->if_state &= ~VSW_IF_UP;
1112 	RW_EXIT(&vswp->if_lockrw);
1113 
1114 	/* Cleanup and close the mac client */
1115 	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
1116 
1117 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1118 }
1119 
1120 static int
1121 vsw_m_start(void *arg)
1122 {
1123 	int		rv;
1124 	vsw_t		*vswp = (vsw_t *)arg;
1125 
1126 	D1(vswp, "%s: enter", __func__);
1127 
1128 	WRITE_ENTER(&vswp->if_lockrw);
1129 
1130 	vswp->if_state |= VSW_IF_UP;
1131 
1132 	if (vswp->switching_setup_done == B_FALSE) {
1133 		/*
1134 		 * If the switching mode has not been setup yet, just
1135 		 * return. The unicast address will be programmed
1136 		 * after the physical device is successfully setup by the
1137 		 * timeout handler.
1138 		 */
1139 		RW_EXIT(&vswp->if_lockrw);
1140 		return (0);
1141 	}
1142 
1143 	/* if in layer2 mode, program unicast address. */
1144 	if (vswp->mh != NULL) {
1145 		/* Init a mac client and program addresses */
1146 		rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
1147 		if (rv != 0) {
1148 			cmn_err(CE_NOTE,
1149 			    "!vsw%d: failed to program interface "
1150 			    "unicast address\n", vswp->instance);
1151 		}
1152 	}
1153 
1154 	RW_EXIT(&vswp->if_lockrw);
1155 
1156 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1157 	return (0);
1158 }
1159 
1160 /*
1161  * Change the local interface address.
1162  *
1163  * Note: we don't support this entry point. The local
1164  * mac address of the switch can only be changed via its
1165  * MD node properties.
1166  */
1167 static int
1168 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1169 {
1170 	_NOTE(ARGUNUSED(arg, macaddr))
1171 
1172 	return (DDI_FAILURE);
1173 }
1174 
1175 static int
1176 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1177 {
1178 	vsw_t		*vswp = (vsw_t *)arg;
1179 	mcst_addr_t	*mcst_p = NULL;
1180 	uint64_t	addr = 0x0;
1181 	int		i, ret = 0;
1182 
1183 	D1(vswp, "%s: enter", __func__);
1184 
1185 	/*
1186 	 * Convert address into form that can be used
1187 	 * as hash table key.
1188 	 */
1189 	for (i = 0; i < ETHERADDRL; i++) {
1190 		addr = (addr << 8) | mca[i];
1191 	}
1192 
1193 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1194 
1195 	if (add) {
1196 		D2(vswp, "%s: adding multicast", __func__);
1197 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1198 			/*
1199 			 * Update the list of multicast addresses
1200 			 * contained within the vsw_t structure to
1201 			 * include this new one.
1202 			 */
1203 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1204 			if (mcst_p == NULL) {
1205 				DERR(vswp, "%s unable to alloc mem", __func__);
1206 				(void) vsw_del_mcst(vswp,
1207 				    VSW_LOCALDEV, addr, NULL);
1208 				return (1);
1209 			}
1210 			mcst_p->addr = addr;
1211 			ether_copy(mca, &mcst_p->mca);
1212 
1213 			/*
1214 			 * Call into the underlying driver to program the
1215 			 * address into HW.
1216 			 */
1217 			ret = vsw_mac_multicast_add(vswp, NULL, mcst_p,
1218 			    VSW_LOCALDEV);
1219 			if (ret != 0) {
1220 				(void) vsw_del_mcst(vswp,
1221 				    VSW_LOCALDEV, addr, NULL);
1222 				kmem_free(mcst_p, sizeof (*mcst_p));
1223 				return (ret);
1224 			}
1225 
1226 			mutex_enter(&vswp->mca_lock);
1227 			mcst_p->nextp = vswp->mcap;
1228 			vswp->mcap = mcst_p;
1229 			mutex_exit(&vswp->mca_lock);
1230 		} else {
1231 			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
1232 			    "address", vswp->instance);
1233 		}
1234 		return (ret);
1235 	}
1236 
1237 	D2(vswp, "%s: removing multicast", __func__);
1238 	/*
1239 	 * Remove the address from the hash table..
1240 	 */
1241 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1242 
1243 		/*
1244 		 * ..and then from the list maintained in the
1245 		 * vsw_t structure.
1246 		 */
1247 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1248 		ASSERT(mcst_p != NULL);
1249 
1250 		vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV);
1251 		kmem_free(mcst_p, sizeof (*mcst_p));
1252 	}
1253 
1254 	D1(vswp, "%s: exit", __func__);
1255 
1256 	return (0);
1257 }
1258 
1259 static int
1260 vsw_m_promisc(void *arg, boolean_t on)
1261 {
1262 	vsw_t		*vswp = (vsw_t *)arg;
1263 
1264 	D1(vswp, "%s: enter", __func__);
1265 
1266 	WRITE_ENTER(&vswp->if_lockrw);
1267 	if (on)
1268 		vswp->if_state |= VSW_IF_PROMISC;
1269 	else
1270 		vswp->if_state &= ~VSW_IF_PROMISC;
1271 	RW_EXIT(&vswp->if_lockrw);
1272 
1273 	D1(vswp, "%s: exit", __func__);
1274 
1275 	return (0);
1276 }
1277 
1278 static mblk_t *
1279 vsw_m_tx(void *arg, mblk_t *mp)
1280 {
1281 	vsw_t		*vswp = (vsw_t *)arg;
1282 
1283 	D1(vswp, "%s: enter", __func__);
1284 
1285 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1286 
1287 	if (mp == NULL) {
1288 		return (NULL);
1289 	}
1290 
1291 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1292 
1293 	D1(vswp, "%s: exit", __func__);
1294 
1295 	return (NULL);
1296 }
1297 
1298 /*
1299  * Register for machine description (MD) updates.
1300  *
1301  * Returns 0 on success, 1 on failure.
1302  */
1303 static int
1304 vsw_mdeg_register(vsw_t *vswp)
1305 {
1306 	mdeg_prop_spec_t	*pspecp;
1307 	mdeg_node_spec_t	*inst_specp;
1308 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1309 	size_t			templatesz;
1310 	int			rv;
1311 
1312 	D1(vswp, "%s: enter", __func__);
1313 
1314 	/*
1315 	 * Allocate and initialize a per-instance copy
1316 	 * of the global property spec array that will
1317 	 * uniquely identify this vsw instance.
1318 	 */
1319 	templatesz = sizeof (vsw_prop_template);
1320 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1321 
1322 	bcopy(vsw_prop_template, pspecp, templatesz);
1323 
1324 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1325 
1326 	/* initialize the complete prop spec structure */
1327 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1328 	inst_specp->namep = "virtual-device";
1329 	inst_specp->specp = pspecp;
1330 
1331 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1332 	    vswp->regprop);
1333 	/*
1334 	 * Register an interest in 'virtual-device' nodes with a
1335 	 * 'name' property of 'virtual-network-switch'
1336 	 */
1337 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1338 	    (void *)vswp, &mdeg_hdl);
1339 	if (rv != MDEG_SUCCESS) {
1340 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1341 		    __func__, rv);
1342 		goto mdeg_reg_fail;
1343 	}
1344 
1345 	/*
1346 	 * Register an interest in 'vsw-port' nodes.
1347 	 */
1348 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1349 	    (void *)vswp, &mdeg_port_hdl);
1350 	if (rv != MDEG_SUCCESS) {
1351 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1352 		(void) mdeg_unregister(mdeg_hdl);
1353 		goto mdeg_reg_fail;
1354 	}
1355 
1356 	/* save off data that will be needed later */
1357 	vswp->inst_spec = inst_specp;
1358 	vswp->mdeg_hdl = mdeg_hdl;
1359 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1360 
1361 	D1(vswp, "%s: exit", __func__);
1362 	return (0);
1363 
1364 mdeg_reg_fail:
1365 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1366 	    vswp->instance);
1367 	kmem_free(pspecp, templatesz);
1368 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1369 
1370 	vswp->mdeg_hdl = NULL;
1371 	vswp->mdeg_port_hdl = NULL;
1372 
1373 	return (1);
1374 }
1375 
1376 static void
1377 vsw_mdeg_unregister(vsw_t *vswp)
1378 {
1379 	D1(vswp, "vsw_mdeg_unregister: enter");
1380 
1381 	if (vswp->mdeg_hdl != NULL)
1382 		(void) mdeg_unregister(vswp->mdeg_hdl);
1383 
1384 	if (vswp->mdeg_port_hdl != NULL)
1385 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1386 
1387 	if (vswp->inst_spec != NULL) {
1388 		if (vswp->inst_spec->specp != NULL) {
1389 			(void) kmem_free(vswp->inst_spec->specp,
1390 			    sizeof (vsw_prop_template));
1391 			vswp->inst_spec->specp = NULL;
1392 		}
1393 
1394 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1395 		vswp->inst_spec = NULL;
1396 	}
1397 
1398 	D1(vswp, "vsw_mdeg_unregister: exit");
1399 }
1400 
1401 /*
1402  * Mdeg callback invoked for the vsw node itself.
1403  */
1404 static int
1405 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1406 {
1407 	vsw_t		*vswp;
1408 	md_t		*mdp;
1409 	mde_cookie_t	node;
1410 	uint64_t	inst;
1411 	char		*node_name = NULL;
1412 
1413 	if (resp == NULL)
1414 		return (MDEG_FAILURE);
1415 
1416 	vswp = (vsw_t *)cb_argp;
1417 
1418 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1419 	    " : prev matched %d", __func__, resp->added.nelem,
1420 	    resp->removed.nelem, resp->match_curr.nelem,
1421 	    resp->match_prev.nelem);
1422 
1423 	/*
1424 	 * We get an initial callback for this node as 'added'
1425 	 * after registering with mdeg. Note that we would have
1426 	 * already gathered information about this vsw node by
1427 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1428 	 * So, there is a window where the properties of this
1429 	 * node might have changed when we get this initial 'added'
1430 	 * callback. We handle this as if an update occured
1431 	 * and invoke the same function which handles updates to
1432 	 * the properties of this vsw-node if any.
1433 	 *
1434 	 * A non-zero 'match' value indicates that the MD has been
1435 	 * updated and that a virtual-network-switch node is
1436 	 * present which may or may not have been updated. It is
1437 	 * up to the clients to examine their own nodes and
1438 	 * determine if they have changed.
1439 	 */
1440 	if (resp->added.nelem != 0) {
1441 
1442 		if (resp->added.nelem != 1) {
1443 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1444 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1445 			return (MDEG_FAILURE);
1446 		}
1447 
1448 		mdp = resp->added.mdp;
1449 		node = resp->added.mdep[0];
1450 
1451 	} else if (resp->match_curr.nelem != 0) {
1452 
1453 		if (resp->match_curr.nelem != 1) {
1454 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1455 			    "invalid: %d\n", vswp->instance,
1456 			    resp->match_curr.nelem);
1457 			return (MDEG_FAILURE);
1458 		}
1459 
1460 		mdp = resp->match_curr.mdp;
1461 		node = resp->match_curr.mdep[0];
1462 
1463 	} else {
1464 		return (MDEG_FAILURE);
1465 	}
1466 
1467 	/* Validate name and instance */
1468 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1469 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1470 		return (MDEG_FAILURE);
1471 	}
1472 
1473 	/* is this a virtual-network-switch? */
1474 	if (strcmp(node_name, vsw_propname) != 0) {
1475 		DERR(vswp, "%s: Invalid node name: %s\n",
1476 		    __func__, node_name);
1477 		return (MDEG_FAILURE);
1478 	}
1479 
1480 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1481 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1482 		    __func__);
1483 		return (MDEG_FAILURE);
1484 	}
1485 
1486 	/* is this the right instance of vsw? */
1487 	if (inst != vswp->regprop) {
1488 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1489 		    __func__, inst);
1490 		return (MDEG_FAILURE);
1491 	}
1492 
1493 	vsw_update_md_prop(vswp, mdp, node);
1494 
1495 	return (MDEG_SUCCESS);
1496 }
1497 
1498 /*
1499  * Mdeg callback invoked for changes to the vsw-port nodes
1500  * under the vsw node.
1501  */
1502 static int
1503 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1504 {
1505 	vsw_t		*vswp;
1506 	int		idx;
1507 	md_t		*mdp;
1508 	mde_cookie_t	node;
1509 	uint64_t	inst;
1510 	int		rv;
1511 
1512 	if ((resp == NULL) || (cb_argp == NULL))
1513 		return (MDEG_FAILURE);
1514 
1515 	vswp = (vsw_t *)cb_argp;
1516 
1517 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1518 	    " : prev matched %d", __func__, resp->added.nelem,
1519 	    resp->removed.nelem, resp->match_curr.nelem,
1520 	    resp->match_prev.nelem);
1521 
1522 	/* process added ports */
1523 	for (idx = 0; idx < resp->added.nelem; idx++) {
1524 		mdp = resp->added.mdp;
1525 		node = resp->added.mdep[idx];
1526 
1527 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1528 
1529 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1530 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1531 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1532 		}
1533 	}
1534 
1535 	/* process removed ports */
1536 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1537 		mdp = resp->removed.mdp;
1538 		node = resp->removed.mdep[idx];
1539 
1540 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1541 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1542 			    __func__, id_propname, idx);
1543 			continue;
1544 		}
1545 
1546 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1547 
1548 		if (vsw_port_detach(vswp, inst) != 0) {
1549 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1550 			    vswp->instance, inst);
1551 		}
1552 	}
1553 
1554 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1555 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1556 		    resp->match_curr.mdep[idx],
1557 		    resp->match_prev.mdp,
1558 		    resp->match_prev.mdep[idx]);
1559 	}
1560 
1561 	D1(vswp, "%s: exit", __func__);
1562 
1563 	return (MDEG_SUCCESS);
1564 }
1565 
1566 /*
1567  * Scan the machine description for this instance of vsw
1568  * and read its properties. Called only from vsw_attach().
1569  * Returns: 0 on success, 1 on failure.
1570  */
1571 static int
1572 vsw_read_mdprops(vsw_t *vswp)
1573 {
1574 	md_t		*mdp = NULL;
1575 	mde_cookie_t	rootnode;
1576 	mde_cookie_t	*listp = NULL;
1577 	uint64_t	inst;
1578 	uint64_t	cfgh;
1579 	char		*name;
1580 	int		rv = 1;
1581 	int		num_nodes = 0;
1582 	int		num_devs = 0;
1583 	int		listsz = 0;
1584 	int		i;
1585 
1586 	/*
1587 	 * In each 'virtual-device' node in the MD there is a
1588 	 * 'cfg-handle' property which is the MD's concept of
1589 	 * an instance number (this may be completely different from
1590 	 * the device drivers instance #). OBP reads that value and
1591 	 * stores it in the 'reg' property of the appropriate node in
1592 	 * the device tree. We first read this reg property and use this
1593 	 * to compare against the 'cfg-handle' property of vsw nodes
1594 	 * in MD to get to this specific vsw instance and then read
1595 	 * other properties that we are interested in.
1596 	 * We also cache the value of 'reg' property and use it later
1597 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1598 	 */
1599 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1600 	    DDI_PROP_DONTPASS, reg_propname, -1);
1601 	if (inst == -1) {
1602 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1603 		    "OBP device tree", vswp->instance, reg_propname);
1604 		return (rv);
1605 	}
1606 
1607 	vswp->regprop = inst;
1608 
1609 	if ((mdp = md_get_handle()) == NULL) {
1610 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1611 		return (rv);
1612 	}
1613 
1614 	num_nodes = md_node_count(mdp);
1615 	ASSERT(num_nodes > 0);
1616 
1617 	listsz = num_nodes * sizeof (mde_cookie_t);
1618 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1619 
1620 	rootnode = md_root_node(mdp);
1621 
1622 	/* search for all "virtual_device" nodes */
1623 	num_devs = md_scan_dag(mdp, rootnode,
1624 	    md_find_name(mdp, vdev_propname),
1625 	    md_find_name(mdp, "fwd"), listp);
1626 	if (num_devs <= 0) {
1627 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1628 		goto vsw_readmd_exit;
1629 	}
1630 
1631 	/*
1632 	 * Now loop through the list of virtual-devices looking for
1633 	 * devices with name "virtual-network-switch" and for each
1634 	 * such device compare its instance with what we have from
1635 	 * the 'reg' property to find the right node in MD and then
1636 	 * read all its properties.
1637 	 */
1638 	for (i = 0; i < num_devs; i++) {
1639 
1640 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1641 			DWARN(vswp, "%s: name property not found\n",
1642 			    __func__);
1643 			goto vsw_readmd_exit;
1644 		}
1645 
1646 		/* is this a virtual-network-switch? */
1647 		if (strcmp(name, vsw_propname) != 0)
1648 			continue;
1649 
1650 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1651 			DWARN(vswp, "%s: cfg-handle property not found\n",
1652 			    __func__);
1653 			goto vsw_readmd_exit;
1654 		}
1655 
1656 		/* is this the required instance of vsw? */
1657 		if (inst != cfgh)
1658 			continue;
1659 
1660 		/* now read all properties of this vsw instance */
1661 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1662 		break;
1663 	}
1664 
1665 vsw_readmd_exit:
1666 
1667 	kmem_free(listp, listsz);
1668 	(void) md_fini_handle(mdp);
1669 	return (rv);
1670 }
1671 
1672 /*
1673  * Read the initial start-of-day values from the specified MD node.
1674  */
1675 static int
1676 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1677 {
1678 	uint64_t	macaddr = 0;
1679 
1680 	D1(vswp, "%s: enter", __func__);
1681 
1682 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1683 		return (1);
1684 	}
1685 
1686 	/* mac address for vswitch device itself */
1687 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1688 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1689 		    vswp->instance);
1690 		return (1);
1691 	}
1692 
1693 	vsw_save_lmacaddr(vswp, macaddr);
1694 
1695 	if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) {
1696 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1697 		    "defaulting to 'switched' mode",
1698 		    __func__, smode_propname);
1699 
1700 		vswp->smode = VSW_LAYER2;
1701 	}
1702 
1703 	/*
1704 	 * Read the 'linkprop' property to know if this
1705 	 * vsw device wants to get physical link updates.
1706 	 */
1707 	vsw_linkprop_read(vswp, mdp, node, &vswp->pls_update);
1708 
1709 	/* read mtu */
1710 	vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
1711 	if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
1712 		vswp->mtu = ETHERMTU;
1713 	}
1714 	vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
1715 	    VLAN_TAGSZ;
1716 
1717 	/* read vlan id properties of this vsw instance */
1718 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1719 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1720 
1721 	/* read priority-ether-types */
1722 	vsw_read_pri_eth_types(vswp, mdp, node);
1723 
1724 	/* read bandwidth property of this vsw instance */
1725 	vsw_bandwidth_read(vswp, mdp, node, &vswp->bandwidth);
1726 
1727 	D1(vswp, "%s: exit", __func__);
1728 	return (0);
1729 }
1730 
1731 /*
1732  * Read vlan id properties of the given MD node.
1733  * Arguments:
1734  *   arg:          device argument(vsw device or a port)
1735  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1736  *   mdp:          machine description
1737  *   node:         md node cookie
1738  *
1739  * Returns:
1740  *   pvidp:        port-vlan-id of the node
1741  *   vidspp:       list of vlan-ids of the node
1742  *   nvidsp:       # of vlan-ids in the list
1743  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1744  */
1745 static void
1746 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1747 	uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp,
1748 	uint16_t *default_idp)
1749 {
1750 	vsw_t		*vswp;
1751 	vsw_port_t	*portp;
1752 	char		*pvid_propname;
1753 	char		*vid_propname;
1754 	uint_t		nvids = 0;
1755 	uint32_t	vids_size;
1756 	int		rv;
1757 	int		i;
1758 	uint64_t	*data;
1759 	uint64_t	val;
1760 	int		size;
1761 	int		inst;
1762 
1763 	if (type == VSW_LOCALDEV) {
1764 
1765 		vswp = (vsw_t *)arg;
1766 		pvid_propname = vsw_pvid_propname;
1767 		vid_propname = vsw_vid_propname;
1768 		inst = vswp->instance;
1769 
1770 	} else if (type == VSW_VNETPORT) {
1771 
1772 		portp = (vsw_port_t *)arg;
1773 		vswp = portp->p_vswp;
1774 		pvid_propname = port_pvid_propname;
1775 		vid_propname = port_vid_propname;
1776 		inst = portp->p_instance;
1777 
1778 	} else {
1779 		return;
1780 	}
1781 
1782 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1783 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1784 		if (rv != 0) {
1785 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1786 			    vsw_dvid_propname);
1787 
1788 			*default_idp = vsw_default_vlan_id;
1789 		} else {
1790 			*default_idp = val & 0xFFF;
1791 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1792 			    vsw_dvid_propname, inst, *default_idp);
1793 		}
1794 	}
1795 
1796 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1797 	if (rv != 0) {
1798 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1799 		*pvidp = vsw_default_vlan_id;
1800 	} else {
1801 
1802 		*pvidp = val & 0xFFF;
1803 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1804 		    pvid_propname, inst, *pvidp);
1805 	}
1806 
1807 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1808 	    &size);
1809 	if (rv != 0) {
1810 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1811 		size = 0;
1812 	} else {
1813 		size /= sizeof (uint64_t);
1814 	}
1815 	nvids = size;
1816 
1817 	if (nvids != 0) {
1818 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1819 		vids_size = sizeof (vsw_vlanid_t) * nvids;
1820 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1821 		for (i = 0; i < nvids; i++) {
1822 			(*vidspp)[i].vl_vid = data[i] & 0xFFFF;
1823 			(*vidspp)[i].vl_set = B_FALSE;
1824 			D2(vswp, " %d ", (*vidspp)[i].vl_vid);
1825 		}
1826 		D2(vswp, "\n");
1827 	}
1828 
1829 	*nvidsp = nvids;
1830 }
1831 
1832 static void
1833 vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, mde_cookie_t node,
1834     uint64_t *bw)
1835 {
1836 	int		rv;
1837 	uint64_t	val;
1838 	vsw_t		*vswp;
1839 
1840 	vswp = portp->p_vswp;
1841 
1842 	rv = md_get_prop_val(mdp, node, port_maxbw_propname, &val);
1843 
1844 	if (rv != 0) {
1845 		*bw = 0;
1846 		D3(vswp, "%s: prop(%s) not found\n", __func__,
1847 		    port_maxbw_propname);
1848 	} else {
1849 		*bw = val;
1850 		D3(vswp, "%s: %s nodes found", __func__, port_maxbw_propname);
1851 	}
1852 }
1853 
1854 /*
1855  * This function reads "priority-ether-types" property from md. This property
1856  * is used to enable support for priority frames. Applications which need
1857  * guaranteed and timely delivery of certain high priority frames to/from
1858  * a vnet or vsw within ldoms, should configure this property by providing
1859  * the ether type(s) for which the priority facility is needed.
1860  * Normal data frames are delivered over a ldc channel using the descriptor
1861  * ring mechanism which is constrained by factors such as descriptor ring size,
1862  * the rate at which the ring is processed at the peer ldc end point, etc.
1863  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1864  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1865  * descriptor ring path and enables a more reliable and timely delivery of
1866  * frames to the peer.
1867  */
1868 static void
1869 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1870 {
1871 	int		rv;
1872 	uint16_t	*types;
1873 	uint64_t	*data;
1874 	int		size;
1875 	int		i;
1876 	size_t		mblk_sz;
1877 
1878 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1879 	    (uint8_t **)&data, &size);
1880 	if (rv != 0) {
1881 		/*
1882 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1883 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1884 		 */
1885 		if (vsw_pri_eth_type != 0) {
1886 			size = sizeof (vsw_pri_eth_type);
1887 			data = &vsw_pri_eth_type;
1888 		} else {
1889 			D3(vswp, "%s: prop(%s) not found", __func__,
1890 			    pri_types_propname);
1891 			size = 0;
1892 		}
1893 	}
1894 
1895 	if (size == 0) {
1896 		vswp->pri_num_types = 0;
1897 		return;
1898 	}
1899 
1900 	/*
1901 	 * we have some priority-ether-types defined;
1902 	 * allocate a table of these types and also
1903 	 * allocate a pool of mblks to transmit these
1904 	 * priority packets.
1905 	 */
1906 	size /= sizeof (uint64_t);
1907 	vswp->pri_num_types = size;
1908 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1909 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1910 		types[i] = data[i] & 0xFFFF;
1911 	}
1912 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1913 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1914 }
1915 
1916 static void
1917 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1918 {
1919 	int		rv;
1920 	int		inst;
1921 	uint64_t	val;
1922 	char		*mtu_propname;
1923 
1924 	mtu_propname = vsw_mtu_propname;
1925 	inst = vswp->instance;
1926 
1927 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1928 	if (rv != 0) {
1929 		D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
1930 		*mtu = vsw_ethermtu;
1931 	} else {
1932 
1933 		*mtu = val & 0xFFFF;
1934 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1935 		    mtu_propname, inst, *mtu);
1936 	}
1937 }
1938 
1939 /*
1940  * Update the mtu of the vsw device. We first check if the device has been
1941  * plumbed and if so fail the mtu update. Otherwise, we continue to update the
1942  * new mtu and reset all ports to initiate handshake re-negotiation with peers
1943  * using the new mtu.
1944  */
1945 static int
1946 vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
1947 {
1948 	int	rv;
1949 
1950 	WRITE_ENTER(&vswp->if_lockrw);
1951 
1952 	if (vswp->if_state & VSW_IF_UP) {
1953 
1954 		RW_EXIT(&vswp->if_lockrw);
1955 
1956 		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
1957 		    " as the device is plumbed\n", vswp->instance);
1958 		return (EBUSY);
1959 
1960 	} else {
1961 
1962 		D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
1963 		    __func__, vswp->mtu, mtu);
1964 
1965 		vswp->mtu = mtu;
1966 		vswp->max_frame_size = vswp->mtu +
1967 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1968 
1969 		rv = mac_maxsdu_update(vswp->if_mh, mtu);
1970 		if (rv != 0) {
1971 			cmn_err(CE_NOTE,
1972 			    "!vsw%d: Unable to update mtu with mac"
1973 			    " layer\n", vswp->instance);
1974 		}
1975 
1976 		RW_EXIT(&vswp->if_lockrw);
1977 
1978 		/* Reset ports to renegotiate with the new mtu */
1979 		vsw_reset_ports(vswp);
1980 
1981 	}
1982 
1983 	return (0);
1984 }
1985 
1986 static void
1987 vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
1988 	boolean_t *pls)
1989 {
1990 	int		rv;
1991 	uint64_t	val;
1992 	char		*linkpropname;
1993 
1994 	linkpropname = vsw_linkprop_propname;
1995 
1996 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
1997 	if (rv != 0) {
1998 		D3(vswp, "%s: prop(%s) not found", __func__, linkpropname);
1999 		*pls = B_FALSE;
2000 	} else {
2001 
2002 		*pls = (val & 0x1) ? B_TRUE : B_FALSE;
2003 		D2(vswp, "%s: %s(%d): (%d)\n", __func__, linkpropname,
2004 		    vswp->instance, *pls);
2005 	}
2006 }
2007 
2008 void
2009 vsw_mac_link_update(vsw_t *vswp, link_state_t link_state)
2010 {
2011 	READ_ENTER(&vswp->if_lockrw);
2012 
2013 	if (vswp->if_state & VSW_IF_REG) {
2014 		mac_link_update(vswp->if_mh, link_state);
2015 	}
2016 
2017 	RW_EXIT(&vswp->if_lockrw);
2018 }
2019 
2020 void
2021 vsw_physlink_state_update(vsw_t *vswp)
2022 {
2023 	if (vswp->pls_update == B_TRUE) {
2024 		vsw_mac_link_update(vswp, vswp->phys_link_state);
2025 	}
2026 	vsw_physlink_update_ports(vswp);
2027 }
2028 
2029 static void
2030 vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint64_t *bw)
2031 {
2032 	/* read the vsw bandwidth from md */
2033 	int		rv;
2034 	uint64_t	val;
2035 
2036 	rv = md_get_prop_val(mdp, node, vsw_maxbw_propname, &val);
2037 	if (rv != 0) {
2038 		*bw = 0;
2039 		D3(vswp, "%s: prop(%s) not found", __func__,
2040 		    vsw_maxbw_propname);
2041 	} else {
2042 		*bw = val;
2043 		D3(vswp, "%s: %s(%d): (%ld)\n", __func__,
2044 		    vsw_maxbw_propname, vswp->instance, *bw);
2045 	}
2046 }
2047 
2048 /*
2049  * Check to see if the relevant properties in the specified node have
2050  * changed, and if so take the appropriate action.
2051  *
2052  * If any of the properties are missing or invalid we don't take
2053  * any action, as this function should only be invoked when modifications
2054  * have been made to what we assume is a working configuration, which
2055  * we leave active.
2056  *
2057  * Note it is legal for this routine to be invoked even if none of the
2058  * properties in the port node within the MD have actually changed.
2059  */
2060 static void
2061 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
2062 {
2063 	char		physname[LIFNAMSIZ];
2064 	char		drv[LIFNAMSIZ];
2065 	uint_t		ddi_instance;
2066 	uint8_t		new_smode;
2067 	int		i;
2068 	uint64_t 	macaddr = 0;
2069 	enum		{MD_init = 0x1,
2070 				MD_physname = 0x2,
2071 				MD_macaddr = 0x4,
2072 				MD_smode = 0x8,
2073 				MD_vlans = 0x10,
2074 				MD_mtu = 0x20,
2075 				MD_pls = 0x40,
2076 				MD_bw = 0x80} updated;
2077 	int		rv;
2078 	uint16_t	pvid;
2079 	vsw_vlanid_t	*vids;
2080 	uint16_t	nvids;
2081 	uint32_t	mtu;
2082 	boolean_t	pls_update;
2083 	uint64_t	maxbw;
2084 
2085 	updated = MD_init;
2086 
2087 	D1(vswp, "%s: enter", __func__);
2088 
2089 	/*
2090 	 * Check if name of physical device in MD has changed.
2091 	 */
2092 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
2093 		/*
2094 		 * Do basic sanity check on new device name/instance,
2095 		 * if its non NULL. It is valid for the device name to
2096 		 * have changed from a non NULL to a NULL value, i.e.
2097 		 * the vsw is being changed to 'routed' mode.
2098 		 */
2099 		if ((strlen(physname) != 0) &&
2100 		    (ddi_parse(physname, drv,
2101 		    &ddi_instance) != DDI_SUCCESS)) {
2102 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
2103 			    " a valid device name/instance",
2104 			    vswp->instance, physname);
2105 			goto fail_reconf;
2106 		}
2107 
2108 		if (strcmp(physname, vswp->physname)) {
2109 			D2(vswp, "%s: device name changed from %s to %s",
2110 			    __func__, vswp->physname, physname);
2111 
2112 			updated |= MD_physname;
2113 		} else {
2114 			D2(vswp, "%s: device name unchanged at %s",
2115 			    __func__, vswp->physname);
2116 		}
2117 	} else {
2118 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
2119 		    "device from updated MD.", vswp->instance);
2120 		goto fail_reconf;
2121 	}
2122 
2123 	/*
2124 	 * Check if MAC address has changed.
2125 	 */
2126 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
2127 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
2128 		    vswp->instance);
2129 		goto fail_reconf;
2130 	} else {
2131 		uint64_t maddr = macaddr;
2132 		READ_ENTER(&vswp->if_lockrw);
2133 		for (i = ETHERADDRL - 1; i >= 0; i--) {
2134 			if (vswp->if_addr.ether_addr_octet[i]
2135 			    != (macaddr & 0xFF)) {
2136 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
2137 				    __func__, i,
2138 				    vswp->if_addr.ether_addr_octet[i],
2139 				    (macaddr & 0xFF));
2140 				updated |= MD_macaddr;
2141 				macaddr = maddr;
2142 				break;
2143 			}
2144 			macaddr >>= 8;
2145 		}
2146 		RW_EXIT(&vswp->if_lockrw);
2147 		if (updated & MD_macaddr) {
2148 			vsw_save_lmacaddr(vswp, macaddr);
2149 		}
2150 	}
2151 
2152 	/*
2153 	 * Check if switching modes have changed.
2154 	 */
2155 	if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) {
2156 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2157 		    vswp->instance, smode_propname);
2158 		goto fail_reconf;
2159 	} else {
2160 		if (new_smode != vswp->smode) {
2161 			D2(vswp, "%s: switching mode changed from %d to %d",
2162 			    __func__, vswp->smode, new_smode);
2163 
2164 			updated |= MD_smode;
2165 		}
2166 	}
2167 
2168 	/* Read the vlan ids */
2169 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2170 	    &nvids, NULL);
2171 
2172 	/* Determine if there are any vlan id updates */
2173 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2174 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2175 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2176 	    !vsw_cmp_vids(vids, vswp->vids, nvids))) {
2177 		updated |= MD_vlans;
2178 	}
2179 
2180 	/* Read mtu */
2181 	vsw_mtu_read(vswp, mdp, node, &mtu);
2182 	if (mtu != vswp->mtu) {
2183 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2184 			updated |= MD_mtu;
2185 		} else {
2186 			cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2187 			    " as the specified value:%d is invalid\n",
2188 			    vswp->instance, mtu);
2189 		}
2190 	}
2191 
2192 	/*
2193 	 * Read the 'linkprop' property.
2194 	 */
2195 	vsw_linkprop_read(vswp, mdp, node, &pls_update);
2196 	if (pls_update != vswp->pls_update) {
2197 		updated |= MD_pls;
2198 	}
2199 
2200 	/* Read bandwidth */
2201 	vsw_bandwidth_read(vswp, mdp, node, &maxbw);
2202 	if (maxbw != vswp->bandwidth) {
2203 		if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) {
2204 			updated |= MD_bw;
2205 		} else {
2206 			cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth"
2207 			    " update as the specified value:%ld is invalid\n",
2208 			    vswp->instance, maxbw);
2209 		}
2210 	}
2211 
2212 	/*
2213 	 * Now make any changes which are needed...
2214 	 */
2215 	if (updated & MD_pls) {
2216 
2217 		/* save the updated property. */
2218 		vswp->pls_update = pls_update;
2219 
2220 		if (pls_update == B_FALSE) {
2221 			/*
2222 			 * Phys link state update is now disabled for this vsw
2223 			 * interface. If we had previously reported a link-down
2224 			 * to the stack, undo that by sending a link-up.
2225 			 */
2226 			if (vswp->phys_link_state == LINK_STATE_DOWN) {
2227 				vsw_mac_link_update(vswp, LINK_STATE_UP);
2228 			}
2229 		} else {
2230 			/*
2231 			 * Phys link state update is now enabled. Send up an
2232 			 * update based on the current phys link state.
2233 			 */
2234 			if (vswp->smode & VSW_LAYER2) {
2235 				vsw_mac_link_update(vswp,
2236 				    vswp->phys_link_state);
2237 			}
2238 		}
2239 
2240 	}
2241 
2242 	if (updated & (MD_physname | MD_smode | MD_mtu)) {
2243 
2244 		/*
2245 		 * Stop any pending thread to setup switching mode.
2246 		 */
2247 		vsw_setup_switching_stop(vswp);
2248 
2249 		/* Cleanup HybridIO */
2250 		vsw_hio_cleanup(vswp);
2251 
2252 		/*
2253 		 * Remove unicst, mcst addrs of vsw interface
2254 		 * and ports from the physdev. This also closes
2255 		 * the corresponding mac clients.
2256 		 */
2257 		vsw_unset_addrs(vswp);
2258 
2259 		/*
2260 		 * Stop, detach and close the old device..
2261 		 */
2262 		mutex_enter(&vswp->mac_lock);
2263 		vsw_mac_close(vswp);
2264 		mutex_exit(&vswp->mac_lock);
2265 
2266 		/*
2267 		 * Update phys name.
2268 		 */
2269 		if (updated & MD_physname) {
2270 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2271 			    vswp->instance, vswp->physname, physname);
2272 			(void) strncpy(vswp->physname,
2273 			    physname, strlen(physname) + 1);
2274 		}
2275 
2276 		/*
2277 		 * Update array with the new switch mode values.
2278 		 */
2279 		if (updated & MD_smode) {
2280 			vswp->smode = new_smode;
2281 		}
2282 
2283 		/* Update mtu */
2284 		if (updated & MD_mtu) {
2285 			rv = vsw_mtu_update(vswp, mtu);
2286 			if (rv != 0) {
2287 				goto fail_update;
2288 			}
2289 		}
2290 
2291 		/*
2292 		 * ..and attach, start the new device.
2293 		 */
2294 		rv = vsw_setup_switching(vswp);
2295 		if (rv == EAGAIN) {
2296 			/*
2297 			 * Unable to setup switching mode.
2298 			 * As the error is EAGAIN, schedule a thread to retry
2299 			 * and return. Programming addresses of ports and
2300 			 * vsw interface will be done by the thread when the
2301 			 * switching setup completes successfully.
2302 			 */
2303 			if (vsw_setup_switching_start(vswp) != 0) {
2304 				goto fail_update;
2305 			}
2306 			return;
2307 
2308 		} else if (rv) {
2309 			goto fail_update;
2310 		}
2311 
2312 		vsw_setup_switching_post_process(vswp);
2313 	} else if (updated & MD_macaddr) {
2314 		/*
2315 		 * We enter here if only MD_macaddr is exclusively updated.
2316 		 * If MD_physname and/or MD_smode are also updated, then
2317 		 * as part of that, we would have implicitly processed
2318 		 * MD_macaddr update (above).
2319 		 */
2320 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2321 		    vswp->instance, macaddr);
2322 
2323 		READ_ENTER(&vswp->if_lockrw);
2324 		if (vswp->if_state & VSW_IF_UP) {
2325 			/* reconfigure with new address */
2326 			vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0);
2327 
2328 			/*
2329 			 * Notify the MAC layer of the changed address.
2330 			 */
2331 			mac_unicst_update(vswp->if_mh,
2332 			    (uint8_t *)&vswp->if_addr);
2333 
2334 		}
2335 		RW_EXIT(&vswp->if_lockrw);
2336 
2337 	}
2338 
2339 	if (updated & MD_vlans) {
2340 		/* Remove existing vlan ids from the hash table. */
2341 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2342 
2343 		if (vswp->if_state & VSW_IF_UP) {
2344 			vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids);
2345 		} else {
2346 			if (vswp->nvids != 0) {
2347 				kmem_free(vswp->vids,
2348 				    sizeof (vsw_vlanid_t) * vswp->nvids);
2349 			}
2350 			vswp->vids = vids;
2351 			vswp->nvids = nvids;
2352 			vswp->pvid = pvid;
2353 		}
2354 
2355 		/* add these new vlan ids into hash table */
2356 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2357 	} else {
2358 		if (nvids != 0) {
2359 			kmem_free(vids, sizeof (vsw_vlanid_t) * nvids);
2360 		}
2361 	}
2362 
2363 	if (updated & MD_bw) {
2364 		vsw_update_bandwidth(vswp, NULL, VSW_LOCALDEV, maxbw);
2365 	}
2366 
2367 	return;
2368 
2369 fail_reconf:
2370 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2371 	return;
2372 
2373 fail_update:
2374 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2375 	    vswp->instance);
2376 }
2377 
2378 /*
2379  * Read the port's md properties.
2380  */
2381 static int
2382 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2383 	md_t *mdp, mde_cookie_t *node)
2384 {
2385 	uint64_t		ldc_id;
2386 	uint8_t			*addrp;
2387 	int			i, addrsz;
2388 	int			num_nodes = 0, nchan = 0;
2389 	int			listsz = 0;
2390 	mde_cookie_t		*listp = NULL;
2391 	struct ether_addr	ea;
2392 	uint64_t		macaddr;
2393 	uint64_t		inst = 0;
2394 	uint64_t		val;
2395 
2396 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2397 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2398 		    id_propname);
2399 		return (1);
2400 	}
2401 
2402 	/*
2403 	 * Find the channel endpoint node(s) (which should be under this
2404 	 * port node) which contain the channel id(s).
2405 	 */
2406 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2407 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2408 		    __func__, num_nodes);
2409 		return (1);
2410 	}
2411 
2412 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2413 
2414 	/* allocate enough space for node list */
2415 	listsz = num_nodes * sizeof (mde_cookie_t);
2416 	listp = kmem_zalloc(listsz, KM_SLEEP);
2417 
2418 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2419 	    md_find_name(mdp, "fwd"), listp);
2420 
2421 	if (nchan <= 0) {
2422 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2423 		kmem_free(listp, listsz);
2424 		return (1);
2425 	}
2426 
2427 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2428 
2429 	/* use property from first node found */
2430 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2431 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2432 		    id_propname);
2433 		kmem_free(listp, listsz);
2434 		return (1);
2435 	}
2436 
2437 	/* don't need list any more */
2438 	kmem_free(listp, listsz);
2439 
2440 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2441 
2442 	/* read mac-address property */
2443 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2444 	    &addrp, &addrsz)) {
2445 		DWARN(vswp, "%s: prop(%s) not found",
2446 		    __func__, remaddr_propname);
2447 		return (1);
2448 	}
2449 
2450 	if (addrsz < ETHERADDRL) {
2451 		DWARN(vswp, "%s: invalid address size", __func__);
2452 		return (1);
2453 	}
2454 
2455 	macaddr = *((uint64_t *)addrp);
2456 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2457 
2458 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2459 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2460 		macaddr >>= 8;
2461 	}
2462 
2463 	/* now update all properties into the port */
2464 	portp->p_vswp = vswp;
2465 	portp->p_instance = inst;
2466 	portp->addr_set = B_FALSE;
2467 	ether_copy(&ea, &portp->p_macaddr);
2468 	if (nchan > VSW_PORT_MAX_LDCS) {
2469 		D2(vswp, "%s: using first of %d ldc ids",
2470 		    __func__, nchan);
2471 		nchan = VSW_PORT_MAX_LDCS;
2472 	}
2473 	portp->num_ldcs = nchan;
2474 	portp->ldc_ids =
2475 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2476 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2477 
2478 	/* read vlan id properties of this port node */
2479 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2480 	    &portp->vids, &portp->nvids, NULL);
2481 
2482 	/* Check if hybrid property is present */
2483 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2484 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2485 		portp->p_hio_enabled = B_TRUE;
2486 	} else {
2487 		portp->p_hio_enabled = B_FALSE;
2488 	}
2489 	/*
2490 	 * Port hio capability determined after version
2491 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2492 	 */
2493 	portp->p_hio_capable = B_FALSE;
2494 
2495 	/* Read bandwidth of this port */
2496 	vsw_port_read_bandwidth(portp, mdp, *node, &portp->p_bandwidth);
2497 
2498 	return (0);
2499 }
2500 
2501 /*
2502  * Add a new port to the system.
2503  *
2504  * Returns 0 on success, 1 on failure.
2505  */
2506 int
2507 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2508 {
2509 	vsw_port_t	*portp;
2510 	int		rv;
2511 
2512 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2513 
2514 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2515 	if (rv != 0) {
2516 		kmem_free(portp, sizeof (*portp));
2517 		return (1);
2518 	}
2519 
2520 	rv = vsw_port_attach(portp);
2521 	if (rv != 0) {
2522 		DERR(vswp, "%s: failed to attach port", __func__);
2523 		return (1);
2524 	}
2525 
2526 	return (0);
2527 }
2528 
2529 static int
2530 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2531 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2532 {
2533 	uint64_t	cport_num;
2534 	uint64_t	pport_num;
2535 	vsw_port_list_t	*plistp;
2536 	vsw_port_t	*portp;
2537 	uint16_t	pvid;
2538 	vsw_vlanid_t	*vids;
2539 	uint16_t	nvids;
2540 	uint64_t	val;
2541 	boolean_t	hio_enabled = B_FALSE;
2542 	uint64_t	maxbw;
2543 	enum		{P_MD_init = 0x1,
2544 				P_MD_vlans = 0x2,
2545 				P_MD_hio = 0x4,
2546 				P_MD_maxbw = 0x8} updated;
2547 
2548 	updated = P_MD_init;
2549 
2550 	/*
2551 	 * For now, we get port updates only if vlan ids changed.
2552 	 * We read the port num and do some sanity check.
2553 	 */
2554 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2555 		return (1);
2556 	}
2557 
2558 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2559 		return (1);
2560 	}
2561 	if (cport_num != pport_num)
2562 		return (1);
2563 
2564 	plistp = &(vswp->plist);
2565 
2566 	READ_ENTER(&plistp->lockrw);
2567 
2568 	portp = vsw_lookup_port(vswp, cport_num);
2569 	if (portp == NULL) {
2570 		RW_EXIT(&plistp->lockrw);
2571 		return (1);
2572 	}
2573 
2574 	/* Read the vlan ids */
2575 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2576 	    &vids, &nvids, NULL);
2577 
2578 	/* Determine if there are any vlan id updates */
2579 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2580 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2581 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2582 	    !vsw_cmp_vids(vids, portp->vids, nvids))) {
2583 		updated |= P_MD_vlans;
2584 	}
2585 
2586 	/* Check if hybrid property is present */
2587 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2588 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2589 		hio_enabled = B_TRUE;
2590 	}
2591 
2592 	if (portp->p_hio_enabled != hio_enabled) {
2593 		updated |= P_MD_hio;
2594 	}
2595 
2596 	/* Check if maxbw property is present */
2597 	vsw_port_read_bandwidth(portp, curr_mdp, curr_mdex, &maxbw);
2598 	if (maxbw != portp->p_bandwidth) {
2599 		if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) {
2600 			updated |= P_MD_maxbw;
2601 		} else {
2602 			cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth"
2603 			    " update for port %d as the specified value:%ld"
2604 			    " is invalid\n",
2605 			    vswp->instance, portp->p_instance, maxbw);
2606 		}
2607 	}
2608 
2609 	if (updated & P_MD_vlans) {
2610 		/* Remove existing vlan ids from the hash table. */
2611 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2612 
2613 		/* Reconfigure vlans with network device */
2614 		vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids);
2615 
2616 		/* add these new vlan ids into hash table */
2617 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2618 
2619 		/* reset the port if it is vlan unaware (ver < 1.3) */
2620 		vsw_vlan_unaware_port_reset(portp);
2621 	}
2622 
2623 	if (updated & P_MD_hio) {
2624 		vsw_hio_port_update(portp, hio_enabled);
2625 	}
2626 
2627 	if (updated & P_MD_maxbw) {
2628 		vsw_update_bandwidth(NULL, portp, VSW_VNETPORT, maxbw);
2629 	}
2630 
2631 	RW_EXIT(&plistp->lockrw);
2632 
2633 	return (0);
2634 }
2635 
2636 /*
2637  * vsw_mac_rx -- A common function to send packets to the interface.
2638  * By default this function check if the interface is UP or not, the
2639  * rest of the behaviour depends on the flags as below:
2640  *
2641  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2642  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2643  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2644  */
2645 void
2646 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2647     mblk_t *mp, vsw_macrx_flags_t flags)
2648 {
2649 	mblk_t		*mpt;
2650 
2651 	D1(vswp, "%s:enter\n", __func__);
2652 	READ_ENTER(&vswp->if_lockrw);
2653 	/* Check if the interface is up */
2654 	if (!(vswp->if_state & VSW_IF_UP)) {
2655 		RW_EXIT(&vswp->if_lockrw);
2656 		/* Free messages only if FREEMSG flag specified */
2657 		if (flags & VSW_MACRX_FREEMSG) {
2658 			freemsgchain(mp);
2659 		}
2660 		D1(vswp, "%s:exit\n", __func__);
2661 		return;
2662 	}
2663 	/*
2664 	 * If PROMISC flag is passed, then check if
2665 	 * the interface is in the PROMISC mode.
2666 	 * If not, drop the messages.
2667 	 */
2668 	if (flags & VSW_MACRX_PROMISC) {
2669 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2670 			RW_EXIT(&vswp->if_lockrw);
2671 			/* Free messages only if FREEMSG flag specified */
2672 			if (flags & VSW_MACRX_FREEMSG) {
2673 				freemsgchain(mp);
2674 			}
2675 			D1(vswp, "%s:exit\n", __func__);
2676 			return;
2677 		}
2678 	}
2679 	RW_EXIT(&vswp->if_lockrw);
2680 	/*
2681 	 * If COPYMSG flag is passed, then make a copy
2682 	 * of the message chain and send up the copy.
2683 	 */
2684 	if (flags & VSW_MACRX_COPYMSG) {
2685 		mp = copymsgchain(mp);
2686 		if (mp == NULL) {
2687 			D1(vswp, "%s:exit\n", __func__);
2688 			return;
2689 		}
2690 	}
2691 
2692 	D2(vswp, "%s: sending up stack", __func__);
2693 
2694 	mpt = NULL;
2695 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2696 	if (mp != NULL) {
2697 		mac_rx(vswp->if_mh, mrh, mp);
2698 	}
2699 	D1(vswp, "%s:exit\n", __func__);
2700 }
2701 
2702 /* copy mac address of vsw into soft state structure */
2703 static void
2704 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2705 {
2706 	int	i;
2707 
2708 	WRITE_ENTER(&vswp->if_lockrw);
2709 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2710 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2711 		macaddr >>= 8;
2712 	}
2713 	RW_EXIT(&vswp->if_lockrw);
2714 }
2715 
2716 /* Compare VLAN ids, array size expected to be same. */
2717 static boolean_t
2718 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids)
2719 {
2720 	int i, j;
2721 	uint16_t vid;
2722 
2723 	for (i = 0; i < nvids; i++) {
2724 		vid = vids1[i].vl_vid;
2725 		for (j = 0; j < nvids; j++) {
2726 			if (vid == vids2[i].vl_vid)
2727 				break;
2728 		}
2729 		if (j == nvids) {
2730 			return (B_FALSE);
2731 		}
2732 	}
2733 	return (B_TRUE);
2734 }
2735