xref: /titanic_44/usr/src/uts/sun4v/io/vsw.c (revision a4aeef46cda1835da2b19f8f62b4526de6521e6c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <sys/varargs.h>
53 #include <sys/machsystm.h>
54 #include <sys/modctl.h>
55 #include <sys/modhash.h>
56 #include <sys/mac_provider.h>
57 #include <sys/mac_ether.h>
58 #include <sys/taskq.h>
59 #include <sys/note.h>
60 #include <sys/mach_descrip.h>
61 #include <sys/mac_provider.h>
62 #include <sys/mdeg.h>
63 #include <sys/ldc.h>
64 #include <sys/vsw_fdb.h>
65 #include <sys/vsw.h>
66 #include <sys/vio_mailbox.h>
67 #include <sys/vnet_mailbox.h>
68 #include <sys/vnet_common.h>
69 #include <sys/vio_util.h>
70 #include <sys/sdt.h>
71 #include <sys/atomic.h>
72 #include <sys/callb.h>
73 #include <sys/vlan.h>
74 
75 /*
76  * Function prototypes.
77  */
78 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
79 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
80 static	int vsw_unattach(vsw_t *vswp);
81 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
82 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);
83 void vsw_destroy_rxpools(void *);
84 
85 /* MDEG routines */
86 static	int vsw_mdeg_register(vsw_t *vswp);
87 static	void vsw_mdeg_unregister(vsw_t *vswp);
88 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
89 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
90 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
91 static	int vsw_read_mdprops(vsw_t *vswp);
92 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
93 	mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
94 	uint16_t *nvidsp, uint16_t *default_idp);
95 static	void vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp,
96 	mde_cookie_t node, uint64_t *bw);
97 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
98 	md_t *mdp, mde_cookie_t *node);
99 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
100 	mde_cookie_t node);
101 static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
102 	uint32_t *mtu);
103 static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
104 static	void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
105 	boolean_t *pls);
106 static	void vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
107 	uint64_t *bw);
108 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
109 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
110 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
111 	vsw_vlanid_t *vids2, int nvids);
112 
113 /* Mac driver related routines */
114 static int vsw_mac_register(vsw_t *);
115 static int vsw_mac_unregister(vsw_t *);
116 static int vsw_m_stat(void *, uint_t, uint64_t *);
117 static void vsw_m_stop(void *arg);
118 static int vsw_m_start(void *arg);
119 static int vsw_m_unicst(void *arg, const uint8_t *);
120 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
121 static int vsw_m_promisc(void *arg, boolean_t);
122 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
123 void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state);
124 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
125     mblk_t *mp, vsw_macrx_flags_t flags);
126 void vsw_physlink_state_update(vsw_t *vswp);
127 
128 /*
129  * Functions imported from other files.
130  */
131 extern void vsw_setup_switching_thread(void *arg);
132 extern int vsw_setup_switching_start(vsw_t *vswp);
133 extern void vsw_setup_switching_stop(vsw_t *vswp);
134 extern int vsw_setup_switching(vsw_t *);
135 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
136     vsw_port_t *port, mac_resource_handle_t mrh);
137 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
138 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
139 extern void vsw_del_mcst_vsw(vsw_t *);
140 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
141 extern void vsw_detach_ports(vsw_t *vswp);
142 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
143 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
144 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
145 	md_t *prev_mdp, mde_cookie_t prev_mdex);
146 extern	int vsw_port_attach(vsw_port_t *port);
147 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
148 extern int vsw_mac_open(vsw_t *vswp);
149 extern void vsw_mac_close(vsw_t *vswp);
150 extern void vsw_mac_cleanup_ports(vsw_t *vswp);
151 extern void vsw_unset_addrs(vsw_t *vswp);
152 extern void vsw_setup_switching_post_process(vsw_t *vswp);
153 extern void vsw_create_vlans(void *arg, int type);
154 extern void vsw_destroy_vlans(void *arg, int type);
155 extern void vsw_vlan_add_ids(void *arg, int type);
156 extern void vsw_vlan_remove_ids(void *arg, int type);
157 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
158 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
159 	mblk_t **npt);
160 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
161 extern void vsw_hio_cleanup(vsw_t *vswp);
162 extern void vsw_hio_start_ports(vsw_t *vswp);
163 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
164 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
165 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
166 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
167     vsw_vlanid_t *new_vids, int new_nvids);
168 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
169 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
170 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
171     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
172 extern void vsw_reset_ports(vsw_t *vswp);
173 extern void vsw_port_reset(vsw_port_t *portp);
174 extern void vsw_physlink_update_ports(vsw_t *vswp);
175 extern void vsw_update_bandwidth(vsw_t *vswp, vsw_port_t *port, int type,
176     uint64_t maxbw);
177 
178 /*
179  * Internal tunables.
180  */
181 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
182 int	vsw_wretries = 100;		/* # of write attempts */
183 int	vsw_desc_delay = 0;		/* delay in us */
184 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
185 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
186 int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
187 					/* 300*3 = 900sec(15min) of max tmout */
188 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
189 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
190 int	vsw_ldc_retries = 5;		/* # of ldc_close() retries */
191 int	vsw_ldc_delay = 1000;		/* 1 ms delay for ldc_close() */
192 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
193 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
194 int	vsw_rxpool_cleanup_delay = 100000;	/* 100ms */
195 
196 
197 uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
198 uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
199 uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
200 
201 /* delay in usec to wait for all references on a fdb entry to be dropped */
202 uint32_t vsw_fdbe_refcnt_delay = 10;
203 
204 /*
205  * Default vlan id. This is only used internally when the "default-vlan-id"
206  * property is not present in the MD device node. Therefore, this should not be
207  * used as a tunable; if this value is changed, the corresponding variable
208  * should be updated to the same value in all vnets connected to this vsw.
209  */
210 uint16_t	vsw_default_vlan_id = 1;
211 
/*
 * Workaround for a version handshake bug in obp's vnet.
 * If vsw initiates version negotiation starting from the highest version,
 * obp sends a nack and terminates the version handshake. To work around
 * this, we do not initiate the version handshake when the channel comes up.
 * Instead, we wait for the peer to send its version info msg and go through
 * the version protocol exchange. If we successfully negotiate a version,
 * then, before sending the ack, we send our version info msg to the peer
 * using the <major,minor> version that we are about to ack.
 */
222 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
223 
224 /*
225  * In the absence of "priority-ether-types" property in MD, the following
226  * internal tunable can be set to specify a single priority ethertype.
227  */
228 uint64_t vsw_pri_eth_type = 0;
229 
230 /*
231  * Number of transmit priority buffers that are preallocated per device.
232  * This number is chosen to be a small value to throttle transmission
233  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
234  */
235 uint32_t vsw_pri_tx_nmblks = 64;
236 
237 /*
238  * Number of RARP packets sent to announce macaddr to the physical switch,
239  * after vsw's physical device is changed dynamically or after a guest (client
240  * vnet) is live migrated in.
241  */
242 uint32_t vsw_publish_macaddr_count = 3;
243 
244 boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
245 int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanp */
246 int vsw_hio_cleanup_delay = 10000;	/* 10ms */
247 
248 /* Number of transmit descriptors -  must be power of 2 */
249 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
250 
251 /*
252  * Max number of mblks received in one receive operation.
253  */
254 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
255 
256 /*
257  * Internal tunables for receive buffer pools, that is,  the size and number of
258  * mblks for each pool. At least 3 sizes must be specified if these are used.
259  * The sizes must be specified in increasing order. Non-zero value of the first
260  * size will be used as a hint to use these values instead of the algorithm
261  * that determines the sizes based on MTU.
262  */
263 uint32_t vsw_mblk_size1 = 0;
264 uint32_t vsw_mblk_size2 = 0;
265 uint32_t vsw_mblk_size3 = 0;
266 uint32_t vsw_mblk_size4 = 0;
267 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
268 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
269 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
270 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */
271 
272 /*
273  * Set this to non-zero to enable additional internal receive buffer pools
274  * based on the MTU of the device for better performance at the cost of more
275  * memory consumption. This is turned off by default, to use allocb(9F) for
276  * receive buffer allocations of sizes > 2K.
277  */
278 boolean_t vsw_jumbo_rxpools = B_FALSE;
279 
280 /*
281  * vsw_max_tx_qcount is the maximum # of packets that can be queued
282  * before the tx worker thread begins processing the queue. Its value
283  * is chosen to be 4x the default length of tx descriptor ring.
284  */
285 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
286 
287 /*
288  * MAC callbacks
289  */
290 static	mac_callbacks_t	vsw_m_callbacks = {
291 	0,
292 	vsw_m_stat,
293 	vsw_m_start,
294 	vsw_m_stop,
295 	vsw_m_promisc,
296 	vsw_m_multicst,
297 	vsw_m_unicst,
298 	vsw_m_tx,
299 	NULL,
300 	NULL,
301 	NULL
302 };
303 
304 static	struct	cb_ops	vsw_cb_ops = {
305 	nulldev,			/* cb_open */
306 	nulldev,			/* cb_close */
307 	nodev,				/* cb_strategy */
308 	nodev,				/* cb_print */
309 	nodev,				/* cb_dump */
310 	nodev,				/* cb_read */
311 	nodev,				/* cb_write */
312 	nodev,				/* cb_ioctl */
313 	nodev,				/* cb_devmap */
314 	nodev,				/* cb_mmap */
315 	nodev,				/* cb_segmap */
316 	nochpoll,			/* cb_chpoll */
317 	ddi_prop_op,			/* cb_prop_op */
318 	NULL,				/* cb_stream */
319 	D_MP,				/* cb_flag */
320 	CB_REV,				/* rev */
321 	nodev,				/* int (*cb_aread)() */
322 	nodev				/* int (*cb_awrite)() */
323 };
324 
325 static	struct	dev_ops	vsw_ops = {
326 	DEVO_REV,		/* devo_rev */
327 	0,			/* devo_refcnt */
328 	NULL,			/* devo_getinfo */
329 	nulldev,		/* devo_identify */
330 	nulldev,		/* devo_probe */
331 	vsw_attach,		/* devo_attach */
332 	vsw_detach,		/* devo_detach */
333 	nodev,			/* devo_reset */
334 	&vsw_cb_ops,		/* devo_cb_ops */
335 	(struct bus_ops *)NULL,	/* devo_bus_ops */
336 	ddi_power		/* devo_power */
337 };
338 
339 extern	struct	mod_ops	mod_driverops;
340 static struct modldrv vswmodldrv = {
341 	&mod_driverops,
342 	"sun4v Virtual Switch",
343 	&vsw_ops,
344 };
345 
/*
 * Acquire / release all three locks of an LDC channel structure.
 * The locks are taken in the order ldc_cblock, ldc_rxlock, ldc_txlock and
 * released in the reverse order.
 *
 * Each macro expands to a single statement (do { } while (0)), so it is
 * safe as the body of an unbraced if/else; the caller supplies the
 * terminating ';'.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	} while (0)
354 
355 /* Driver soft state ptr  */
356 static void	*vsw_state;
357 
358 /*
359  * Linked list of "vsw_t" structures - one per instance.
360  */
361 vsw_t		*vsw_head = NULL;
362 krwlock_t	vsw_rw;
363 
364 /*
365  * Property names
366  */
367 static char vdev_propname[] = "virtual-device";
368 static char vsw_propname[] = "virtual-network-switch";
369 static char physdev_propname[] = "vsw-phys-dev";
370 static char smode_propname[] = "vsw-switch-mode";
371 static char macaddr_propname[] = "local-mac-address";
372 static char remaddr_propname[] = "remote-mac-address";
373 static char ldcids_propname[] = "ldc-ids";
374 static char chan_propname[] = "channel-endpoint";
375 static char id_propname[] = "id";
376 static char reg_propname[] = "reg";
377 static char pri_types_propname[] = "priority-ether-types";
378 static char vsw_pvid_propname[] = "port-vlan-id";
379 static char vsw_vid_propname[] = "vlan-id";
380 static char vsw_dvid_propname[] = "default-vlan-id";
381 static char port_pvid_propname[] = "remote-port-vlan-id";
382 static char port_vid_propname[] = "remote-vlan-id";
383 static char hybrid_propname[] = "hybrid";
384 static char vsw_mtu_propname[] = "mtu";
385 static char vsw_linkprop_propname[] = "linkprop";
386 static char vsw_maxbw_propname[] = "maxbw";
387 static char port_maxbw_propname[] = "maxbw";
388 
389 /*
390  * Matching criteria passed to the MDEG to register interest
391  * in changes to 'virtual-device-port' nodes identified by their
392  * 'id' property.
393  */
394 static md_prop_match_t vport_prop_match[] = {
395 	{ MDET_PROP_VAL,    "id"   },
396 	{ MDET_LIST_END,    NULL    }
397 };
398 
399 static mdeg_node_match_t vport_match = { "virtual-device-port",
400 						vport_prop_match };
401 
402 /*
403  * Matching criteria passed to the MDEG to register interest
404  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
405  * by their 'name' and 'cfg-handle' properties.
406  */
407 static md_prop_match_t vdev_prop_match[] = {
408 	{ MDET_PROP_STR,    "name"   },
409 	{ MDET_PROP_VAL,    "cfg-handle" },
410 	{ MDET_LIST_END,    NULL    }
411 };
412 
413 static mdeg_node_match_t vdev_match = { "virtual-device",
414 						vdev_prop_match };
415 
416 
417 /*
418  * Specification of an MD node passed to the MDEG to filter any
419  * 'vport' nodes that do not belong to the specified node. This
420  * template is copied for each vsw instance and filled in with
421  * the appropriate 'cfg-handle' value before being passed to the MDEG.
422  */
423 static mdeg_prop_spec_t vsw_prop_template[] = {
424 	{ MDET_PROP_STR,    "name",		vsw_propname },
425 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
426 	{ MDET_LIST_END,    NULL,		NULL	}
427 };
428 
/*
 * Store 'val' as the value of the second entry of an MDEG property spec
 * array; in a copy of vsw_prop_template that entry is the "cfg-handle"
 * property. Expands to a single parenthesised expression with no trailing
 * ';', so it can be used safely in if/else constructs; the caller supplies
 * the terminating ';'.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
430 
431 #ifdef	DEBUG
432 /*
433  * Print debug messages - set to 0x1f to enable all msgs
434  * or 0x0 to turn all off.
435  */
436 int vswdbg = 0x0;
437 
438 /*
439  * debug levels:
440  * 0x01:	Function entry/exit tracing
441  * 0x02:	Internal function messages
442  * 0x04:	Verbose internal messages
443  * 0x08:	Warning messages
444  * 0x10:	Error messages
445  */
446 
447 void
448 vswdebug(vsw_t *vswp, const char *fmt, ...)
449 {
450 	char buf[512];
451 	va_list ap;
452 
453 	va_start(ap, fmt);
454 	(void) vsprintf(buf, fmt, ap);
455 	va_end(ap);
456 
457 	if (vswp == NULL)
458 		cmn_err(CE_CONT, "%s\n", buf);
459 	else
460 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
461 }
462 
463 #endif	/* DEBUG */
464 
465 static struct modlinkage modlinkage = {
466 	MODREV_1,
467 	&vswmodldrv,
468 	NULL
469 };
470 
471 int
472 _init(void)
473 {
474 	int status;
475 
476 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
477 
478 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
479 	if (status != 0) {
480 		return (status);
481 	}
482 
483 	mac_init_ops(&vsw_ops, DRV_NAME);
484 	status = mod_install(&modlinkage);
485 	if (status != 0) {
486 		ddi_soft_state_fini(&vsw_state);
487 	}
488 	return (status);
489 }
490 
491 int
492 _fini(void)
493 {
494 	int status;
495 
496 	status = mod_remove(&modlinkage);
497 	if (status != 0)
498 		return (status);
499 	mac_fini_ops(&vsw_ops);
500 	ddi_soft_state_fini(&vsw_state);
501 
502 	rw_destroy(&vsw_rw);
503 
504 	return (status);
505 }
506 
507 int
508 _info(struct modinfo *modinfop)
509 {
510 	return (mod_info(&modlinkage, modinfop));
511 }
512 
513 static int
514 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
515 {
516 	vsw_t			*vswp;
517 	int			instance;
518 	char			hashname[MAXNAMELEN];
519 	char			qname[TASKQ_NAMELEN];
520 	vsw_attach_progress_t	progress = PROG_init;
521 	int			rv;
522 
523 	switch (cmd) {
524 	case DDI_ATTACH:
525 		break;
526 	case DDI_RESUME:
527 		/* nothing to do for this non-device */
528 		return (DDI_SUCCESS);
529 	case DDI_PM_RESUME:
530 	default:
531 		return (DDI_FAILURE);
532 	}
533 
534 	instance = ddi_get_instance(dip);
535 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
536 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
537 		return (DDI_FAILURE);
538 	}
539 	vswp = ddi_get_soft_state(vsw_state, instance);
540 
541 	if (vswp == NULL) {
542 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
543 		goto vsw_attach_fail;
544 	}
545 
546 	vswp->dip = dip;
547 	vswp->instance = instance;
548 	vswp->phys_link_state = LINK_STATE_UNKNOWN;
549 	ddi_set_driver_private(dip, (caddr_t)vswp);
550 
551 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
552 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
553 	mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL);
554 	cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL);
555 	rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL);
556 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
557 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
558 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
559 
560 	progress |= PROG_locks;
561 
562 	rv = vsw_read_mdprops(vswp);
563 	if (rv != 0)
564 		goto vsw_attach_fail;
565 
566 	progress |= PROG_readmd;
567 
568 	/* setup the unicast forwarding database  */
569 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
570 	    vswp->instance);
571 	D2(vswp, "creating unicast hash table (%s)...", hashname);
572 	vswp->fdb_nchains = vsw_fdb_nchains;
573 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
574 	    mod_hash_null_valdtor, sizeof (void *));
575 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
576 	progress |= PROG_fdb;
577 
578 	/* setup the multicast fowarding database */
579 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
580 	    vswp->instance);
581 	D2(vswp, "creating multicast hash table %s)...", hashname);
582 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
583 	    mod_hash_null_valdtor, sizeof (void *));
584 
585 	progress |= PROG_mfdb;
586 
587 	/*
588 	 * Create the taskq which will process all the VIO
589 	 * control messages.
590 	 */
591 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
592 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
593 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
594 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
595 		    vswp->instance);
596 		goto vsw_attach_fail;
597 	}
598 
599 	progress |= PROG_taskq;
600 
601 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_rxp_taskq%d",
602 	    vswp->instance);
603 	if ((vswp->rxp_taskq = ddi_taskq_create(vswp->dip, qname, 1,
604 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
605 		cmn_err(CE_WARN, "!vsw%d: Unable to create rxp task queue",
606 		    vswp->instance);
607 		goto vsw_attach_fail;
608 	}
609 
610 	progress |= PROG_rxp_taskq;
611 
612 	/* prevent auto-detaching */
613 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
614 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
615 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
616 		    "instance %u", DDI_NO_AUTODETACH, instance);
617 	}
618 
619 	/*
620 	 * The null switching function is set to avoid panic until
621 	 * switch mode is setup.
622 	 */
623 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
624 
625 	/*
626 	 * Setup the required switching mode, based on the mdprops that we read
627 	 * earlier. We start a thread to do this, to avoid calling mac_open()
628 	 * directly from attach().
629 	 */
630 	rv = vsw_setup_switching_start(vswp);
631 	if (rv != 0) {
632 		goto vsw_attach_fail;
633 	}
634 
635 	progress |= PROG_swmode;
636 
637 	/* Register with mac layer as a provider */
638 	rv = vsw_mac_register(vswp);
639 	if (rv != 0)
640 		goto vsw_attach_fail;
641 
642 	progress |= PROG_macreg;
643 
644 	/*
645 	 * Now we have everything setup, register an interest in
646 	 * specific MD nodes.
647 	 *
648 	 * The callback is invoked in 2 cases, firstly if upon mdeg
649 	 * registration there are existing nodes which match our specified
650 	 * criteria, and secondly if the MD is changed (and again, there
651 	 * are nodes which we are interested in present within it. Note
652 	 * that our callback will be invoked even if our specified nodes
653 	 * have not actually changed).
654 	 *
655 	 */
656 	rv = vsw_mdeg_register(vswp);
657 	if (rv != 0)
658 		goto vsw_attach_fail;
659 
660 	progress |= PROG_mdreg;
661 
662 	vswp->attach_progress = progress;
663 
664 	WRITE_ENTER(&vsw_rw);
665 	vswp->next = vsw_head;
666 	vsw_head = vswp;
667 	RW_EXIT(&vsw_rw);
668 
669 	ddi_report_dev(vswp->dip);
670 	return (DDI_SUCCESS);
671 
672 vsw_attach_fail:
673 	DERR(NULL, "vsw_attach: failed");
674 
675 	vswp->attach_progress = progress;
676 	(void) vsw_unattach(vswp);
677 	ddi_soft_state_free(vsw_state, instance);
678 	return (DDI_FAILURE);
679 }
680 
681 static int
682 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
683 {
684 	vsw_t			**vswpp, *vswp;
685 	int 			instance;
686 
687 	instance = ddi_get_instance(dip);
688 	vswp = ddi_get_soft_state(vsw_state, instance);
689 
690 	if (vswp == NULL) {
691 		return (DDI_FAILURE);
692 	}
693 
694 	switch (cmd) {
695 	case DDI_DETACH:
696 		break;
697 	case DDI_SUSPEND:
698 	case DDI_PM_SUSPEND:
699 	default:
700 		return (DDI_FAILURE);
701 	}
702 
703 	D2(vswp, "detaching instance %d", instance);
704 
705 	if (vsw_unattach(vswp) != 0) {
706 		return (DDI_FAILURE);
707 	}
708 
709 	ddi_remove_minor_node(dip, NULL);
710 
711 	WRITE_ENTER(&vsw_rw);
712 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
713 		if (*vswpp == vswp) {
714 			*vswpp = vswp->next;
715 			break;
716 		}
717 	}
718 	RW_EXIT(&vsw_rw);
719 
720 	ddi_soft_state_free(vsw_state, instance);
721 
722 	return (DDI_SUCCESS);
723 }
724 
725 /*
726  * Common routine to handle vsw_attach() failure and vsw_detach(). Note that
727  * the only reason this function could fail is if mac_unregister() fails.
728  * Otherwise, this function must ensure that all resources are freed and return
729  * success.
730  */
731 static int
732 vsw_unattach(vsw_t *vswp)
733 {
734 	vsw_attach_progress_t	progress;
735 
736 	progress = vswp->attach_progress;
737 
738 	/*
739 	 * Unregister from the gldv3 subsystem. This can fail, in particular
740 	 * if there are still any open references to this mac device; in which
741 	 * case we just return failure without continuing to detach further.
742 	 */
743 	if (progress & PROG_macreg) {
744 		if (vsw_mac_unregister(vswp) != 0) {
745 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
746 			    "MAC layer", vswp->instance);
747 			return (1);
748 		}
749 		progress &= ~PROG_macreg;
750 	}
751 
752 	/*
753 	 * Now that we have unregistered from gldv3, we must finish all other
754 	 * steps and successfully return from this function; otherwise we will
755 	 * end up leaving the device in a broken/unusable state.
756 	 *
757 	 * If we have registered with mdeg, unregister now to stop further
758 	 * callbacks to this vsw device and/or its ports. Then, detach any
759 	 * existing ports.
760 	 */
761 	if (progress & PROG_mdreg) {
762 		vsw_mdeg_unregister(vswp);
763 		vsw_detach_ports(vswp);
764 		progress &= ~PROG_mdreg;
765 	}
766 
767 	/*
768 	 * If we have started a thread to setup the switching mode, stop it, if
769 	 * it is still running. If it has finished setting up the switching
770 	 * mode, then we need to clean up some additional things if we are
771 	 * running in L2 mode: first free up any hybrid resources; then stop
772 	 * and close the underlying physical device. Note that we would have
773 	 * already released all per mac_client resources (ucast, mcast addrs,
774 	 * hio-shares etc) as all the ports are detached and if the vsw device
775 	 * itself was in use as an interface, it has been unplumbed (otherwise
776 	 * mac_unregister() above would fail).
777 	 */
778 	if (progress & PROG_swmode) {
779 
780 		vsw_setup_switching_stop(vswp);
781 
782 		if (vswp->hio_capable == B_TRUE) {
783 			vsw_hio_cleanup(vswp);
784 			vswp->hio_capable = B_FALSE;
785 		}
786 
787 		mutex_enter(&vswp->mac_lock);
788 		vsw_mac_close(vswp);
789 		mutex_exit(&vswp->mac_lock);
790 
791 		progress &= ~PROG_swmode;
792 	}
793 
794 	/*
795 	 * We now destroy the taskq used to clean up rx mblk pools that
796 	 * couldn't be destroyed when the ports/channels were detached.
797 	 * We implicitly wait for those tasks to complete in
798 	 * ddi_taskq_destroy().
799 	 */
800 	if (progress & PROG_rxp_taskq) {
801 		ddi_taskq_destroy(vswp->rxp_taskq);
802 		progress &= ~PROG_rxp_taskq;
803 	}
804 
805 	/*
806 	 * By now any pending tasks have finished and the underlying
807 	 * ldc's have been destroyed, so its safe to delete the control
808 	 * message taskq.
809 	 */
810 	if (progress & PROG_taskq) {
811 		ddi_taskq_destroy(vswp->taskq_p);
812 		progress &= ~PROG_taskq;
813 	}
814 
815 	/* Destroy the multicast hash table */
816 	if (progress & PROG_mfdb) {
817 		mod_hash_destroy_hash(vswp->mfdb);
818 		progress &= ~PROG_mfdb;
819 	}
820 
821 	/* Destroy the vlan hash table and fdb */
822 	if (progress & PROG_fdb) {
823 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
824 		mod_hash_destroy_hash(vswp->fdb_hashp);
825 		progress &= ~PROG_fdb;
826 	}
827 
828 	if (progress & PROG_readmd) {
829 		if (VSW_PRI_ETH_DEFINED(vswp)) {
830 			kmem_free(vswp->pri_types,
831 			    sizeof (uint16_t) * vswp->pri_num_types);
832 			(void) vio_destroy_mblks(vswp->pri_tx_vmp);
833 		}
834 		progress &= ~PROG_readmd;
835 	}
836 
837 	if (progress & PROG_locks) {
838 		rw_destroy(&vswp->plist.lockrw);
839 		rw_destroy(&vswp->mfdbrw);
840 		rw_destroy(&vswp->if_lockrw);
841 		rw_destroy(&vswp->maccl_rwlock);
842 		cv_destroy(&vswp->sw_thr_cv);
843 		mutex_destroy(&vswp->sw_thr_lock);
844 		mutex_destroy(&vswp->mca_lock);
845 		mutex_destroy(&vswp->mac_lock);
846 		progress &= ~PROG_locks;
847 	}
848 
849 	vswp->attach_progress = progress;
850 
851 	return (0);
852 }
853 
854 void
855 vsw_destroy_rxpools(void *arg)
856 {
857 	vio_mblk_pool_t	*poolp = (vio_mblk_pool_t *)arg;
858 	vio_mblk_pool_t	*npoolp;
859 
860 	while (poolp != NULL) {
861 		npoolp =  poolp->nextp;
862 		while (vio_destroy_mblks(poolp) != 0) {
863 			drv_usecwait(vsw_rxpool_cleanup_delay);
864 		}
865 		poolp = npoolp;
866 	}
867 }
868 
869 /*
870  * Get the value of the "vsw-phys-dev" property in the specified
871  * node. This property is the name of the physical device that
872  * the virtual switch will use to talk to the outside world.
873  *
874  * Note it is valid for this property to be NULL (but the property
875  * itself must exist). Callers of this routine should verify that
876  * the value returned is what they expected (i.e. either NULL or non NULL).
877  *
878  * On success returns value of the property in region pointed to by
879  * the 'name' argument, and with return value of 0. Otherwise returns 1.
880  */
881 static int
882 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
883 {
884 	int		len = 0;
885 	int		instance;
886 	char		*physname = NULL;
887 	char		*dev;
888 	const char	*dev_name;
889 	char		myname[MAXNAMELEN];
890 
891 	dev_name = ddi_driver_name(vswp->dip);
892 	instance = ddi_get_instance(vswp->dip);
893 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
894 
895 	if (md_get_prop_data(mdp, node, physdev_propname,
896 	    (uint8_t **)(&physname), &len) != 0) {
897 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
898 		    "device(s) from MD", vswp->instance);
899 		return (1);
900 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
901 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
902 		    vswp->instance, physname);
903 		return (1);
904 	} else if (strcmp(myname, physname) == 0) {
905 		/*
906 		 * Prevent the vswitch from opening itself as the
907 		 * network device.
908 		 */
909 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
910 		    vswp->instance, physname);
911 		return (1);
912 	} else {
913 		(void) strncpy(name, physname, strlen(physname) + 1);
914 		D2(vswp, "%s: using first device specified (%s)",
915 		    __func__, physname);
916 	}
917 
918 #ifdef DEBUG
919 	/*
920 	 * As a temporary measure to aid testing we check to see if there
921 	 * is a vsw.conf file present. If there is we use the value of the
922 	 * vsw_physname property in the file as the name of the physical
923 	 * device, overriding the value from the MD.
924 	 *
925 	 * There may be multiple devices listed, but for the moment
926 	 * we just use the first one.
927 	 */
928 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
929 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
930 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
931 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
932 			    vswp->instance, dev);
933 			ddi_prop_free(dev);
934 			return (1);
935 		} else {
936 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
937 			    "config file", vswp->instance, dev);
938 
939 			(void) strncpy(name, dev, strlen(dev) + 1);
940 		}
941 
942 		ddi_prop_free(dev);
943 	}
944 #endif
945 
946 	return (0);
947 }
948 
949 /*
950  * Read the 'vsw-switch-mode' property from the specified MD node.
951  *
952  * Returns 0 on success, otherwise returns 1.
953  */
954 static int
955 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode)
956 {
957 	int		len = 0;
958 	char		*smode = NULL;
959 	char		*curr_mode = NULL;
960 
961 	D1(vswp, "%s: enter", __func__);
962 
963 	/*
964 	 * Get the switch-mode property. The modes are listed in
965 	 * decreasing order of preference, i.e. prefered mode is
966 	 * first item in list.
967 	 */
968 	len = 0;
969 	if (md_get_prop_data(mdp, node, smode_propname,
970 	    (uint8_t **)(&smode), &len) != 0) {
971 		/*
972 		 * Unable to get switch-mode property from MD, nothing
973 		 * more we can do.
974 		 */
975 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
976 		    " from the MD", vswp->instance);
977 		return (1);
978 	}
979 
980 	curr_mode = smode;
981 	/*
982 	 * Modes of operation:
983 	 * 'switched'	 - layer 2 switching, underlying HW in
984 	 *			programmed mode.
985 	 * 'promiscuous' - layer 2 switching, underlying HW in
986 	 *			promiscuous mode.
987 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
988 	 *			in non-promiscuous mode.
989 	 */
990 	while (curr_mode < (smode + len)) {
991 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
992 		if (strcmp(curr_mode, "switched") == 0) {
993 			*mode = VSW_LAYER2;
994 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
995 			*mode = VSW_LAYER2 | VSW_LAYER2_PROMISC;
996 		} else if (strcmp(curr_mode, "routed") == 0) {
997 			*mode = VSW_LAYER3;
998 		} else {
999 			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
1000 			    "setting to default switched mode",
1001 			    vswp->instance, curr_mode);
1002 			*mode = VSW_LAYER2;
1003 		}
1004 		curr_mode += strlen(curr_mode) + 1;
1005 	}
1006 
1007 	D2(vswp, "%s: %d mode", __func__, *mode);
1008 
1009 	D1(vswp, "%s: exit", __func__);
1010 
1011 	return (0);
1012 }
1013 
1014 /*
1015  * Register with the MAC layer as a network device, so we
1016  * can be plumbed if necessary.
1017  */
1018 static int
1019 vsw_mac_register(vsw_t *vswp)
1020 {
1021 	mac_register_t	*macp;
1022 	int		rv;
1023 
1024 	D1(vswp, "%s: enter", __func__);
1025 
1026 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1027 		return (EINVAL);
1028 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1029 	macp->m_driver = vswp;
1030 	macp->m_dip = vswp->dip;
1031 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1032 	macp->m_callbacks = &vsw_m_callbacks;
1033 	macp->m_min_sdu = 0;
1034 	macp->m_max_sdu = vswp->mtu;
1035 	macp->m_margin = VLAN_TAGSZ;
1036 	rv = mac_register(macp, &vswp->if_mh);
1037 	mac_free(macp);
1038 	if (rv != 0) {
1039 		/*
1040 		 * Treat this as a non-fatal error as we may be
1041 		 * able to operate in some other mode.
1042 		 */
1043 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1044 		    "a provider with MAC layer", vswp->instance);
1045 		return (rv);
1046 	}
1047 
1048 	vswp->if_state |= VSW_IF_REG;
1049 
1050 	D1(vswp, "%s: exit", __func__);
1051 
1052 	return (rv);
1053 }
1054 
1055 static int
1056 vsw_mac_unregister(vsw_t *vswp)
1057 {
1058 	int		rv = 0;
1059 
1060 	D1(vswp, "%s: enter", __func__);
1061 
1062 	WRITE_ENTER(&vswp->if_lockrw);
1063 
1064 	if (vswp->if_state & VSW_IF_REG) {
1065 		rv = mac_unregister(vswp->if_mh);
1066 		if (rv != 0) {
1067 			DWARN(vswp, "%s: unable to unregister from MAC "
1068 			    "framework", __func__);
1069 
1070 			RW_EXIT(&vswp->if_lockrw);
1071 			D1(vswp, "%s: fail exit", __func__);
1072 			return (rv);
1073 		}
1074 
1075 		/* mark i/f as down and unregistered */
1076 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1077 	}
1078 	RW_EXIT(&vswp->if_lockrw);
1079 
1080 	D1(vswp, "%s: exit", __func__);
1081 
1082 	return (rv);
1083 }
1084 
1085 static int
1086 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1087 {
1088 	vsw_t			*vswp = (vsw_t *)arg;
1089 
1090 	D1(vswp, "%s: enter", __func__);
1091 
1092 	mutex_enter(&vswp->mac_lock);
1093 	if (vswp->mh == NULL) {
1094 		mutex_exit(&vswp->mac_lock);
1095 		return (EINVAL);
1096 	}
1097 
1098 	/* return stats from underlying device */
1099 	*val = mac_stat_get(vswp->mh, stat);
1100 
1101 	mutex_exit(&vswp->mac_lock);
1102 
1103 	return (0);
1104 }
1105 
1106 static void
1107 vsw_m_stop(void *arg)
1108 {
1109 	vsw_t	*vswp = (vsw_t *)arg;
1110 
1111 	D1(vswp, "%s: enter", __func__);
1112 
1113 	WRITE_ENTER(&vswp->if_lockrw);
1114 	vswp->if_state &= ~VSW_IF_UP;
1115 	RW_EXIT(&vswp->if_lockrw);
1116 
1117 	/* Cleanup and close the mac client */
1118 	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
1119 
1120 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1121 }
1122 
1123 static int
1124 vsw_m_start(void *arg)
1125 {
1126 	int		rv;
1127 	vsw_t		*vswp = (vsw_t *)arg;
1128 
1129 	D1(vswp, "%s: enter", __func__);
1130 
1131 	WRITE_ENTER(&vswp->if_lockrw);
1132 
1133 	vswp->if_state |= VSW_IF_UP;
1134 
1135 	if (vswp->switching_setup_done == B_FALSE) {
1136 		/*
1137 		 * If the switching mode has not been setup yet, just
1138 		 * return. The unicast address will be programmed
1139 		 * after the physical device is successfully setup by the
1140 		 * timeout handler.
1141 		 */
1142 		RW_EXIT(&vswp->if_lockrw);
1143 		return (0);
1144 	}
1145 
1146 	/* if in layer2 mode, program unicast address. */
1147 	if (vswp->mh != NULL) {
1148 		/* Init a mac client and program addresses */
1149 		rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
1150 		if (rv != 0) {
1151 			cmn_err(CE_NOTE,
1152 			    "!vsw%d: failed to program interface "
1153 			    "unicast address\n", vswp->instance);
1154 		}
1155 	}
1156 
1157 	RW_EXIT(&vswp->if_lockrw);
1158 
1159 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1160 	return (0);
1161 }
1162 
1163 /*
1164  * Change the local interface address.
1165  *
1166  * Note: we don't support this entry point. The local
1167  * mac address of the switch can only be changed via its
1168  * MD node properties.
1169  */
1170 static int
1171 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1172 {
1173 	_NOTE(ARGUNUSED(arg, macaddr))
1174 
1175 	return (DDI_FAILURE);
1176 }
1177 
1178 static int
1179 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1180 {
1181 	vsw_t		*vswp = (vsw_t *)arg;
1182 	mcst_addr_t	*mcst_p = NULL;
1183 	uint64_t	addr = 0x0;
1184 	int		i, ret = 0;
1185 
1186 	D1(vswp, "%s: enter", __func__);
1187 
1188 	/*
1189 	 * Convert address into form that can be used
1190 	 * as hash table key.
1191 	 */
1192 	for (i = 0; i < ETHERADDRL; i++) {
1193 		addr = (addr << 8) | mca[i];
1194 	}
1195 
1196 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1197 
1198 	if (add) {
1199 		D2(vswp, "%s: adding multicast", __func__);
1200 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1201 			/*
1202 			 * Update the list of multicast addresses
1203 			 * contained within the vsw_t structure to
1204 			 * include this new one.
1205 			 */
1206 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1207 			if (mcst_p == NULL) {
1208 				DERR(vswp, "%s unable to alloc mem", __func__);
1209 				(void) vsw_del_mcst(vswp,
1210 				    VSW_LOCALDEV, addr, NULL);
1211 				return (1);
1212 			}
1213 			mcst_p->addr = addr;
1214 			ether_copy(mca, &mcst_p->mca);
1215 
1216 			/*
1217 			 * Call into the underlying driver to program the
1218 			 * address into HW.
1219 			 */
1220 			ret = vsw_mac_multicast_add(vswp, NULL, mcst_p,
1221 			    VSW_LOCALDEV);
1222 			if (ret != 0) {
1223 				(void) vsw_del_mcst(vswp,
1224 				    VSW_LOCALDEV, addr, NULL);
1225 				kmem_free(mcst_p, sizeof (*mcst_p));
1226 				return (ret);
1227 			}
1228 
1229 			mutex_enter(&vswp->mca_lock);
1230 			mcst_p->nextp = vswp->mcap;
1231 			vswp->mcap = mcst_p;
1232 			mutex_exit(&vswp->mca_lock);
1233 		} else {
1234 			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
1235 			    "address", vswp->instance);
1236 		}
1237 		return (ret);
1238 	}
1239 
1240 	D2(vswp, "%s: removing multicast", __func__);
1241 	/*
1242 	 * Remove the address from the hash table..
1243 	 */
1244 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1245 
1246 		/*
1247 		 * ..and then from the list maintained in the
1248 		 * vsw_t structure.
1249 		 */
1250 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1251 		ASSERT(mcst_p != NULL);
1252 
1253 		vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV);
1254 		kmem_free(mcst_p, sizeof (*mcst_p));
1255 	}
1256 
1257 	D1(vswp, "%s: exit", __func__);
1258 
1259 	return (0);
1260 }
1261 
1262 static int
1263 vsw_m_promisc(void *arg, boolean_t on)
1264 {
1265 	vsw_t		*vswp = (vsw_t *)arg;
1266 
1267 	D1(vswp, "%s: enter", __func__);
1268 
1269 	WRITE_ENTER(&vswp->if_lockrw);
1270 	if (on)
1271 		vswp->if_state |= VSW_IF_PROMISC;
1272 	else
1273 		vswp->if_state &= ~VSW_IF_PROMISC;
1274 	RW_EXIT(&vswp->if_lockrw);
1275 
1276 	D1(vswp, "%s: exit", __func__);
1277 
1278 	return (0);
1279 }
1280 
1281 static mblk_t *
1282 vsw_m_tx(void *arg, mblk_t *mp)
1283 {
1284 	vsw_t		*vswp = (vsw_t *)arg;
1285 
1286 	D1(vswp, "%s: enter", __func__);
1287 
1288 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1289 
1290 	if (mp == NULL) {
1291 		return (NULL);
1292 	}
1293 
1294 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1295 
1296 	D1(vswp, "%s: exit", __func__);
1297 
1298 	return (NULL);
1299 }
1300 
1301 /*
1302  * Register for machine description (MD) updates.
1303  *
1304  * Returns 0 on success, 1 on failure.
1305  */
1306 static int
1307 vsw_mdeg_register(vsw_t *vswp)
1308 {
1309 	mdeg_prop_spec_t	*pspecp;
1310 	mdeg_node_spec_t	*inst_specp;
1311 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1312 	size_t			templatesz;
1313 	int			rv;
1314 
1315 	D1(vswp, "%s: enter", __func__);
1316 
1317 	/*
1318 	 * Allocate and initialize a per-instance copy
1319 	 * of the global property spec array that will
1320 	 * uniquely identify this vsw instance.
1321 	 */
1322 	templatesz = sizeof (vsw_prop_template);
1323 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1324 
1325 	bcopy(vsw_prop_template, pspecp, templatesz);
1326 
1327 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1328 
1329 	/* initialize the complete prop spec structure */
1330 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1331 	inst_specp->namep = "virtual-device";
1332 	inst_specp->specp = pspecp;
1333 
1334 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1335 	    vswp->regprop);
1336 	/*
1337 	 * Register an interest in 'virtual-device' nodes with a
1338 	 * 'name' property of 'virtual-network-switch'
1339 	 */
1340 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1341 	    (void *)vswp, &mdeg_hdl);
1342 	if (rv != MDEG_SUCCESS) {
1343 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1344 		    __func__, rv);
1345 		goto mdeg_reg_fail;
1346 	}
1347 
1348 	/*
1349 	 * Register an interest in 'vsw-port' nodes.
1350 	 */
1351 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1352 	    (void *)vswp, &mdeg_port_hdl);
1353 	if (rv != MDEG_SUCCESS) {
1354 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1355 		(void) mdeg_unregister(mdeg_hdl);
1356 		goto mdeg_reg_fail;
1357 	}
1358 
1359 	/* save off data that will be needed later */
1360 	vswp->inst_spec = inst_specp;
1361 	vswp->mdeg_hdl = mdeg_hdl;
1362 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1363 
1364 	D1(vswp, "%s: exit", __func__);
1365 	return (0);
1366 
1367 mdeg_reg_fail:
1368 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1369 	    vswp->instance);
1370 	kmem_free(pspecp, templatesz);
1371 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1372 
1373 	vswp->mdeg_hdl = NULL;
1374 	vswp->mdeg_port_hdl = NULL;
1375 
1376 	return (1);
1377 }
1378 
1379 static void
1380 vsw_mdeg_unregister(vsw_t *vswp)
1381 {
1382 	D1(vswp, "vsw_mdeg_unregister: enter");
1383 
1384 	if (vswp->mdeg_hdl != NULL)
1385 		(void) mdeg_unregister(vswp->mdeg_hdl);
1386 
1387 	if (vswp->mdeg_port_hdl != NULL)
1388 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1389 
1390 	if (vswp->inst_spec != NULL) {
1391 		if (vswp->inst_spec->specp != NULL) {
1392 			(void) kmem_free(vswp->inst_spec->specp,
1393 			    sizeof (vsw_prop_template));
1394 			vswp->inst_spec->specp = NULL;
1395 		}
1396 
1397 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1398 		vswp->inst_spec = NULL;
1399 	}
1400 
1401 	D1(vswp, "vsw_mdeg_unregister: exit");
1402 }
1403 
1404 /*
1405  * Mdeg callback invoked for the vsw node itself.
1406  */
1407 static int
1408 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1409 {
1410 	vsw_t		*vswp;
1411 	md_t		*mdp;
1412 	mde_cookie_t	node;
1413 	uint64_t	inst;
1414 	char		*node_name = NULL;
1415 
1416 	if (resp == NULL)
1417 		return (MDEG_FAILURE);
1418 
1419 	vswp = (vsw_t *)cb_argp;
1420 
1421 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1422 	    " : prev matched %d", __func__, resp->added.nelem,
1423 	    resp->removed.nelem, resp->match_curr.nelem,
1424 	    resp->match_prev.nelem);
1425 
1426 	/*
1427 	 * We get an initial callback for this node as 'added'
1428 	 * after registering with mdeg. Note that we would have
1429 	 * already gathered information about this vsw node by
1430 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1431 	 * So, there is a window where the properties of this
1432 	 * node might have changed when we get this initial 'added'
1433 	 * callback. We handle this as if an update occured
1434 	 * and invoke the same function which handles updates to
1435 	 * the properties of this vsw-node if any.
1436 	 *
1437 	 * A non-zero 'match' value indicates that the MD has been
1438 	 * updated and that a virtual-network-switch node is
1439 	 * present which may or may not have been updated. It is
1440 	 * up to the clients to examine their own nodes and
1441 	 * determine if they have changed.
1442 	 */
1443 	if (resp->added.nelem != 0) {
1444 
1445 		if (resp->added.nelem != 1) {
1446 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1447 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1448 			return (MDEG_FAILURE);
1449 		}
1450 
1451 		mdp = resp->added.mdp;
1452 		node = resp->added.mdep[0];
1453 
1454 	} else if (resp->match_curr.nelem != 0) {
1455 
1456 		if (resp->match_curr.nelem != 1) {
1457 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1458 			    "invalid: %d\n", vswp->instance,
1459 			    resp->match_curr.nelem);
1460 			return (MDEG_FAILURE);
1461 		}
1462 
1463 		mdp = resp->match_curr.mdp;
1464 		node = resp->match_curr.mdep[0];
1465 
1466 	} else {
1467 		return (MDEG_FAILURE);
1468 	}
1469 
1470 	/* Validate name and instance */
1471 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1472 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1473 		return (MDEG_FAILURE);
1474 	}
1475 
1476 	/* is this a virtual-network-switch? */
1477 	if (strcmp(node_name, vsw_propname) != 0) {
1478 		DERR(vswp, "%s: Invalid node name: %s\n",
1479 		    __func__, node_name);
1480 		return (MDEG_FAILURE);
1481 	}
1482 
1483 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1484 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1485 		    __func__);
1486 		return (MDEG_FAILURE);
1487 	}
1488 
1489 	/* is this the right instance of vsw? */
1490 	if (inst != vswp->regprop) {
1491 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1492 		    __func__, inst);
1493 		return (MDEG_FAILURE);
1494 	}
1495 
1496 	vsw_update_md_prop(vswp, mdp, node);
1497 
1498 	return (MDEG_SUCCESS);
1499 }
1500 
1501 /*
1502  * Mdeg callback invoked for changes to the vsw-port nodes
1503  * under the vsw node.
1504  */
1505 static int
1506 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1507 {
1508 	vsw_t		*vswp;
1509 	int		idx;
1510 	md_t		*mdp;
1511 	mde_cookie_t	node;
1512 	uint64_t	inst;
1513 	int		rv;
1514 
1515 	if ((resp == NULL) || (cb_argp == NULL))
1516 		return (MDEG_FAILURE);
1517 
1518 	vswp = (vsw_t *)cb_argp;
1519 
1520 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1521 	    " : prev matched %d", __func__, resp->added.nelem,
1522 	    resp->removed.nelem, resp->match_curr.nelem,
1523 	    resp->match_prev.nelem);
1524 
1525 	/* process added ports */
1526 	for (idx = 0; idx < resp->added.nelem; idx++) {
1527 		mdp = resp->added.mdp;
1528 		node = resp->added.mdep[idx];
1529 
1530 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1531 
1532 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1533 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1534 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1535 		}
1536 	}
1537 
1538 	/* process removed ports */
1539 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1540 		mdp = resp->removed.mdp;
1541 		node = resp->removed.mdep[idx];
1542 
1543 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1544 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1545 			    __func__, id_propname, idx);
1546 			continue;
1547 		}
1548 
1549 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1550 
1551 		if (vsw_port_detach(vswp, inst) != 0) {
1552 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1553 			    vswp->instance, inst);
1554 		}
1555 	}
1556 
1557 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1558 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1559 		    resp->match_curr.mdep[idx],
1560 		    resp->match_prev.mdp,
1561 		    resp->match_prev.mdep[idx]);
1562 	}
1563 
1564 	D1(vswp, "%s: exit", __func__);
1565 
1566 	return (MDEG_SUCCESS);
1567 }
1568 
1569 /*
1570  * Scan the machine description for this instance of vsw
1571  * and read its properties. Called only from vsw_attach().
1572  * Returns: 0 on success, 1 on failure.
1573  */
1574 static int
1575 vsw_read_mdprops(vsw_t *vswp)
1576 {
1577 	md_t		*mdp = NULL;
1578 	mde_cookie_t	rootnode;
1579 	mde_cookie_t	*listp = NULL;
1580 	uint64_t	inst;
1581 	uint64_t	cfgh;
1582 	char		*name;
1583 	int		rv = 1;
1584 	int		num_nodes = 0;
1585 	int		num_devs = 0;
1586 	int		listsz = 0;
1587 	int		i;
1588 
1589 	/*
1590 	 * In each 'virtual-device' node in the MD there is a
1591 	 * 'cfg-handle' property which is the MD's concept of
1592 	 * an instance number (this may be completely different from
1593 	 * the device drivers instance #). OBP reads that value and
1594 	 * stores it in the 'reg' property of the appropriate node in
1595 	 * the device tree. We first read this reg property and use this
1596 	 * to compare against the 'cfg-handle' property of vsw nodes
1597 	 * in MD to get to this specific vsw instance and then read
1598 	 * other properties that we are interested in.
1599 	 * We also cache the value of 'reg' property and use it later
1600 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1601 	 */
1602 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1603 	    DDI_PROP_DONTPASS, reg_propname, -1);
1604 	if (inst == -1) {
1605 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1606 		    "OBP device tree", vswp->instance, reg_propname);
1607 		return (rv);
1608 	}
1609 
1610 	vswp->regprop = inst;
1611 
1612 	if ((mdp = md_get_handle()) == NULL) {
1613 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1614 		return (rv);
1615 	}
1616 
1617 	num_nodes = md_node_count(mdp);
1618 	ASSERT(num_nodes > 0);
1619 
1620 	listsz = num_nodes * sizeof (mde_cookie_t);
1621 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1622 
1623 	rootnode = md_root_node(mdp);
1624 
1625 	/* search for all "virtual_device" nodes */
1626 	num_devs = md_scan_dag(mdp, rootnode,
1627 	    md_find_name(mdp, vdev_propname),
1628 	    md_find_name(mdp, "fwd"), listp);
1629 	if (num_devs <= 0) {
1630 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1631 		goto vsw_readmd_exit;
1632 	}
1633 
1634 	/*
1635 	 * Now loop through the list of virtual-devices looking for
1636 	 * devices with name "virtual-network-switch" and for each
1637 	 * such device compare its instance with what we have from
1638 	 * the 'reg' property to find the right node in MD and then
1639 	 * read all its properties.
1640 	 */
1641 	for (i = 0; i < num_devs; i++) {
1642 
1643 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1644 			DWARN(vswp, "%s: name property not found\n",
1645 			    __func__);
1646 			goto vsw_readmd_exit;
1647 		}
1648 
1649 		/* is this a virtual-network-switch? */
1650 		if (strcmp(name, vsw_propname) != 0)
1651 			continue;
1652 
1653 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1654 			DWARN(vswp, "%s: cfg-handle property not found\n",
1655 			    __func__);
1656 			goto vsw_readmd_exit;
1657 		}
1658 
1659 		/* is this the required instance of vsw? */
1660 		if (inst != cfgh)
1661 			continue;
1662 
1663 		/* now read all properties of this vsw instance */
1664 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1665 		break;
1666 	}
1667 
1668 vsw_readmd_exit:
1669 
1670 	kmem_free(listp, listsz);
1671 	(void) md_fini_handle(mdp);
1672 	return (rv);
1673 }
1674 
1675 /*
1676  * Read the initial start-of-day values from the specified MD node.
1677  */
1678 static int
1679 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1680 {
1681 	uint64_t	macaddr = 0;
1682 
1683 	D1(vswp, "%s: enter", __func__);
1684 
1685 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1686 		return (1);
1687 	}
1688 
1689 	/* mac address for vswitch device itself */
1690 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1691 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1692 		    vswp->instance);
1693 		return (1);
1694 	}
1695 
1696 	vsw_save_lmacaddr(vswp, macaddr);
1697 
1698 	if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) {
1699 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1700 		    "defaulting to 'switched' mode",
1701 		    __func__, smode_propname);
1702 
1703 		vswp->smode = VSW_LAYER2;
1704 	}
1705 
1706 	/*
1707 	 * Read the 'linkprop' property to know if this
1708 	 * vsw device wants to get physical link updates.
1709 	 */
1710 	vsw_linkprop_read(vswp, mdp, node, &vswp->pls_update);
1711 
1712 	/* read mtu */
1713 	vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
1714 	if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
1715 		vswp->mtu = ETHERMTU;
1716 	}
1717 	vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
1718 	    VLAN_TAGSZ;
1719 
1720 	/* read vlan id properties of this vsw instance */
1721 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1722 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1723 
1724 	/* read priority-ether-types */
1725 	vsw_read_pri_eth_types(vswp, mdp, node);
1726 
1727 	/* read bandwidth property of this vsw instance */
1728 	vsw_bandwidth_read(vswp, mdp, node, &vswp->bandwidth);
1729 
1730 	D1(vswp, "%s: exit", __func__);
1731 	return (0);
1732 }
1733 
1734 /*
1735  * Read vlan id properties of the given MD node.
1736  * Arguments:
1737  *   arg:          device argument(vsw device or a port)
1738  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1739  *   mdp:          machine description
1740  *   node:         md node cookie
1741  *
1742  * Returns:
1743  *   pvidp:        port-vlan-id of the node
1744  *   vidspp:       list of vlan-ids of the node
1745  *   nvidsp:       # of vlan-ids in the list
1746  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1747  */
1748 static void
1749 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1750 	uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp,
1751 	uint16_t *default_idp)
1752 {
1753 	vsw_t		*vswp;
1754 	vsw_port_t	*portp;
1755 	char		*pvid_propname;
1756 	char		*vid_propname;
1757 	uint_t		nvids = 0;
1758 	uint32_t	vids_size;
1759 	int		rv;
1760 	int		i;
1761 	uint64_t	*data;
1762 	uint64_t	val;
1763 	int		size;
1764 	int		inst;
1765 
1766 	if (type == VSW_LOCALDEV) {
1767 
1768 		vswp = (vsw_t *)arg;
1769 		pvid_propname = vsw_pvid_propname;
1770 		vid_propname = vsw_vid_propname;
1771 		inst = vswp->instance;
1772 
1773 	} else if (type == VSW_VNETPORT) {
1774 
1775 		portp = (vsw_port_t *)arg;
1776 		vswp = portp->p_vswp;
1777 		pvid_propname = port_pvid_propname;
1778 		vid_propname = port_vid_propname;
1779 		inst = portp->p_instance;
1780 
1781 	} else {
1782 		return;
1783 	}
1784 
1785 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1786 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1787 		if (rv != 0) {
1788 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1789 			    vsw_dvid_propname);
1790 
1791 			*default_idp = vsw_default_vlan_id;
1792 		} else {
1793 			*default_idp = val & 0xFFF;
1794 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1795 			    vsw_dvid_propname, inst, *default_idp);
1796 		}
1797 	}
1798 
1799 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1800 	if (rv != 0) {
1801 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1802 		*pvidp = vsw_default_vlan_id;
1803 	} else {
1804 
1805 		*pvidp = val & 0xFFF;
1806 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1807 		    pvid_propname, inst, *pvidp);
1808 	}
1809 
1810 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1811 	    &size);
1812 	if (rv != 0) {
1813 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1814 		size = 0;
1815 	} else {
1816 		size /= sizeof (uint64_t);
1817 	}
1818 	nvids = size;
1819 
1820 	if (nvids != 0) {
1821 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1822 		vids_size = sizeof (vsw_vlanid_t) * nvids;
1823 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1824 		for (i = 0; i < nvids; i++) {
1825 			(*vidspp)[i].vl_vid = data[i] & 0xFFFF;
1826 			(*vidspp)[i].vl_set = B_FALSE;
1827 			D2(vswp, " %d ", (*vidspp)[i].vl_vid);
1828 		}
1829 		D2(vswp, "\n");
1830 	}
1831 
1832 	*nvidsp = nvids;
1833 }
1834 
1835 static void
1836 vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, mde_cookie_t node,
1837     uint64_t *bw)
1838 {
1839 	int		rv;
1840 	uint64_t	val;
1841 	vsw_t		*vswp;
1842 
1843 	vswp = portp->p_vswp;
1844 
1845 	rv = md_get_prop_val(mdp, node, port_maxbw_propname, &val);
1846 
1847 	if (rv != 0) {
1848 		*bw = 0;
1849 		D3(vswp, "%s: prop(%s) not found\n", __func__,
1850 		    port_maxbw_propname);
1851 	} else {
1852 		*bw = val;
1853 		D3(vswp, "%s: %s nodes found", __func__, port_maxbw_propname);
1854 	}
1855 }
1856 
1857 /*
1858  * This function reads "priority-ether-types" property from md. This property
1859  * is used to enable support for priority frames. Applications which need
1860  * guaranteed and timely delivery of certain high priority frames to/from
1861  * a vnet or vsw within ldoms, should configure this property by providing
1862  * the ether type(s) for which the priority facility is needed.
1863  * Normal data frames are delivered over a ldc channel using the descriptor
1864  * ring mechanism which is constrained by factors such as descriptor ring size,
1865  * the rate at which the ring is processed at the peer ldc end point, etc.
1866  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1867  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1868  * descriptor ring path and enables a more reliable and timely delivery of
1869  * frames to the peer.
1870  */
1871 static void
1872 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1873 {
1874 	int		rv;
1875 	uint16_t	*types;
1876 	uint64_t	*data;
1877 	int		size;
1878 	int		i;
1879 	size_t		mblk_sz;
1880 
1881 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1882 	    (uint8_t **)&data, &size);
1883 	if (rv != 0) {
1884 		/*
1885 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1886 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1887 		 */
1888 		if (vsw_pri_eth_type != 0) {
1889 			size = sizeof (vsw_pri_eth_type);
1890 			data = &vsw_pri_eth_type;
1891 		} else {
1892 			D3(vswp, "%s: prop(%s) not found", __func__,
1893 			    pri_types_propname);
1894 			size = 0;
1895 		}
1896 	}
1897 
1898 	if (size == 0) {
1899 		vswp->pri_num_types = 0;
1900 		return;
1901 	}
1902 
1903 	/*
1904 	 * we have some priority-ether-types defined;
1905 	 * allocate a table of these types and also
1906 	 * allocate a pool of mblks to transmit these
1907 	 * priority packets.
1908 	 */
1909 	size /= sizeof (uint64_t);
1910 	vswp->pri_num_types = size;
1911 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1912 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1913 		types[i] = data[i] & 0xFFFF;
1914 	}
1915 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1916 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1917 }
1918 
1919 static void
1920 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1921 {
1922 	int		rv;
1923 	int		inst;
1924 	uint64_t	val;
1925 	char		*mtu_propname;
1926 
1927 	mtu_propname = vsw_mtu_propname;
1928 	inst = vswp->instance;
1929 
1930 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1931 	if (rv != 0) {
1932 		D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
1933 		*mtu = vsw_ethermtu;
1934 	} else {
1935 
1936 		*mtu = val & 0xFFFF;
1937 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1938 		    mtu_propname, inst, *mtu);
1939 	}
1940 }
1941 
1942 /*
1943  * Update the mtu of the vsw device. We first check if the device has been
1944  * plumbed and if so fail the mtu update. Otherwise, we continue to update the
1945  * new mtu and reset all ports to initiate handshake re-negotiation with peers
1946  * using the new mtu.
1947  */
1948 static int
1949 vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
1950 {
1951 	int	rv;
1952 
1953 	WRITE_ENTER(&vswp->if_lockrw);
1954 
1955 	if (vswp->if_state & VSW_IF_UP) {
1956 
1957 		RW_EXIT(&vswp->if_lockrw);
1958 
1959 		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
1960 		    " as the device is plumbed\n", vswp->instance);
1961 		return (EBUSY);
1962 
1963 	} else {
1964 
1965 		D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
1966 		    __func__, vswp->mtu, mtu);
1967 
1968 		vswp->mtu = mtu;
1969 		vswp->max_frame_size = vswp->mtu +
1970 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1971 
1972 		rv = mac_maxsdu_update(vswp->if_mh, mtu);
1973 		if (rv != 0) {
1974 			cmn_err(CE_NOTE,
1975 			    "!vsw%d: Unable to update mtu with mac"
1976 			    " layer\n", vswp->instance);
1977 		}
1978 
1979 		RW_EXIT(&vswp->if_lockrw);
1980 
1981 		/* Reset ports to renegotiate with the new mtu */
1982 		vsw_reset_ports(vswp);
1983 
1984 	}
1985 
1986 	return (0);
1987 }
1988 
1989 static void
1990 vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
1991 	boolean_t *pls)
1992 {
1993 	int		rv;
1994 	uint64_t	val;
1995 	char		*linkpropname;
1996 
1997 	linkpropname = vsw_linkprop_propname;
1998 
1999 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
2000 	if (rv != 0) {
2001 		D3(vswp, "%s: prop(%s) not found", __func__, linkpropname);
2002 		*pls = B_FALSE;
2003 	} else {
2004 
2005 		*pls = (val & 0x1) ? B_TRUE : B_FALSE;
2006 		D2(vswp, "%s: %s(%d): (%d)\n", __func__, linkpropname,
2007 		    vswp->instance, *pls);
2008 	}
2009 }
2010 
2011 void
2012 vsw_mac_link_update(vsw_t *vswp, link_state_t link_state)
2013 {
2014 	READ_ENTER(&vswp->if_lockrw);
2015 
2016 	if (vswp->if_state & VSW_IF_REG) {
2017 		mac_link_update(vswp->if_mh, link_state);
2018 	}
2019 
2020 	RW_EXIT(&vswp->if_lockrw);
2021 }
2022 
2023 void
2024 vsw_physlink_state_update(vsw_t *vswp)
2025 {
2026 	if (vswp->pls_update == B_TRUE) {
2027 		vsw_mac_link_update(vswp, vswp->phys_link_state);
2028 	}
2029 	vsw_physlink_update_ports(vswp);
2030 }
2031 
2032 static void
2033 vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint64_t *bw)
2034 {
2035 	/* read the vsw bandwidth from md */
2036 	int		rv;
2037 	uint64_t	val;
2038 
2039 	rv = md_get_prop_val(mdp, node, vsw_maxbw_propname, &val);
2040 	if (rv != 0) {
2041 		*bw = 0;
2042 		D3(vswp, "%s: prop(%s) not found", __func__,
2043 		    vsw_maxbw_propname);
2044 	} else {
2045 		*bw = val;
2046 		D3(vswp, "%s: %s(%d): (%ld)\n", __func__,
2047 		    vsw_maxbw_propname, vswp->instance, *bw);
2048 	}
2049 }
2050 
2051 /*
2052  * Check to see if the relevant properties in the specified node have
2053  * changed, and if so take the appropriate action.
2054  *
2055  * If any of the properties are missing or invalid we don't take
2056  * any action, as this function should only be invoked when modifications
2057  * have been made to what we assume is a working configuration, which
2058  * we leave active.
2059  *
2060  * Note it is legal for this routine to be invoked even if none of the
2061  * properties in the port node within the MD have actually changed.
2062  */
2063 static void
2064 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
2065 {
2066 	char		physname[LIFNAMSIZ];
2067 	char		drv[LIFNAMSIZ];
2068 	uint_t		ddi_instance;
2069 	uint8_t		new_smode;
2070 	int		i;
2071 	uint64_t 	macaddr = 0;
2072 	enum		{MD_init = 0x1,
2073 				MD_physname = 0x2,
2074 				MD_macaddr = 0x4,
2075 				MD_smode = 0x8,
2076 				MD_vlans = 0x10,
2077 				MD_mtu = 0x20,
2078 				MD_pls = 0x40,
2079 				MD_bw = 0x80} updated;
2080 	int		rv;
2081 	uint16_t	pvid;
2082 	vsw_vlanid_t	*vids;
2083 	uint16_t	nvids;
2084 	uint32_t	mtu;
2085 	boolean_t	pls_update;
2086 	uint64_t	maxbw;
2087 
2088 	updated = MD_init;
2089 
2090 	D1(vswp, "%s: enter", __func__);
2091 
2092 	/*
2093 	 * Check if name of physical device in MD has changed.
2094 	 */
2095 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
2096 		/*
2097 		 * Do basic sanity check on new device name/instance,
2098 		 * if its non NULL. It is valid for the device name to
2099 		 * have changed from a non NULL to a NULL value, i.e.
2100 		 * the vsw is being changed to 'routed' mode.
2101 		 */
2102 		if ((strlen(physname) != 0) &&
2103 		    (ddi_parse(physname, drv,
2104 		    &ddi_instance) != DDI_SUCCESS)) {
2105 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
2106 			    " a valid device name/instance",
2107 			    vswp->instance, physname);
2108 			goto fail_reconf;
2109 		}
2110 
2111 		if (strcmp(physname, vswp->physname)) {
2112 			D2(vswp, "%s: device name changed from %s to %s",
2113 			    __func__, vswp->physname, physname);
2114 
2115 			updated |= MD_physname;
2116 		} else {
2117 			D2(vswp, "%s: device name unchanged at %s",
2118 			    __func__, vswp->physname);
2119 		}
2120 	} else {
2121 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
2122 		    "device from updated MD.", vswp->instance);
2123 		goto fail_reconf;
2124 	}
2125 
2126 	/*
2127 	 * Check if MAC address has changed.
2128 	 */
2129 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
2130 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
2131 		    vswp->instance);
2132 		goto fail_reconf;
2133 	} else {
2134 		uint64_t maddr = macaddr;
2135 		READ_ENTER(&vswp->if_lockrw);
2136 		for (i = ETHERADDRL - 1; i >= 0; i--) {
2137 			if (vswp->if_addr.ether_addr_octet[i]
2138 			    != (macaddr & 0xFF)) {
2139 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
2140 				    __func__, i,
2141 				    vswp->if_addr.ether_addr_octet[i],
2142 				    (macaddr & 0xFF));
2143 				updated |= MD_macaddr;
2144 				macaddr = maddr;
2145 				break;
2146 			}
2147 			macaddr >>= 8;
2148 		}
2149 		RW_EXIT(&vswp->if_lockrw);
2150 		if (updated & MD_macaddr) {
2151 			vsw_save_lmacaddr(vswp, macaddr);
2152 		}
2153 	}
2154 
2155 	/*
2156 	 * Check if switching modes have changed.
2157 	 */
2158 	if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) {
2159 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2160 		    vswp->instance, smode_propname);
2161 		goto fail_reconf;
2162 	} else {
2163 		if (new_smode != vswp->smode) {
2164 			D2(vswp, "%s: switching mode changed from %d to %d",
2165 			    __func__, vswp->smode, new_smode);
2166 
2167 			updated |= MD_smode;
2168 		}
2169 	}
2170 
2171 	/* Read the vlan ids */
2172 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2173 	    &nvids, NULL);
2174 
2175 	/* Determine if there are any vlan id updates */
2176 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2177 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2178 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2179 	    !vsw_cmp_vids(vids, vswp->vids, nvids))) {
2180 		updated |= MD_vlans;
2181 	}
2182 
2183 	/* Read mtu */
2184 	vsw_mtu_read(vswp, mdp, node, &mtu);
2185 	if (mtu != vswp->mtu) {
2186 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2187 			updated |= MD_mtu;
2188 		} else {
2189 			cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2190 			    " as the specified value:%d is invalid\n",
2191 			    vswp->instance, mtu);
2192 		}
2193 	}
2194 
2195 	/*
2196 	 * Read the 'linkprop' property.
2197 	 */
2198 	vsw_linkprop_read(vswp, mdp, node, &pls_update);
2199 	if (pls_update != vswp->pls_update) {
2200 		updated |= MD_pls;
2201 	}
2202 
2203 	/* Read bandwidth */
2204 	vsw_bandwidth_read(vswp, mdp, node, &maxbw);
2205 	if (maxbw != vswp->bandwidth) {
2206 		if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) {
2207 			updated |= MD_bw;
2208 		} else {
2209 			cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth"
2210 			    " update as the specified value:%ld is invalid\n",
2211 			    vswp->instance, maxbw);
2212 		}
2213 	}
2214 
2215 	/*
2216 	 * Now make any changes which are needed...
2217 	 */
2218 	if (updated & MD_pls) {
2219 
2220 		/* save the updated property. */
2221 		vswp->pls_update = pls_update;
2222 
2223 		if (pls_update == B_FALSE) {
2224 			/*
2225 			 * Phys link state update is now disabled for this vsw
2226 			 * interface. If we had previously reported a link-down
2227 			 * to the stack, undo that by sending a link-up.
2228 			 */
2229 			if (vswp->phys_link_state == LINK_STATE_DOWN) {
2230 				vsw_mac_link_update(vswp, LINK_STATE_UP);
2231 			}
2232 		} else {
2233 			/*
2234 			 * Phys link state update is now enabled. Send up an
2235 			 * update based on the current phys link state.
2236 			 */
2237 			if (vswp->smode & VSW_LAYER2) {
2238 				vsw_mac_link_update(vswp,
2239 				    vswp->phys_link_state);
2240 			}
2241 		}
2242 
2243 	}
2244 
2245 	if (updated & (MD_physname | MD_smode | MD_mtu)) {
2246 
2247 		/*
2248 		 * Stop any pending thread to setup switching mode.
2249 		 */
2250 		vsw_setup_switching_stop(vswp);
2251 
2252 		/* Cleanup HybridIO */
2253 		vsw_hio_cleanup(vswp);
2254 
2255 		/*
2256 		 * Remove unicst, mcst addrs of vsw interface
2257 		 * and ports from the physdev. This also closes
2258 		 * the corresponding mac clients.
2259 		 */
2260 		vsw_unset_addrs(vswp);
2261 
2262 		/*
2263 		 * Stop, detach and close the old device..
2264 		 */
2265 		mutex_enter(&vswp->mac_lock);
2266 		vsw_mac_close(vswp);
2267 		mutex_exit(&vswp->mac_lock);
2268 
2269 		/*
2270 		 * Update phys name.
2271 		 */
2272 		if (updated & MD_physname) {
2273 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2274 			    vswp->instance, vswp->physname, physname);
2275 			(void) strncpy(vswp->physname,
2276 			    physname, strlen(physname) + 1);
2277 		}
2278 
2279 		/*
2280 		 * Update array with the new switch mode values.
2281 		 */
2282 		if (updated & MD_smode) {
2283 			vswp->smode = new_smode;
2284 		}
2285 
2286 		/* Update mtu */
2287 		if (updated & MD_mtu) {
2288 			rv = vsw_mtu_update(vswp, mtu);
2289 			if (rv != 0) {
2290 				goto fail_update;
2291 			}
2292 		}
2293 
2294 		/*
2295 		 * ..and attach, start the new device.
2296 		 */
2297 		rv = vsw_setup_switching(vswp);
2298 		if (rv == EAGAIN) {
2299 			/*
2300 			 * Unable to setup switching mode.
2301 			 * As the error is EAGAIN, schedule a thread to retry
2302 			 * and return. Programming addresses of ports and
2303 			 * vsw interface will be done by the thread when the
2304 			 * switching setup completes successfully.
2305 			 */
2306 			if (vsw_setup_switching_start(vswp) != 0) {
2307 				goto fail_update;
2308 			}
2309 			return;
2310 
2311 		} else if (rv) {
2312 			goto fail_update;
2313 		}
2314 
2315 		vsw_setup_switching_post_process(vswp);
2316 	} else if (updated & MD_macaddr) {
2317 		/*
2318 		 * We enter here if only MD_macaddr is exclusively updated.
2319 		 * If MD_physname and/or MD_smode are also updated, then
2320 		 * as part of that, we would have implicitly processed
2321 		 * MD_macaddr update (above).
2322 		 */
2323 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2324 		    vswp->instance, macaddr);
2325 
2326 		READ_ENTER(&vswp->if_lockrw);
2327 		if (vswp->if_state & VSW_IF_UP) {
2328 			/* reconfigure with new address */
2329 			vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0);
2330 
2331 			/*
2332 			 * Notify the MAC layer of the changed address.
2333 			 */
2334 			mac_unicst_update(vswp->if_mh,
2335 			    (uint8_t *)&vswp->if_addr);
2336 
2337 		}
2338 		RW_EXIT(&vswp->if_lockrw);
2339 
2340 	}
2341 
2342 	if (updated & MD_vlans) {
2343 		/* Remove existing vlan ids from the hash table. */
2344 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2345 
2346 		if (vswp->if_state & VSW_IF_UP) {
2347 			vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids);
2348 		} else {
2349 			if (vswp->nvids != 0) {
2350 				kmem_free(vswp->vids,
2351 				    sizeof (vsw_vlanid_t) * vswp->nvids);
2352 			}
2353 			vswp->vids = vids;
2354 			vswp->nvids = nvids;
2355 			vswp->pvid = pvid;
2356 		}
2357 
2358 		/* add these new vlan ids into hash table */
2359 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2360 	} else {
2361 		if (nvids != 0) {
2362 			kmem_free(vids, sizeof (vsw_vlanid_t) * nvids);
2363 		}
2364 	}
2365 
2366 	if (updated & MD_bw) {
2367 		vsw_update_bandwidth(vswp, NULL, VSW_LOCALDEV, maxbw);
2368 	}
2369 
2370 	return;
2371 
2372 fail_reconf:
2373 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2374 	return;
2375 
2376 fail_update:
2377 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2378 	    vswp->instance);
2379 }
2380 
2381 /*
2382  * Read the port's md properties.
2383  */
2384 static int
2385 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2386 	md_t *mdp, mde_cookie_t *node)
2387 {
2388 	uint64_t		ldc_id;
2389 	uint8_t			*addrp;
2390 	int			i, addrsz;
2391 	int			num_nodes = 0, nchan = 0;
2392 	int			listsz = 0;
2393 	mde_cookie_t		*listp = NULL;
2394 	struct ether_addr	ea;
2395 	uint64_t		macaddr;
2396 	uint64_t		inst = 0;
2397 	uint64_t		val;
2398 
2399 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2400 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2401 		    id_propname);
2402 		return (1);
2403 	}
2404 
2405 	/*
2406 	 * Find the channel endpoint node(s) (which should be under this
2407 	 * port node) which contain the channel id(s).
2408 	 */
2409 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2410 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2411 		    __func__, num_nodes);
2412 		return (1);
2413 	}
2414 
2415 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2416 
2417 	/* allocate enough space for node list */
2418 	listsz = num_nodes * sizeof (mde_cookie_t);
2419 	listp = kmem_zalloc(listsz, KM_SLEEP);
2420 
2421 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2422 	    md_find_name(mdp, "fwd"), listp);
2423 
2424 	if (nchan <= 0) {
2425 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2426 		kmem_free(listp, listsz);
2427 		return (1);
2428 	}
2429 
2430 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2431 
2432 	/* use property from first node found */
2433 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2434 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2435 		    id_propname);
2436 		kmem_free(listp, listsz);
2437 		return (1);
2438 	}
2439 
2440 	/* don't need list any more */
2441 	kmem_free(listp, listsz);
2442 
2443 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2444 
2445 	/* read mac-address property */
2446 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2447 	    &addrp, &addrsz)) {
2448 		DWARN(vswp, "%s: prop(%s) not found",
2449 		    __func__, remaddr_propname);
2450 		return (1);
2451 	}
2452 
2453 	if (addrsz < ETHERADDRL) {
2454 		DWARN(vswp, "%s: invalid address size", __func__);
2455 		return (1);
2456 	}
2457 
2458 	macaddr = *((uint64_t *)addrp);
2459 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2460 
2461 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2462 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2463 		macaddr >>= 8;
2464 	}
2465 
2466 	/* now update all properties into the port */
2467 	portp->p_vswp = vswp;
2468 	portp->p_instance = inst;
2469 	portp->addr_set = B_FALSE;
2470 	ether_copy(&ea, &portp->p_macaddr);
2471 	if (nchan > VSW_PORT_MAX_LDCS) {
2472 		D2(vswp, "%s: using first of %d ldc ids",
2473 		    __func__, nchan);
2474 		nchan = VSW_PORT_MAX_LDCS;
2475 	}
2476 	portp->num_ldcs = nchan;
2477 	portp->ldc_ids =
2478 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2479 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2480 
2481 	/* read vlan id properties of this port node */
2482 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2483 	    &portp->vids, &portp->nvids, NULL);
2484 
2485 	/* Check if hybrid property is present */
2486 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2487 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2488 		portp->p_hio_enabled = B_TRUE;
2489 	} else {
2490 		portp->p_hio_enabled = B_FALSE;
2491 	}
2492 	/*
2493 	 * Port hio capability determined after version
2494 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2495 	 */
2496 	portp->p_hio_capable = B_FALSE;
2497 
2498 	/* Read bandwidth of this port */
2499 	vsw_port_read_bandwidth(portp, mdp, *node, &portp->p_bandwidth);
2500 
2501 	return (0);
2502 }
2503 
2504 /*
2505  * Add a new port to the system.
2506  *
2507  * Returns 0 on success, 1 on failure.
2508  */
2509 int
2510 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2511 {
2512 	vsw_port_t	*portp;
2513 	int		rv;
2514 
2515 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2516 
2517 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2518 	if (rv != 0) {
2519 		kmem_free(portp, sizeof (*portp));
2520 		return (1);
2521 	}
2522 
2523 	rv = vsw_port_attach(portp);
2524 	if (rv != 0) {
2525 		DERR(vswp, "%s: failed to attach port", __func__);
2526 		return (1);
2527 	}
2528 
2529 	return (0);
2530 }
2531 
2532 static int
2533 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2534 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2535 {
2536 	uint64_t	cport_num;
2537 	uint64_t	pport_num;
2538 	vsw_port_list_t	*plistp;
2539 	vsw_port_t	*portp;
2540 	uint16_t	pvid;
2541 	vsw_vlanid_t	*vids;
2542 	uint16_t	nvids;
2543 	uint64_t	val;
2544 	boolean_t	hio_enabled = B_FALSE;
2545 	uint64_t	maxbw;
2546 	enum		{P_MD_init = 0x1,
2547 				P_MD_vlans = 0x2,
2548 				P_MD_hio = 0x4,
2549 				P_MD_maxbw = 0x8} updated;
2550 
2551 	updated = P_MD_init;
2552 
2553 	/*
2554 	 * For now, we get port updates only if vlan ids changed.
2555 	 * We read the port num and do some sanity check.
2556 	 */
2557 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2558 		return (1);
2559 	}
2560 
2561 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2562 		return (1);
2563 	}
2564 	if (cport_num != pport_num)
2565 		return (1);
2566 
2567 	plistp = &(vswp->plist);
2568 
2569 	READ_ENTER(&plistp->lockrw);
2570 
2571 	portp = vsw_lookup_port(vswp, cport_num);
2572 	if (portp == NULL) {
2573 		RW_EXIT(&plistp->lockrw);
2574 		return (1);
2575 	}
2576 
2577 	/* Read the vlan ids */
2578 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2579 	    &vids, &nvids, NULL);
2580 
2581 	/* Determine if there are any vlan id updates */
2582 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2583 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2584 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2585 	    !vsw_cmp_vids(vids, portp->vids, nvids))) {
2586 		updated |= P_MD_vlans;
2587 	}
2588 
2589 	/* Check if hybrid property is present */
2590 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2591 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2592 		hio_enabled = B_TRUE;
2593 	}
2594 
2595 	if (portp->p_hio_enabled != hio_enabled) {
2596 		updated |= P_MD_hio;
2597 	}
2598 
2599 	/* Check if maxbw property is present */
2600 	vsw_port_read_bandwidth(portp, curr_mdp, curr_mdex, &maxbw);
2601 	if (maxbw != portp->p_bandwidth) {
2602 		if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) {
2603 			updated |= P_MD_maxbw;
2604 		} else {
2605 			cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth"
2606 			    " update for port %d as the specified value:%ld"
2607 			    " is invalid\n",
2608 			    vswp->instance, portp->p_instance, maxbw);
2609 		}
2610 	}
2611 
2612 	if (updated & P_MD_vlans) {
2613 		/* Remove existing vlan ids from the hash table. */
2614 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2615 
2616 		/* Reconfigure vlans with network device */
2617 		vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids);
2618 
2619 		/* add these new vlan ids into hash table */
2620 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2621 
2622 		/* reset the port if it is vlan unaware (ver < 1.3) */
2623 		vsw_vlan_unaware_port_reset(portp);
2624 	}
2625 
2626 	if (updated & P_MD_hio) {
2627 		vsw_hio_port_update(portp, hio_enabled);
2628 	}
2629 
2630 	if (updated & P_MD_maxbw) {
2631 		vsw_update_bandwidth(NULL, portp, VSW_VNETPORT, maxbw);
2632 	}
2633 
2634 	RW_EXIT(&plistp->lockrw);
2635 
2636 	return (0);
2637 }
2638 
2639 /*
2640  * vsw_mac_rx -- A common function to send packets to the interface.
2641  * By default this function check if the interface is UP or not, the
2642  * rest of the behaviour depends on the flags as below:
2643  *
2644  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2645  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2646  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2647  */
2648 void
2649 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2650     mblk_t *mp, vsw_macrx_flags_t flags)
2651 {
2652 	mblk_t		*mpt;
2653 
2654 	D1(vswp, "%s:enter\n", __func__);
2655 	READ_ENTER(&vswp->if_lockrw);
2656 	/* Check if the interface is up */
2657 	if (!(vswp->if_state & VSW_IF_UP)) {
2658 		RW_EXIT(&vswp->if_lockrw);
2659 		/* Free messages only if FREEMSG flag specified */
2660 		if (flags & VSW_MACRX_FREEMSG) {
2661 			freemsgchain(mp);
2662 		}
2663 		D1(vswp, "%s:exit\n", __func__);
2664 		return;
2665 	}
2666 	/*
2667 	 * If PROMISC flag is passed, then check if
2668 	 * the interface is in the PROMISC mode.
2669 	 * If not, drop the messages.
2670 	 */
2671 	if (flags & VSW_MACRX_PROMISC) {
2672 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2673 			RW_EXIT(&vswp->if_lockrw);
2674 			/* Free messages only if FREEMSG flag specified */
2675 			if (flags & VSW_MACRX_FREEMSG) {
2676 				freemsgchain(mp);
2677 			}
2678 			D1(vswp, "%s:exit\n", __func__);
2679 			return;
2680 		}
2681 	}
2682 	RW_EXIT(&vswp->if_lockrw);
2683 	/*
2684 	 * If COPYMSG flag is passed, then make a copy
2685 	 * of the message chain and send up the copy.
2686 	 */
2687 	if (flags & VSW_MACRX_COPYMSG) {
2688 		mp = copymsgchain(mp);
2689 		if (mp == NULL) {
2690 			D1(vswp, "%s:exit\n", __func__);
2691 			return;
2692 		}
2693 	}
2694 
2695 	D2(vswp, "%s: sending up stack", __func__);
2696 
2697 	mpt = NULL;
2698 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2699 	if (mp != NULL) {
2700 		mac_rx(vswp->if_mh, mrh, mp);
2701 	}
2702 	D1(vswp, "%s:exit\n", __func__);
2703 }
2704 
2705 /* copy mac address of vsw into soft state structure */
2706 static void
2707 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2708 {
2709 	int	i;
2710 
2711 	WRITE_ENTER(&vswp->if_lockrw);
2712 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2713 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2714 		macaddr >>= 8;
2715 	}
2716 	RW_EXIT(&vswp->if_lockrw);
2717 }
2718 
2719 /* Compare VLAN ids, array size expected to be same. */
2720 static boolean_t
2721 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids)
2722 {
2723 	int i, j;
2724 	uint16_t vid;
2725 
2726 	for (i = 0; i < nvids; i++) {
2727 		vid = vids1[i].vl_vid;
2728 		for (j = 0; j < nvids; j++) {
2729 			if (vid == vids2[i].vl_vid)
2730 				break;
2731 		}
2732 		if (j == nvids) {
2733 			return (B_FALSE);
2734 		}
2735 	}
2736 	return (B_TRUE);
2737 }
2738