xref: /illumos-gate/usr/src/uts/sun4v/io/vsw.c (revision dde769a2c00c82faaf80563ddd5610de2f4da339)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <sys/varargs.h>
53 #include <sys/machsystm.h>
54 #include <sys/modctl.h>
55 #include <sys/modhash.h>
56 #include <sys/mac_provider.h>
57 #include <sys/mac_ether.h>
58 #include <sys/taskq.h>
59 #include <sys/note.h>
60 #include <sys/mach_descrip.h>
61 #include <sys/mac_provider.h>
62 #include <sys/mdeg.h>
63 #include <sys/ldc.h>
64 #include <sys/vsw_fdb.h>
65 #include <sys/vsw.h>
66 #include <sys/vio_mailbox.h>
67 #include <sys/vnet_mailbox.h>
68 #include <sys/vnet_common.h>
69 #include <sys/vio_util.h>
70 #include <sys/sdt.h>
71 #include <sys/atomic.h>
72 #include <sys/callb.h>
73 #include <sys/vlan.h>
74 
75 /*
76  * Function prototypes.
77  */
78 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
79 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
80 static	int vsw_unattach(vsw_t *vswp);
81 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
82 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);
83 static	int vsw_mod_cleanup(void);
84 
85 /* MDEG routines */
86 static	int vsw_mdeg_register(vsw_t *vswp);
87 static	void vsw_mdeg_unregister(vsw_t *vswp);
88 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
89 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
90 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
91 static	int vsw_read_mdprops(vsw_t *vswp);
92 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
93 	mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
94 	uint16_t *nvidsp, uint16_t *default_idp);
95 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
96 	md_t *mdp, mde_cookie_t *node);
97 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
98 	mde_cookie_t node);
99 static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
100 	uint32_t *mtu);
101 static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
102 static	void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
103 	boolean_t *pls);
104 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
105 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
106 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
107 	vsw_vlanid_t *vids2, int nvids);
108 
109 /* Mac driver related routines */
110 static int vsw_mac_register(vsw_t *);
111 static int vsw_mac_unregister(vsw_t *);
112 static int vsw_m_stat(void *, uint_t, uint64_t *);
113 static void vsw_m_stop(void *arg);
114 static int vsw_m_start(void *arg);
115 static int vsw_m_unicst(void *arg, const uint8_t *);
116 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
117 static int vsw_m_promisc(void *arg, boolean_t);
118 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
119 static void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state);
120 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
121     mblk_t *mp, vsw_macrx_flags_t flags);
122 void vsw_physlink_state_update(vsw_t *vswp);
123 
124 /*
125  * Functions imported from other files.
126  */
127 extern void vsw_setup_switching_thread(void *arg);
128 extern int vsw_setup_switching_start(vsw_t *vswp);
129 extern void vsw_setup_switching_stop(vsw_t *vswp);
130 extern int vsw_setup_switching(vsw_t *);
131 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
132     vsw_port_t *port, mac_resource_handle_t mrh);
133 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
134 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
135 extern void vsw_del_mcst_vsw(vsw_t *);
136 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
137 extern void vsw_detach_ports(vsw_t *vswp);
138 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
139 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
140 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
141 	md_t *prev_mdp, mde_cookie_t prev_mdex);
142 extern	int vsw_port_attach(vsw_port_t *port);
143 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
144 extern int vsw_mac_open(vsw_t *vswp);
145 extern void vsw_mac_close(vsw_t *vswp);
146 extern void vsw_mac_cleanup_ports(vsw_t *vswp);
147 extern void vsw_unset_addrs(vsw_t *vswp);
148 extern void vsw_setup_layer2_post_process(vsw_t *vswp);
149 extern void vsw_create_vlans(void *arg, int type);
150 extern void vsw_destroy_vlans(void *arg, int type);
151 extern void vsw_vlan_add_ids(void *arg, int type);
152 extern void vsw_vlan_remove_ids(void *arg, int type);
153 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
154 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
155 	mblk_t **npt);
156 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
157 extern void vsw_hio_cleanup(vsw_t *vswp);
158 extern void vsw_hio_start_ports(vsw_t *vswp);
159 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
160 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
161 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
162 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
163     vsw_vlanid_t *new_vids, int new_nvids);
164 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
165 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
166 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
167     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
168 extern void vsw_reset_ports(vsw_t *vswp);
169 extern void vsw_port_reset(vsw_port_t *portp);
170 extern void vsw_physlink_update_ports(vsw_t *vswp);
171 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
172 
173 /*
174  * Internal tunables.
175  */
176 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
177 int	vsw_wretries = 100;		/* # of write attempts */
178 int	vsw_desc_delay = 0;		/* delay in us */
179 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
180 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
181 int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
182 					/* 300*3 = 900sec(15min) of max tmout */
183 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
184 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
185 int	vsw_ldc_retries = 5;		/* # of ldc_close() retries */
186 int	vsw_ldc_delay = 1000;		/* 1 ms delay for ldc_close() */
187 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
188 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
189 
190 uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
191 uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
192 uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
193 
194 /* delay in usec to wait for all references on a fdb entry to be dropped */
195 uint32_t vsw_fdbe_refcnt_delay = 10;
196 
197 /*
198  * Default vlan id. This is only used internally when the "default-vlan-id"
199  * property is not present in the MD device node. Therefore, this should not be
200  * used as a tunable; if this value is changed, the corresponding variable
201  * should be updated to the same value in all vnets connected to this vsw.
202  */
203 uint16_t	vsw_default_vlan_id = 1;
204 
205 /*
206  * Workaround for a version handshake bug in obp's vnet.
207  * If vsw initiates version negotiation starting from the highest version,
208  * obp sends a nack and terminates version handshake. To workaround
209  * this, we do not initiate version handshake when the channel comes up.
210  * Instead, we wait for the peer to send its version info msg and go through
211  * the version protocol exchange. If we successfully negotiate a version,
212  * before sending the ack, we send our version info msg to the peer
213  * using the <major,minor> version that we are about to ack.
214  */
215 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
216 
217 /*
218  * In the absence of "priority-ether-types" property in MD, the following
219  * internal tunable can be set to specify a single priority ethertype.
220  */
221 uint64_t vsw_pri_eth_type = 0;
222 
223 /*
224  * Number of transmit priority buffers that are preallocated per device.
225  * This number is chosen to be a small value to throttle transmission
226  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
227  */
228 uint32_t vsw_pri_tx_nmblks = 64;
229 
230 /*
231  * Number of RARP packets sent to announce macaddr to the physical switch,
232  * after vsw's physical device is changed dynamically or after a guest (client
233  * vnet) is live migrated in.
234  */
235 uint32_t vsw_publish_macaddr_count = 3;
236 
237 boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
238 int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanp */
239 int vsw_hio_cleanup_delay = 10000;	/* 10ms */
240 
241 /* Number of transmit descriptors -  must be power of 2 */
242 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
243 
244 /*
245  * Max number of mblks received in one receive operation.
246  */
247 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
248 
249 /*
250  * Internal tunables for receive buffer pools, that is,  the size and number of
251  * mblks for each pool. At least 3 sizes must be specified if these are used.
252  * The sizes must be specified in increasing order. Non-zero value of the first
253  * size will be used as a hint to use these values instead of the algorithm
254  * that determines the sizes based on MTU.
255  */
256 uint32_t vsw_mblk_size1 = 0;
257 uint32_t vsw_mblk_size2 = 0;
258 uint32_t vsw_mblk_size3 = 0;
259 uint32_t vsw_mblk_size4 = 0;
260 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
261 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
262 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
263 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */
264 
265 /*
266  * Set this to non-zero to enable additional internal receive buffer pools
267  * based on the MTU of the device for better performance at the cost of more
268  * memory consumption. This is turned off by default, to use allocb(9F) for
269  * receive buffer allocations of sizes > 2K.
270  */
271 boolean_t vsw_jumbo_rxpools = B_FALSE;
272 
273 /*
274  * vsw_max_tx_qcount is the maximum # of packets that can be queued
275  * before the tx worker thread begins processing the queue. Its value
276  * is chosen to be 4x the default length of tx descriptor ring.
277  */
278 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
279 
280 /*
281  * MAC callbacks
282  */
283 static	mac_callbacks_t	vsw_m_callbacks = {
284 	0,
285 	vsw_m_stat,
286 	vsw_m_start,
287 	vsw_m_stop,
288 	vsw_m_promisc,
289 	vsw_m_multicst,
290 	vsw_m_unicst,
291 	vsw_m_tx,
292 	NULL,
293 	NULL,
294 	NULL
295 };
296 
297 static	struct	cb_ops	vsw_cb_ops = {
298 	nulldev,			/* cb_open */
299 	nulldev,			/* cb_close */
300 	nodev,				/* cb_strategy */
301 	nodev,				/* cb_print */
302 	nodev,				/* cb_dump */
303 	nodev,				/* cb_read */
304 	nodev,				/* cb_write */
305 	nodev,				/* cb_ioctl */
306 	nodev,				/* cb_devmap */
307 	nodev,				/* cb_mmap */
308 	nodev,				/* cb_segmap */
309 	nochpoll,			/* cb_chpoll */
310 	ddi_prop_op,			/* cb_prop_op */
311 	NULL,				/* cb_stream */
312 	D_MP,				/* cb_flag */
313 	CB_REV,				/* rev */
314 	nodev,				/* int (*cb_aread)() */
315 	nodev				/* int (*cb_awrite)() */
316 };
317 
318 static	struct	dev_ops	vsw_ops = {
319 	DEVO_REV,		/* devo_rev */
320 	0,			/* devo_refcnt */
321 	NULL,			/* devo_getinfo */
322 	nulldev,		/* devo_identify */
323 	nulldev,		/* devo_probe */
324 	vsw_attach,		/* devo_attach */
325 	vsw_detach,		/* devo_detach */
326 	nodev,			/* devo_reset */
327 	&vsw_cb_ops,		/* devo_cb_ops */
328 	(struct bus_ops *)NULL,	/* devo_bus_ops */
329 	ddi_power		/* devo_power */
330 };
331 
332 extern	struct	mod_ops	mod_driverops;
333 static struct modldrv vswmodldrv = {
334 	&mod_driverops,
335 	"sun4v Virtual Switch",
336 	&vsw_ops,
337 };
338 
/*
 * Acquire (LDC_ENTER_LOCK) or release (LDC_EXIT_LOCK) the three per-channel
 * mutexes of 'ldcp'. The locks are always taken in the order
 * cblock -> rxlock -> txlock and dropped in the reverse order.
 *
 * Each macro expands to a single statement (do { } while (0)) so that it is
 * safe to use as the body of an unbraced if/else; the caller supplies the
 * terminating semicolon.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	} while (0)
347 
348 /* Driver soft state ptr  */
349 static void	*vsw_state;
350 
351 /*
352  * Linked list of "vsw_t" structures - one per instance.
353  */
354 vsw_t		*vsw_head = NULL;
355 vio_mblk_pool_t	*vsw_rx_poolp = NULL;
356 krwlock_t	vsw_rw;
357 
358 /*
359  * Property names
360  */
361 static char vdev_propname[] = "virtual-device";
362 static char vsw_propname[] = "virtual-network-switch";
363 static char physdev_propname[] = "vsw-phys-dev";
364 static char smode_propname[] = "vsw-switch-mode";
365 static char macaddr_propname[] = "local-mac-address";
366 static char remaddr_propname[] = "remote-mac-address";
367 static char ldcids_propname[] = "ldc-ids";
368 static char chan_propname[] = "channel-endpoint";
369 static char id_propname[] = "id";
370 static char reg_propname[] = "reg";
371 static char pri_types_propname[] = "priority-ether-types";
372 static char vsw_pvid_propname[] = "port-vlan-id";
373 static char vsw_vid_propname[] = "vlan-id";
374 static char vsw_dvid_propname[] = "default-vlan-id";
375 static char port_pvid_propname[] = "remote-port-vlan-id";
376 static char port_vid_propname[] = "remote-vlan-id";
377 static char hybrid_propname[] = "hybrid";
378 static char vsw_mtu_propname[] = "mtu";
379 static char vsw_linkprop_propname[] = "linkprop";
380 
381 /*
382  * Matching criteria passed to the MDEG to register interest
383  * in changes to 'virtual-device-port' nodes identified by their
384  * 'id' property.
385  */
386 static md_prop_match_t vport_prop_match[] = {
387 	{ MDET_PROP_VAL,    "id"   },
388 	{ MDET_LIST_END,    NULL    }
389 };
390 
391 static mdeg_node_match_t vport_match = { "virtual-device-port",
392 						vport_prop_match };
393 
394 /*
395  * Matching criteria passed to the MDEG to register interest
396  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
397  * by their 'name' and 'cfg-handle' properties.
398  */
399 static md_prop_match_t vdev_prop_match[] = {
400 	{ MDET_PROP_STR,    "name"   },
401 	{ MDET_PROP_VAL,    "cfg-handle" },
402 	{ MDET_LIST_END,    NULL    }
403 };
404 
405 static mdeg_node_match_t vdev_match = { "virtual-device",
406 						vdev_prop_match };
407 
408 
409 /*
410  * Specification of an MD node passed to the MDEG to filter any
411  * 'vport' nodes that do not belong to the specified node. This
412  * template is copied for each vsw instance and filled in with
413  * the appropriate 'cfg-handle' value before being passed to the MDEG.
414  */
415 static mdeg_prop_spec_t vsw_prop_template[] = {
416 	{ MDET_PROP_STR,    "name",		vsw_propname },
417 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
418 	{ MDET_LIST_END,    NULL,		NULL	}
419 };
420 
/*
 * Set the value of the second entry (index 1, the "cfg-handle" slot in
 * vsw_prop_template) of an MDEG property spec array. Expands to a single
 * parenthesised expression; the caller supplies the terminating semicolon.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
422 
423 #ifdef	DEBUG
424 /*
425  * Print debug messages - set to 0x1f to enable all msgs
426  * or 0x0 to turn all off.
427  */
428 int vswdbg = 0x0;
429 
430 /*
431  * debug levels:
432  * 0x01:	Function entry/exit tracing
433  * 0x02:	Internal function messages
434  * 0x04:	Verbose internal messages
435  * 0x08:	Warning messages
436  * 0x10:	Error messages
437  */
438 
439 void
440 vswdebug(vsw_t *vswp, const char *fmt, ...)
441 {
442 	char buf[512];
443 	va_list ap;
444 
445 	va_start(ap, fmt);
446 	(void) vsprintf(buf, fmt, ap);
447 	va_end(ap);
448 
449 	if (vswp == NULL)
450 		cmn_err(CE_CONT, "%s\n", buf);
451 	else
452 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
453 }
454 
455 #endif	/* DEBUG */
456 
457 static struct modlinkage modlinkage = {
458 	MODREV_1,
459 	&vswmodldrv,
460 	NULL
461 };
462 
463 int
464 _init(void)
465 {
466 	int status;
467 
468 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
469 
470 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
471 	if (status != 0) {
472 		return (status);
473 	}
474 
475 	mac_init_ops(&vsw_ops, DRV_NAME);
476 	status = mod_install(&modlinkage);
477 	if (status != 0) {
478 		ddi_soft_state_fini(&vsw_state);
479 	}
480 	return (status);
481 }
482 
483 int
484 _fini(void)
485 {
486 	int status;
487 
488 	status = vsw_mod_cleanup();
489 	if (status != 0)
490 		return (status);
491 
492 	status = mod_remove(&modlinkage);
493 	if (status != 0)
494 		return (status);
495 	mac_fini_ops(&vsw_ops);
496 	ddi_soft_state_fini(&vsw_state);
497 
498 	rw_destroy(&vsw_rw);
499 
500 	return (status);
501 }
502 
503 int
504 _info(struct modinfo *modinfop)
505 {
506 	return (mod_info(&modlinkage, modinfop));
507 }
508 
509 static int
510 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
511 {
512 	vsw_t			*vswp;
513 	int			instance;
514 	char			hashname[MAXNAMELEN];
515 	char			qname[TASKQ_NAMELEN];
516 	vsw_attach_progress_t	progress = PROG_init;
517 	int			rv;
518 
519 	switch (cmd) {
520 	case DDI_ATTACH:
521 		break;
522 	case DDI_RESUME:
523 		/* nothing to do for this non-device */
524 		return (DDI_SUCCESS);
525 	case DDI_PM_RESUME:
526 	default:
527 		return (DDI_FAILURE);
528 	}
529 
530 	instance = ddi_get_instance(dip);
531 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
532 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
533 		return (DDI_FAILURE);
534 	}
535 	vswp = ddi_get_soft_state(vsw_state, instance);
536 
537 	if (vswp == NULL) {
538 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
539 		goto vsw_attach_fail;
540 	}
541 
542 	vswp->dip = dip;
543 	vswp->instance = instance;
544 	vswp->phys_link_state = LINK_STATE_UNKNOWN;
545 	ddi_set_driver_private(dip, (caddr_t)vswp);
546 
547 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
548 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
549 	mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL);
550 	cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL);
551 	rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL);
552 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
553 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
554 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
555 
556 	progress |= PROG_locks;
557 
558 	rv = vsw_read_mdprops(vswp);
559 	if (rv != 0)
560 		goto vsw_attach_fail;
561 
562 	progress |= PROG_readmd;
563 
564 	/* setup the unicast forwarding database  */
565 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
566 	    vswp->instance);
567 	D2(vswp, "creating unicast hash table (%s)...", hashname);
568 	vswp->fdb_nchains = vsw_fdb_nchains;
569 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
570 	    mod_hash_null_valdtor, sizeof (void *));
571 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
572 	progress |= PROG_fdb;
573 
574 	/* setup the multicast fowarding database */
575 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
576 	    vswp->instance);
577 	D2(vswp, "creating multicast hash table %s)...", hashname);
578 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
579 	    mod_hash_null_valdtor, sizeof (void *));
580 
581 	progress |= PROG_mfdb;
582 
583 	/*
584 	 * Create the taskq which will process all the VIO
585 	 * control messages.
586 	 */
587 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
588 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
589 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
590 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
591 		    vswp->instance);
592 		goto vsw_attach_fail;
593 	}
594 
595 	progress |= PROG_taskq;
596 
597 	/* prevent auto-detaching */
598 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
599 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
600 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
601 		    "instance %u", DDI_NO_AUTODETACH, instance);
602 	}
603 
604 	/*
605 	 * The null switching function is set to avoid panic until
606 	 * switch mode is setup.
607 	 */
608 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
609 
610 	/*
611 	 * Setup the required switching mode, based on the mdprops that we read
612 	 * earlier. We start a thread to do this, to avoid calling mac_open()
613 	 * directly from attach().
614 	 */
615 	rv = vsw_setup_switching_start(vswp);
616 	if (rv != 0) {
617 		goto vsw_attach_fail;
618 	}
619 
620 	progress |= PROG_swmode;
621 
622 	/* Register with mac layer as a provider */
623 	rv = vsw_mac_register(vswp);
624 	if (rv != 0)
625 		goto vsw_attach_fail;
626 
627 	progress |= PROG_macreg;
628 
629 	/*
630 	 * Now we have everything setup, register an interest in
631 	 * specific MD nodes.
632 	 *
633 	 * The callback is invoked in 2 cases, firstly if upon mdeg
634 	 * registration there are existing nodes which match our specified
635 	 * criteria, and secondly if the MD is changed (and again, there
636 	 * are nodes which we are interested in present within it. Note
637 	 * that our callback will be invoked even if our specified nodes
638 	 * have not actually changed).
639 	 *
640 	 */
641 	rv = vsw_mdeg_register(vswp);
642 	if (rv != 0)
643 		goto vsw_attach_fail;
644 
645 	progress |= PROG_mdreg;
646 
647 	vswp->attach_progress = progress;
648 
649 	WRITE_ENTER(&vsw_rw);
650 	vswp->next = vsw_head;
651 	vsw_head = vswp;
652 	RW_EXIT(&vsw_rw);
653 
654 	ddi_report_dev(vswp->dip);
655 	return (DDI_SUCCESS);
656 
657 vsw_attach_fail:
658 	DERR(NULL, "vsw_attach: failed");
659 
660 	vswp->attach_progress = progress;
661 	(void) vsw_unattach(vswp);
662 	ddi_soft_state_free(vsw_state, instance);
663 	return (DDI_FAILURE);
664 }
665 
666 static int
667 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
668 {
669 	vsw_t			**vswpp, *vswp;
670 	int 			instance;
671 
672 	instance = ddi_get_instance(dip);
673 	vswp = ddi_get_soft_state(vsw_state, instance);
674 
675 	if (vswp == NULL) {
676 		return (DDI_FAILURE);
677 	}
678 
679 	switch (cmd) {
680 	case DDI_DETACH:
681 		break;
682 	case DDI_SUSPEND:
683 	case DDI_PM_SUSPEND:
684 	default:
685 		return (DDI_FAILURE);
686 	}
687 
688 	D2(vswp, "detaching instance %d", instance);
689 
690 	if (vsw_unattach(vswp) != 0) {
691 		return (DDI_FAILURE);
692 	}
693 
694 	ddi_remove_minor_node(dip, NULL);
695 
696 	WRITE_ENTER(&vsw_rw);
697 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
698 		if (*vswpp == vswp) {
699 			*vswpp = vswp->next;
700 			break;
701 		}
702 	}
703 	RW_EXIT(&vsw_rw);
704 
705 	ddi_soft_state_free(vsw_state, instance);
706 
707 	return (DDI_SUCCESS);
708 }
709 
710 /*
711  * Common routine to handle vsw_attach() failure and vsw_detach(). Note that
712  * the only reason this function could fail is if mac_unregister() fails.
713  * Otherwise, this function must ensure that all resources are freed and return
714  * success.
715  */
716 static int
717 vsw_unattach(vsw_t *vswp)
718 {
719 	vio_mblk_pool_t		*poolp, *npoolp;
720 	vsw_attach_progress_t	progress;
721 
722 	progress = vswp->attach_progress;
723 
724 	/*
725 	 * Unregister from the gldv3 subsystem. This can fail, in particular
726 	 * if there are still any open references to this mac device; in which
727 	 * case we just return failure without continuing to detach further.
728 	 */
729 	if (progress & PROG_macreg) {
730 		if (vsw_mac_unregister(vswp) != 0) {
731 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
732 			    "MAC layer", vswp->instance);
733 			return (1);
734 		}
735 		progress &= ~PROG_macreg;
736 	}
737 
738 	/*
739 	 * Now that we have unregistered from gldv3, we must finish all other
740 	 * steps and successfully return from this function; otherwise we will
741 	 * end up leaving the device in a broken/unusable state.
742 	 *
743 	 * If we have registered with mdeg, unregister now to stop further
744 	 * callbacks to this vsw device and/or its ports. Then, detach any
745 	 * existing ports.
746 	 */
747 	if (progress & PROG_mdreg) {
748 		vsw_mdeg_unregister(vswp);
749 		vsw_detach_ports(vswp);
750 
751 		/*
752 		 * At this point, we attempt to free receive mblk pools that
753 		 * couldn't be destroyed when the ports were detached; if this
754 		 * attempt also fails, we hook up the pool(s) to the module so
755 		 * they can be cleaned up in _fini().
756 		 */
757 		poolp = vswp->rxh;
758 		while (poolp != NULL) {
759 			npoolp = vswp->rxh = poolp->nextp;
760 			if (vio_destroy_mblks(poolp) != 0) {
761 				WRITE_ENTER(&vsw_rw);
762 				poolp->nextp = vsw_rx_poolp;
763 				vsw_rx_poolp = poolp;
764 				RW_EXIT(&vsw_rw);
765 			}
766 			poolp = npoolp;
767 		}
768 		progress &= ~PROG_mdreg;
769 	}
770 
771 	/*
772 	 * If we have started a thread to setup the switching mode, stop it, if
773 	 * it is still running. If it has finished setting up the switching
774 	 * mode, then we need to clean up some additional things if we are
775 	 * running in L2 mode: first free up any hybrid resources; then stop
776 	 * and close the underlying physical device. Note that we would have
777 	 * already released all per mac_client resources (ucast, mcast addrs,
778 	 * hio-shares etc) as all the ports are detached and if the vsw device
779 	 * itself was in use as an interface, it has been unplumbed (otherwise
780 	 * mac_unregister() above would fail).
781 	 */
782 	if (progress & PROG_swmode) {
783 
784 		vsw_setup_switching_stop(vswp);
785 
786 		if (vswp->hio_capable == B_TRUE) {
787 			vsw_hio_cleanup(vswp);
788 			vswp->hio_capable = B_FALSE;
789 		}
790 
791 		mutex_enter(&vswp->mac_lock);
792 		vsw_mac_close(vswp);
793 		mutex_exit(&vswp->mac_lock);
794 
795 		progress &= ~PROG_swmode;
796 	}
797 
798 	/*
799 	 * By now any pending tasks have finished and the underlying
800 	 * ldc's have been destroyed, so its safe to delete the control
801 	 * message taskq.
802 	 */
803 	if (progress & PROG_taskq) {
804 		ddi_taskq_destroy(vswp->taskq_p);
805 		progress &= ~PROG_taskq;
806 	}
807 
808 	/* Destroy the multicast hash table */
809 	if (progress & PROG_mfdb) {
810 		mod_hash_destroy_hash(vswp->mfdb);
811 		progress &= ~PROG_mfdb;
812 	}
813 
814 	/* Destroy the vlan hash table and fdb */
815 	if (progress & PROG_fdb) {
816 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
817 		mod_hash_destroy_hash(vswp->fdb_hashp);
818 		progress &= ~PROG_fdb;
819 	}
820 
821 	if (progress & PROG_readmd) {
822 		if (VSW_PRI_ETH_DEFINED(vswp)) {
823 			kmem_free(vswp->pri_types,
824 			    sizeof (uint16_t) * vswp->pri_num_types);
825 			(void) vio_destroy_mblks(vswp->pri_tx_vmp);
826 		}
827 		progress &= ~PROG_readmd;
828 	}
829 
830 	if (progress & PROG_locks) {
831 		rw_destroy(&vswp->plist.lockrw);
832 		rw_destroy(&vswp->mfdbrw);
833 		rw_destroy(&vswp->if_lockrw);
834 		rw_destroy(&vswp->maccl_rwlock);
835 		cv_destroy(&vswp->sw_thr_cv);
836 		mutex_destroy(&vswp->sw_thr_lock);
837 		mutex_destroy(&vswp->mca_lock);
838 		mutex_destroy(&vswp->mac_lock);
839 		progress &= ~PROG_locks;
840 	}
841 
842 	vswp->attach_progress = progress;
843 
844 	return (0);
845 }
846 
847 /*
848  * one time cleanup.
849  */
850 static int
851 vsw_mod_cleanup(void)
852 {
853 	vio_mblk_pool_t		*poolp, *npoolp;
854 
855 	/*
856 	 * If any rx mblk pools are still in use, return
857 	 * error and stop the module from unloading.
858 	 */
859 	WRITE_ENTER(&vsw_rw);
860 	poolp = vsw_rx_poolp;
861 	while (poolp != NULL) {
862 		npoolp = vsw_rx_poolp = poolp->nextp;
863 		if (vio_destroy_mblks(poolp) != 0) {
864 			vsw_rx_poolp = poolp;
865 			RW_EXIT(&vsw_rw);
866 			return (EBUSY);
867 		}
868 		poolp = npoolp;
869 	}
870 	RW_EXIT(&vsw_rw);
871 
872 	return (0);
873 }
874 
875 /*
876  * Get the value of the "vsw-phys-dev" property in the specified
877  * node. This property is the name of the physical device that
878  * the virtual switch will use to talk to the outside world.
879  *
880  * Note it is valid for this property to be NULL (but the property
881  * itself must exist). Callers of this routine should verify that
882  * the value returned is what they expected (i.e. either NULL or non NULL).
883  *
884  * On success returns value of the property in region pointed to by
885  * the 'name' argument, and with return value of 0. Otherwise returns 1.
886  */
887 static int
888 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
889 {
890 	int		len = 0;
891 	int		instance;
892 	char		*physname = NULL;
893 	char		*dev;
894 	const char	*dev_name;
895 	char		myname[MAXNAMELEN];
896 
897 	dev_name = ddi_driver_name(vswp->dip);
898 	instance = ddi_get_instance(vswp->dip);
899 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
900 
901 	if (md_get_prop_data(mdp, node, physdev_propname,
902 	    (uint8_t **)(&physname), &len) != 0) {
903 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
904 		    "device(s) from MD", vswp->instance);
905 		return (1);
906 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
907 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
908 		    vswp->instance, physname);
909 		return (1);
910 	} else if (strcmp(myname, physname) == 0) {
911 		/*
912 		 * Prevent the vswitch from opening itself as the
913 		 * network device.
914 		 */
915 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
916 		    vswp->instance, physname);
917 		return (1);
918 	} else {
919 		(void) strncpy(name, physname, strlen(physname) + 1);
920 		D2(vswp, "%s: using first device specified (%s)",
921 		    __func__, physname);
922 	}
923 
924 #ifdef DEBUG
925 	/*
926 	 * As a temporary measure to aid testing we check to see if there
927 	 * is a vsw.conf file present. If there is we use the value of the
928 	 * vsw_physname property in the file as the name of the physical
929 	 * device, overriding the value from the MD.
930 	 *
931 	 * There may be multiple devices listed, but for the moment
932 	 * we just use the first one.
933 	 */
934 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
935 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
936 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
937 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
938 			    vswp->instance, dev);
939 			ddi_prop_free(dev);
940 			return (1);
941 		} else {
942 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
943 			    "config file", vswp->instance, dev);
944 
945 			(void) strncpy(name, dev, strlen(dev) + 1);
946 		}
947 
948 		ddi_prop_free(dev);
949 	}
950 #endif
951 
952 	return (0);
953 }
954 
955 /*
956  * Read the 'vsw-switch-mode' property from the specified MD node.
957  *
958  * Returns 0 on success, otherwise returns 1.
959  */
960 static int
961 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode)
962 {
963 	int		len = 0;
964 	char		*smode = NULL;
965 	char		*curr_mode = NULL;
966 
967 	D1(vswp, "%s: enter", __func__);
968 
969 	/*
970 	 * Get the switch-mode property. The modes are listed in
971 	 * decreasing order of preference, i.e. prefered mode is
972 	 * first item in list.
973 	 */
974 	len = 0;
975 	if (md_get_prop_data(mdp, node, smode_propname,
976 	    (uint8_t **)(&smode), &len) != 0) {
977 		/*
978 		 * Unable to get switch-mode property from MD, nothing
979 		 * more we can do.
980 		 */
981 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
982 		    " from the MD", vswp->instance);
983 		return (1);
984 	}
985 
986 	curr_mode = smode;
987 	/*
988 	 * Modes of operation:
989 	 * 'switched'	 - layer 2 switching, underlying HW in
990 	 *			programmed mode.
991 	 * 'promiscuous' - layer 2 switching, underlying HW in
992 	 *			promiscuous mode.
993 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
994 	 *			in non-promiscuous mode.
995 	 */
996 	while (curr_mode < (smode + len)) {
997 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
998 		if (strcmp(curr_mode, "switched") == 0) {
999 			*mode = VSW_LAYER2;
1000 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
1001 			*mode = VSW_LAYER2 | VSW_LAYER2_PROMISC;
1002 		} else if (strcmp(curr_mode, "routed") == 0) {
1003 			*mode = VSW_LAYER3;
1004 		} else {
1005 			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
1006 			    "setting to default switched mode",
1007 			    vswp->instance, curr_mode);
1008 			*mode = VSW_LAYER2;
1009 		}
1010 		curr_mode += strlen(curr_mode) + 1;
1011 	}
1012 
1013 	D2(vswp, "%s: %d mode", __func__, *mode);
1014 
1015 	D1(vswp, "%s: exit", __func__);
1016 
1017 	return (0);
1018 }
1019 
1020 /*
1021  * Register with the MAC layer as a network device, so we
1022  * can be plumbed if necessary.
1023  */
1024 static int
1025 vsw_mac_register(vsw_t *vswp)
1026 {
1027 	mac_register_t	*macp;
1028 	int		rv;
1029 
1030 	D1(vswp, "%s: enter", __func__);
1031 
1032 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1033 		return (EINVAL);
1034 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1035 	macp->m_driver = vswp;
1036 	macp->m_dip = vswp->dip;
1037 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1038 	macp->m_callbacks = &vsw_m_callbacks;
1039 	macp->m_min_sdu = 0;
1040 	macp->m_max_sdu = vswp->mtu;
1041 	macp->m_margin = VLAN_TAGSZ;
1042 	rv = mac_register(macp, &vswp->if_mh);
1043 	mac_free(macp);
1044 	if (rv != 0) {
1045 		/*
1046 		 * Treat this as a non-fatal error as we may be
1047 		 * able to operate in some other mode.
1048 		 */
1049 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1050 		    "a provider with MAC layer", vswp->instance);
1051 		return (rv);
1052 	}
1053 
1054 	vswp->if_state |= VSW_IF_REG;
1055 
1056 	D1(vswp, "%s: exit", __func__);
1057 
1058 	return (rv);
1059 }
1060 
1061 static int
1062 vsw_mac_unregister(vsw_t *vswp)
1063 {
1064 	int		rv = 0;
1065 
1066 	D1(vswp, "%s: enter", __func__);
1067 
1068 	WRITE_ENTER(&vswp->if_lockrw);
1069 
1070 	if (vswp->if_state & VSW_IF_REG) {
1071 		rv = mac_unregister(vswp->if_mh);
1072 		if (rv != 0) {
1073 			DWARN(vswp, "%s: unable to unregister from MAC "
1074 			    "framework", __func__);
1075 
1076 			RW_EXIT(&vswp->if_lockrw);
1077 			D1(vswp, "%s: fail exit", __func__);
1078 			return (rv);
1079 		}
1080 
1081 		/* mark i/f as down and unregistered */
1082 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1083 	}
1084 	RW_EXIT(&vswp->if_lockrw);
1085 
1086 	D1(vswp, "%s: exit", __func__);
1087 
1088 	return (rv);
1089 }
1090 
1091 static int
1092 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1093 {
1094 	vsw_t			*vswp = (vsw_t *)arg;
1095 
1096 	D1(vswp, "%s: enter", __func__);
1097 
1098 	mutex_enter(&vswp->mac_lock);
1099 	if (vswp->mh == NULL) {
1100 		mutex_exit(&vswp->mac_lock);
1101 		return (EINVAL);
1102 	}
1103 
1104 	/* return stats from underlying device */
1105 	*val = mac_stat_get(vswp->mh, stat);
1106 
1107 	mutex_exit(&vswp->mac_lock);
1108 
1109 	return (0);
1110 }
1111 
1112 static void
1113 vsw_m_stop(void *arg)
1114 {
1115 	vsw_t	*vswp = (vsw_t *)arg;
1116 
1117 	D1(vswp, "%s: enter", __func__);
1118 
1119 	WRITE_ENTER(&vswp->if_lockrw);
1120 	vswp->if_state &= ~VSW_IF_UP;
1121 	RW_EXIT(&vswp->if_lockrw);
1122 
1123 	/* Cleanup and close the mac client */
1124 	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
1125 
1126 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1127 }
1128 
1129 static int
1130 vsw_m_start(void *arg)
1131 {
1132 	int		rv;
1133 	vsw_t		*vswp = (vsw_t *)arg;
1134 
1135 	D1(vswp, "%s: enter", __func__);
1136 
1137 	WRITE_ENTER(&vswp->if_lockrw);
1138 
1139 	vswp->if_state |= VSW_IF_UP;
1140 
1141 	if (vswp->switching_setup_done == B_FALSE) {
1142 		/*
1143 		 * If the switching mode has not been setup yet, just
1144 		 * return. The unicast address will be programmed
1145 		 * after the physical device is successfully setup by the
1146 		 * timeout handler.
1147 		 */
1148 		RW_EXIT(&vswp->if_lockrw);
1149 		return (0);
1150 	}
1151 
1152 	/* if in layer2 mode, program unicast address. */
1153 	if (vswp->mh != NULL) {
1154 		/* Init a mac client and program addresses */
1155 		rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
1156 		if (rv != 0) {
1157 			cmn_err(CE_NOTE,
1158 			    "!vsw%d: failed to program interface "
1159 			    "unicast address\n", vswp->instance);
1160 		}
1161 	}
1162 
1163 	RW_EXIT(&vswp->if_lockrw);
1164 
1165 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1166 	return (0);
1167 }
1168 
1169 /*
1170  * Change the local interface address.
1171  *
1172  * Note: we don't support this entry point. The local
1173  * mac address of the switch can only be changed via its
1174  * MD node properties.
1175  */
1176 static int
1177 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1178 {
1179 	_NOTE(ARGUNUSED(arg, macaddr))
1180 
1181 	return (DDI_FAILURE);
1182 }
1183 
1184 static int
1185 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1186 {
1187 	vsw_t		*vswp = (vsw_t *)arg;
1188 	mcst_addr_t	*mcst_p = NULL;
1189 	uint64_t	addr = 0x0;
1190 	int		i, ret = 0;
1191 
1192 	D1(vswp, "%s: enter", __func__);
1193 
1194 	/*
1195 	 * Convert address into form that can be used
1196 	 * as hash table key.
1197 	 */
1198 	for (i = 0; i < ETHERADDRL; i++) {
1199 		addr = (addr << 8) | mca[i];
1200 	}
1201 
1202 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1203 
1204 	if (add) {
1205 		D2(vswp, "%s: adding multicast", __func__);
1206 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1207 			/*
1208 			 * Update the list of multicast addresses
1209 			 * contained within the vsw_t structure to
1210 			 * include this new one.
1211 			 */
1212 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1213 			if (mcst_p == NULL) {
1214 				DERR(vswp, "%s unable to alloc mem", __func__);
1215 				(void) vsw_del_mcst(vswp,
1216 				    VSW_LOCALDEV, addr, NULL);
1217 				return (1);
1218 			}
1219 			mcst_p->addr = addr;
1220 			ether_copy(mca, &mcst_p->mca);
1221 
1222 			/*
1223 			 * Call into the underlying driver to program the
1224 			 * address into HW.
1225 			 */
1226 			ret = vsw_mac_multicast_add(vswp, NULL, mcst_p,
1227 			    VSW_LOCALDEV);
1228 			if (ret != 0) {
1229 				(void) vsw_del_mcst(vswp,
1230 				    VSW_LOCALDEV, addr, NULL);
1231 				kmem_free(mcst_p, sizeof (*mcst_p));
1232 				return (ret);
1233 			}
1234 
1235 			mutex_enter(&vswp->mca_lock);
1236 			mcst_p->nextp = vswp->mcap;
1237 			vswp->mcap = mcst_p;
1238 			mutex_exit(&vswp->mca_lock);
1239 		} else {
1240 			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
1241 			    "address", vswp->instance);
1242 		}
1243 		return (ret);
1244 	}
1245 
1246 	D2(vswp, "%s: removing multicast", __func__);
1247 	/*
1248 	 * Remove the address from the hash table..
1249 	 */
1250 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1251 
1252 		/*
1253 		 * ..and then from the list maintained in the
1254 		 * vsw_t structure.
1255 		 */
1256 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1257 		ASSERT(mcst_p != NULL);
1258 
1259 		vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV);
1260 		kmem_free(mcst_p, sizeof (*mcst_p));
1261 	}
1262 
1263 	D1(vswp, "%s: exit", __func__);
1264 
1265 	return (0);
1266 }
1267 
1268 static int
1269 vsw_m_promisc(void *arg, boolean_t on)
1270 {
1271 	vsw_t		*vswp = (vsw_t *)arg;
1272 
1273 	D1(vswp, "%s: enter", __func__);
1274 
1275 	WRITE_ENTER(&vswp->if_lockrw);
1276 	if (on)
1277 		vswp->if_state |= VSW_IF_PROMISC;
1278 	else
1279 		vswp->if_state &= ~VSW_IF_PROMISC;
1280 	RW_EXIT(&vswp->if_lockrw);
1281 
1282 	D1(vswp, "%s: exit", __func__);
1283 
1284 	return (0);
1285 }
1286 
1287 static mblk_t *
1288 vsw_m_tx(void *arg, mblk_t *mp)
1289 {
1290 	vsw_t		*vswp = (vsw_t *)arg;
1291 
1292 	D1(vswp, "%s: enter", __func__);
1293 
1294 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1295 
1296 	if (mp == NULL) {
1297 		return (NULL);
1298 	}
1299 
1300 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1301 
1302 	D1(vswp, "%s: exit", __func__);
1303 
1304 	return (NULL);
1305 }
1306 
1307 /*
1308  * Register for machine description (MD) updates.
1309  *
1310  * Returns 0 on success, 1 on failure.
1311  */
1312 static int
1313 vsw_mdeg_register(vsw_t *vswp)
1314 {
1315 	mdeg_prop_spec_t	*pspecp;
1316 	mdeg_node_spec_t	*inst_specp;
1317 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1318 	size_t			templatesz;
1319 	int			rv;
1320 
1321 	D1(vswp, "%s: enter", __func__);
1322 
1323 	/*
1324 	 * Allocate and initialize a per-instance copy
1325 	 * of the global property spec array that will
1326 	 * uniquely identify this vsw instance.
1327 	 */
1328 	templatesz = sizeof (vsw_prop_template);
1329 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1330 
1331 	bcopy(vsw_prop_template, pspecp, templatesz);
1332 
1333 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1334 
1335 	/* initialize the complete prop spec structure */
1336 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1337 	inst_specp->namep = "virtual-device";
1338 	inst_specp->specp = pspecp;
1339 
1340 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1341 	    vswp->regprop);
1342 	/*
1343 	 * Register an interest in 'virtual-device' nodes with a
1344 	 * 'name' property of 'virtual-network-switch'
1345 	 */
1346 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1347 	    (void *)vswp, &mdeg_hdl);
1348 	if (rv != MDEG_SUCCESS) {
1349 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1350 		    __func__, rv);
1351 		goto mdeg_reg_fail;
1352 	}
1353 
1354 	/*
1355 	 * Register an interest in 'vsw-port' nodes.
1356 	 */
1357 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1358 	    (void *)vswp, &mdeg_port_hdl);
1359 	if (rv != MDEG_SUCCESS) {
1360 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1361 		(void) mdeg_unregister(mdeg_hdl);
1362 		goto mdeg_reg_fail;
1363 	}
1364 
1365 	/* save off data that will be needed later */
1366 	vswp->inst_spec = inst_specp;
1367 	vswp->mdeg_hdl = mdeg_hdl;
1368 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1369 
1370 	D1(vswp, "%s: exit", __func__);
1371 	return (0);
1372 
1373 mdeg_reg_fail:
1374 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1375 	    vswp->instance);
1376 	kmem_free(pspecp, templatesz);
1377 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1378 
1379 	vswp->mdeg_hdl = NULL;
1380 	vswp->mdeg_port_hdl = NULL;
1381 
1382 	return (1);
1383 }
1384 
1385 static void
1386 vsw_mdeg_unregister(vsw_t *vswp)
1387 {
1388 	D1(vswp, "vsw_mdeg_unregister: enter");
1389 
1390 	if (vswp->mdeg_hdl != NULL)
1391 		(void) mdeg_unregister(vswp->mdeg_hdl);
1392 
1393 	if (vswp->mdeg_port_hdl != NULL)
1394 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1395 
1396 	if (vswp->inst_spec != NULL) {
1397 		if (vswp->inst_spec->specp != NULL) {
1398 			(void) kmem_free(vswp->inst_spec->specp,
1399 			    sizeof (vsw_prop_template));
1400 			vswp->inst_spec->specp = NULL;
1401 		}
1402 
1403 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1404 		vswp->inst_spec = NULL;
1405 	}
1406 
1407 	D1(vswp, "vsw_mdeg_unregister: exit");
1408 }
1409 
1410 /*
1411  * Mdeg callback invoked for the vsw node itself.
1412  */
1413 static int
1414 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1415 {
1416 	vsw_t		*vswp;
1417 	md_t		*mdp;
1418 	mde_cookie_t	node;
1419 	uint64_t	inst;
1420 	char		*node_name = NULL;
1421 
1422 	if (resp == NULL)
1423 		return (MDEG_FAILURE);
1424 
1425 	vswp = (vsw_t *)cb_argp;
1426 
1427 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1428 	    " : prev matched %d", __func__, resp->added.nelem,
1429 	    resp->removed.nelem, resp->match_curr.nelem,
1430 	    resp->match_prev.nelem);
1431 
1432 	/*
1433 	 * We get an initial callback for this node as 'added'
1434 	 * after registering with mdeg. Note that we would have
1435 	 * already gathered information about this vsw node by
1436 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1437 	 * So, there is a window where the properties of this
1438 	 * node might have changed when we get this initial 'added'
1439 	 * callback. We handle this as if an update occured
1440 	 * and invoke the same function which handles updates to
1441 	 * the properties of this vsw-node if any.
1442 	 *
1443 	 * A non-zero 'match' value indicates that the MD has been
1444 	 * updated and that a virtual-network-switch node is
1445 	 * present which may or may not have been updated. It is
1446 	 * up to the clients to examine their own nodes and
1447 	 * determine if they have changed.
1448 	 */
1449 	if (resp->added.nelem != 0) {
1450 
1451 		if (resp->added.nelem != 1) {
1452 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1453 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1454 			return (MDEG_FAILURE);
1455 		}
1456 
1457 		mdp = resp->added.mdp;
1458 		node = resp->added.mdep[0];
1459 
1460 	} else if (resp->match_curr.nelem != 0) {
1461 
1462 		if (resp->match_curr.nelem != 1) {
1463 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1464 			    "invalid: %d\n", vswp->instance,
1465 			    resp->match_curr.nelem);
1466 			return (MDEG_FAILURE);
1467 		}
1468 
1469 		mdp = resp->match_curr.mdp;
1470 		node = resp->match_curr.mdep[0];
1471 
1472 	} else {
1473 		return (MDEG_FAILURE);
1474 	}
1475 
1476 	/* Validate name and instance */
1477 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1478 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1479 		return (MDEG_FAILURE);
1480 	}
1481 
1482 	/* is this a virtual-network-switch? */
1483 	if (strcmp(node_name, vsw_propname) != 0) {
1484 		DERR(vswp, "%s: Invalid node name: %s\n",
1485 		    __func__, node_name);
1486 		return (MDEG_FAILURE);
1487 	}
1488 
1489 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1490 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1491 		    __func__);
1492 		return (MDEG_FAILURE);
1493 	}
1494 
1495 	/* is this the right instance of vsw? */
1496 	if (inst != vswp->regprop) {
1497 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1498 		    __func__, inst);
1499 		return (MDEG_FAILURE);
1500 	}
1501 
1502 	vsw_update_md_prop(vswp, mdp, node);
1503 
1504 	return (MDEG_SUCCESS);
1505 }
1506 
1507 /*
1508  * Mdeg callback invoked for changes to the vsw-port nodes
1509  * under the vsw node.
1510  */
1511 static int
1512 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1513 {
1514 	vsw_t		*vswp;
1515 	int		idx;
1516 	md_t		*mdp;
1517 	mde_cookie_t	node;
1518 	uint64_t	inst;
1519 	int		rv;
1520 
1521 	if ((resp == NULL) || (cb_argp == NULL))
1522 		return (MDEG_FAILURE);
1523 
1524 	vswp = (vsw_t *)cb_argp;
1525 
1526 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1527 	    " : prev matched %d", __func__, resp->added.nelem,
1528 	    resp->removed.nelem, resp->match_curr.nelem,
1529 	    resp->match_prev.nelem);
1530 
1531 	/* process added ports */
1532 	for (idx = 0; idx < resp->added.nelem; idx++) {
1533 		mdp = resp->added.mdp;
1534 		node = resp->added.mdep[idx];
1535 
1536 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1537 
1538 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1539 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1540 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1541 		}
1542 	}
1543 
1544 	/* process removed ports */
1545 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1546 		mdp = resp->removed.mdp;
1547 		node = resp->removed.mdep[idx];
1548 
1549 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1550 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1551 			    __func__, id_propname, idx);
1552 			continue;
1553 		}
1554 
1555 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1556 
1557 		if (vsw_port_detach(vswp, inst) != 0) {
1558 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1559 			    vswp->instance, inst);
1560 		}
1561 	}
1562 
1563 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1564 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1565 		    resp->match_curr.mdep[idx],
1566 		    resp->match_prev.mdp,
1567 		    resp->match_prev.mdep[idx]);
1568 	}
1569 
1570 	D1(vswp, "%s: exit", __func__);
1571 
1572 	return (MDEG_SUCCESS);
1573 }
1574 
1575 /*
1576  * Scan the machine description for this instance of vsw
1577  * and read its properties. Called only from vsw_attach().
1578  * Returns: 0 on success, 1 on failure.
1579  */
1580 static int
1581 vsw_read_mdprops(vsw_t *vswp)
1582 {
1583 	md_t		*mdp = NULL;
1584 	mde_cookie_t	rootnode;
1585 	mde_cookie_t	*listp = NULL;
1586 	uint64_t	inst;
1587 	uint64_t	cfgh;
1588 	char		*name;
1589 	int		rv = 1;
1590 	int		num_nodes = 0;
1591 	int		num_devs = 0;
1592 	int		listsz = 0;
1593 	int		i;
1594 
1595 	/*
1596 	 * In each 'virtual-device' node in the MD there is a
1597 	 * 'cfg-handle' property which is the MD's concept of
1598 	 * an instance number (this may be completely different from
1599 	 * the device drivers instance #). OBP reads that value and
1600 	 * stores it in the 'reg' property of the appropriate node in
1601 	 * the device tree. We first read this reg property and use this
1602 	 * to compare against the 'cfg-handle' property of vsw nodes
1603 	 * in MD to get to this specific vsw instance and then read
1604 	 * other properties that we are interested in.
1605 	 * We also cache the value of 'reg' property and use it later
1606 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1607 	 */
1608 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1609 	    DDI_PROP_DONTPASS, reg_propname, -1);
1610 	if (inst == -1) {
1611 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1612 		    "OBP device tree", vswp->instance, reg_propname);
1613 		return (rv);
1614 	}
1615 
1616 	vswp->regprop = inst;
1617 
1618 	if ((mdp = md_get_handle()) == NULL) {
1619 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1620 		return (rv);
1621 	}
1622 
1623 	num_nodes = md_node_count(mdp);
1624 	ASSERT(num_nodes > 0);
1625 
1626 	listsz = num_nodes * sizeof (mde_cookie_t);
1627 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1628 
1629 	rootnode = md_root_node(mdp);
1630 
1631 	/* search for all "virtual_device" nodes */
1632 	num_devs = md_scan_dag(mdp, rootnode,
1633 	    md_find_name(mdp, vdev_propname),
1634 	    md_find_name(mdp, "fwd"), listp);
1635 	if (num_devs <= 0) {
1636 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1637 		goto vsw_readmd_exit;
1638 	}
1639 
1640 	/*
1641 	 * Now loop through the list of virtual-devices looking for
1642 	 * devices with name "virtual-network-switch" and for each
1643 	 * such device compare its instance with what we have from
1644 	 * the 'reg' property to find the right node in MD and then
1645 	 * read all its properties.
1646 	 */
1647 	for (i = 0; i < num_devs; i++) {
1648 
1649 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1650 			DWARN(vswp, "%s: name property not found\n",
1651 			    __func__);
1652 			goto vsw_readmd_exit;
1653 		}
1654 
1655 		/* is this a virtual-network-switch? */
1656 		if (strcmp(name, vsw_propname) != 0)
1657 			continue;
1658 
1659 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1660 			DWARN(vswp, "%s: cfg-handle property not found\n",
1661 			    __func__);
1662 			goto vsw_readmd_exit;
1663 		}
1664 
1665 		/* is this the required instance of vsw? */
1666 		if (inst != cfgh)
1667 			continue;
1668 
1669 		/* now read all properties of this vsw instance */
1670 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1671 		break;
1672 	}
1673 
1674 vsw_readmd_exit:
1675 
1676 	kmem_free(listp, listsz);
1677 	(void) md_fini_handle(mdp);
1678 	return (rv);
1679 }
1680 
1681 /*
1682  * Read the initial start-of-day values from the specified MD node.
1683  */
1684 static int
1685 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1686 {
1687 	uint64_t	macaddr = 0;
1688 
1689 	D1(vswp, "%s: enter", __func__);
1690 
1691 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1692 		return (1);
1693 	}
1694 
1695 	/* mac address for vswitch device itself */
1696 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1697 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1698 		    vswp->instance);
1699 		return (1);
1700 	}
1701 
1702 	vsw_save_lmacaddr(vswp, macaddr);
1703 
1704 	if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) {
1705 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1706 		    "defaulting to 'switched' mode",
1707 		    __func__, smode_propname);
1708 
1709 		vswp->smode = VSW_LAYER2;
1710 	}
1711 
1712 	/*
1713 	 * Read the 'linkprop' property to know if this
1714 	 * vsw device wants to get physical link updates.
1715 	 */
1716 	vsw_linkprop_read(vswp, mdp, node, &vswp->pls_update);
1717 
1718 	/* read mtu */
1719 	vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
1720 	if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
1721 		vswp->mtu = ETHERMTU;
1722 	}
1723 	vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
1724 	    VLAN_TAGSZ;
1725 
1726 	/* read vlan id properties of this vsw instance */
1727 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1728 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1729 
1730 	/* read priority-ether-types */
1731 	vsw_read_pri_eth_types(vswp, mdp, node);
1732 
1733 	D1(vswp, "%s: exit", __func__);
1734 	return (0);
1735 }
1736 
1737 /*
1738  * Read vlan id properties of the given MD node.
1739  * Arguments:
1740  *   arg:          device argument(vsw device or a port)
1741  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1742  *   mdp:          machine description
1743  *   node:         md node cookie
1744  *
1745  * Returns:
1746  *   pvidp:        port-vlan-id of the node
1747  *   vidspp:       list of vlan-ids of the node
1748  *   nvidsp:       # of vlan-ids in the list
1749  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1750  */
1751 static void
1752 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1753 	uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp,
1754 	uint16_t *default_idp)
1755 {
1756 	vsw_t		*vswp;
1757 	vsw_port_t	*portp;
1758 	char		*pvid_propname;
1759 	char		*vid_propname;
1760 	uint_t		nvids = 0;
1761 	uint32_t	vids_size;
1762 	int		rv;
1763 	int		i;
1764 	uint64_t	*data;
1765 	uint64_t	val;
1766 	int		size;
1767 	int		inst;
1768 
1769 	if (type == VSW_LOCALDEV) {
1770 
1771 		vswp = (vsw_t *)arg;
1772 		pvid_propname = vsw_pvid_propname;
1773 		vid_propname = vsw_vid_propname;
1774 		inst = vswp->instance;
1775 
1776 	} else if (type == VSW_VNETPORT) {
1777 
1778 		portp = (vsw_port_t *)arg;
1779 		vswp = portp->p_vswp;
1780 		pvid_propname = port_pvid_propname;
1781 		vid_propname = port_vid_propname;
1782 		inst = portp->p_instance;
1783 
1784 	} else {
1785 		return;
1786 	}
1787 
1788 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1789 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1790 		if (rv != 0) {
1791 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1792 			    vsw_dvid_propname);
1793 
1794 			*default_idp = vsw_default_vlan_id;
1795 		} else {
1796 			*default_idp = val & 0xFFF;
1797 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1798 			    vsw_dvid_propname, inst, *default_idp);
1799 		}
1800 	}
1801 
1802 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1803 	if (rv != 0) {
1804 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1805 		*pvidp = vsw_default_vlan_id;
1806 	} else {
1807 
1808 		*pvidp = val & 0xFFF;
1809 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1810 		    pvid_propname, inst, *pvidp);
1811 	}
1812 
1813 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1814 	    &size);
1815 	if (rv != 0) {
1816 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1817 		size = 0;
1818 	} else {
1819 		size /= sizeof (uint64_t);
1820 	}
1821 	nvids = size;
1822 
1823 	if (nvids != 0) {
1824 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1825 		vids_size = sizeof (vsw_vlanid_t) * nvids;
1826 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1827 		for (i = 0; i < nvids; i++) {
1828 			(*vidspp)[i].vl_vid = data[i] & 0xFFFF;
1829 			(*vidspp)[i].vl_set = B_FALSE;
1830 			D2(vswp, " %d ", (*vidspp)[i].vl_vid);
1831 		}
1832 		D2(vswp, "\n");
1833 	}
1834 
1835 	*nvidsp = nvids;
1836 }
1837 
1838 /*
1839  * This function reads "priority-ether-types" property from md. This property
1840  * is used to enable support for priority frames. Applications which need
1841  * guaranteed and timely delivery of certain high priority frames to/from
1842  * a vnet or vsw within ldoms, should configure this property by providing
1843  * the ether type(s) for which the priority facility is needed.
1844  * Normal data frames are delivered over a ldc channel using the descriptor
1845  * ring mechanism which is constrained by factors such as descriptor ring size,
1846  * the rate at which the ring is processed at the peer ldc end point, etc.
1847  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1848  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1849  * descriptor ring path and enables a more reliable and timely delivery of
1850  * frames to the peer.
1851  */
1852 static void
1853 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1854 {
1855 	int		rv;
1856 	uint16_t	*types;
1857 	uint64_t	*data;
1858 	int		size;
1859 	int		i;
1860 	size_t		mblk_sz;
1861 
1862 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1863 	    (uint8_t **)&data, &size);
1864 	if (rv != 0) {
1865 		/*
1866 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1867 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1868 		 */
1869 		if (vsw_pri_eth_type != 0) {
1870 			size = sizeof (vsw_pri_eth_type);
1871 			data = &vsw_pri_eth_type;
1872 		} else {
1873 			D3(vswp, "%s: prop(%s) not found", __func__,
1874 			    pri_types_propname);
1875 			size = 0;
1876 		}
1877 	}
1878 
1879 	if (size == 0) {
1880 		vswp->pri_num_types = 0;
1881 		return;
1882 	}
1883 
1884 	/*
1885 	 * we have some priority-ether-types defined;
1886 	 * allocate a table of these types and also
1887 	 * allocate a pool of mblks to transmit these
1888 	 * priority packets.
1889 	 */
1890 	size /= sizeof (uint64_t);
1891 	vswp->pri_num_types = size;
1892 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1893 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1894 		types[i] = data[i] & 0xFFFF;
1895 	}
1896 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1897 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1898 }
1899 
1900 static void
1901 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1902 {
1903 	int		rv;
1904 	int		inst;
1905 	uint64_t	val;
1906 	char		*mtu_propname;
1907 
1908 	mtu_propname = vsw_mtu_propname;
1909 	inst = vswp->instance;
1910 
1911 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1912 	if (rv != 0) {
1913 		D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
1914 		*mtu = vsw_ethermtu;
1915 	} else {
1916 
1917 		*mtu = val & 0xFFFF;
1918 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1919 		    mtu_propname, inst, *mtu);
1920 	}
1921 }
1922 
1923 /*
1924  * Update the mtu of the vsw device. We first check if the device has been
1925  * plumbed and if so fail the mtu update. Otherwise, we continue to update the
1926  * new mtu and reset all ports to initiate handshake re-negotiation with peers
1927  * using the new mtu.
1928  */
1929 static int
1930 vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
1931 {
1932 	int	rv;
1933 
1934 	WRITE_ENTER(&vswp->if_lockrw);
1935 
1936 	if (vswp->if_state & VSW_IF_UP) {
1937 
1938 		RW_EXIT(&vswp->if_lockrw);
1939 
1940 		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
1941 		    " as the device is plumbed\n", vswp->instance);
1942 		return (EBUSY);
1943 
1944 	} else {
1945 
1946 		D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
1947 		    __func__, vswp->mtu, mtu);
1948 
1949 		vswp->mtu = mtu;
1950 		vswp->max_frame_size = vswp->mtu +
1951 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1952 
1953 		rv = mac_maxsdu_update(vswp->if_mh, mtu);
1954 		if (rv != 0) {
1955 			cmn_err(CE_NOTE,
1956 			    "!vsw%d: Unable to update mtu with mac"
1957 			    " layer\n", vswp->instance);
1958 		}
1959 
1960 		RW_EXIT(&vswp->if_lockrw);
1961 
1962 		/* Reset ports to renegotiate with the new mtu */
1963 		vsw_reset_ports(vswp);
1964 
1965 	}
1966 
1967 	return (0);
1968 }
1969 
1970 static void
1971 vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
1972 	boolean_t *pls)
1973 {
1974 	int		rv;
1975 	uint64_t	val;
1976 	char		*linkpropname;
1977 
1978 	linkpropname = vsw_linkprop_propname;
1979 
1980 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
1981 	if (rv != 0) {
1982 		D3(vswp, "%s: prop(%s) not found", __func__, linkpropname);
1983 		*pls = B_FALSE;
1984 	} else {
1985 
1986 		*pls = (val & 0x1) ? B_TRUE : B_FALSE;
1987 		D2(vswp, "%s: %s(%d): (%d)\n", __func__, linkpropname,
1988 		    vswp->instance, *pls);
1989 	}
1990 }
1991 
1992 static void
1993 vsw_mac_link_update(vsw_t *vswp, link_state_t link_state)
1994 {
1995 	READ_ENTER(&vswp->if_lockrw);
1996 	if ((vswp->if_state & VSW_IF_UP) == 0) {
1997 		RW_EXIT(&vswp->if_lockrw);
1998 		return;
1999 	}
2000 	RW_EXIT(&vswp->if_lockrw);
2001 
2002 	mac_link_update(vswp->if_mh, link_state);
2003 }
2004 
2005 void
2006 vsw_physlink_state_update(vsw_t *vswp)
2007 {
2008 	if (vswp->pls_update == B_TRUE) {
2009 		vsw_mac_link_update(vswp, vswp->phys_link_state);
2010 	}
2011 	vsw_physlink_update_ports(vswp);
2012 }
2013 
2014 /*
2015  * Check to see if the relevant properties in the specified node have
2016  * changed, and if so take the appropriate action.
2017  *
2018  * If any of the properties are missing or invalid we don't take
2019  * any action, as this function should only be invoked when modifications
2020  * have been made to what we assume is a working configuration, which
2021  * we leave active.
2022  *
2023  * Note it is legal for this routine to be invoked even if none of the
2024  * properties in the port node within the MD have actually changed.
2025  */
2026 static void
2027 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
2028 {
2029 	char		physname[LIFNAMSIZ];
2030 	char		drv[LIFNAMSIZ];
2031 	uint_t		ddi_instance;
2032 	uint8_t		new_smode;
2033 	int		i;
2034 	uint64_t 	macaddr = 0;
2035 	enum		{MD_init = 0x1,
2036 				MD_physname = 0x2,
2037 				MD_macaddr = 0x4,
2038 				MD_smode = 0x8,
2039 				MD_vlans = 0x10,
2040 				MD_mtu = 0x20,
2041 				MD_pls = 0x40} updated;
2042 	int		rv;
2043 	uint16_t	pvid;
2044 	vsw_vlanid_t	*vids;
2045 	uint16_t	nvids;
2046 	uint32_t	mtu;
2047 	boolean_t	pls_update;
2048 
2049 	updated = MD_init;
2050 
2051 	D1(vswp, "%s: enter", __func__);
2052 
2053 	/*
2054 	 * Check if name of physical device in MD has changed.
2055 	 */
2056 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
2057 		/*
2058 		 * Do basic sanity check on new device name/instance,
2059 		 * if its non NULL. It is valid for the device name to
2060 		 * have changed from a non NULL to a NULL value, i.e.
2061 		 * the vsw is being changed to 'routed' mode.
2062 		 */
2063 		if ((strlen(physname) != 0) &&
2064 		    (ddi_parse(physname, drv,
2065 		    &ddi_instance) != DDI_SUCCESS)) {
2066 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
2067 			    " a valid device name/instance",
2068 			    vswp->instance, physname);
2069 			goto fail_reconf;
2070 		}
2071 
2072 		if (strcmp(physname, vswp->physname)) {
2073 			D2(vswp, "%s: device name changed from %s to %s",
2074 			    __func__, vswp->physname, physname);
2075 
2076 			updated |= MD_physname;
2077 		} else {
2078 			D2(vswp, "%s: device name unchanged at %s",
2079 			    __func__, vswp->physname);
2080 		}
2081 	} else {
2082 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
2083 		    "device from updated MD.", vswp->instance);
2084 		goto fail_reconf;
2085 	}
2086 
2087 	/*
2088 	 * Check if MAC address has changed.
2089 	 */
2090 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
2091 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
2092 		    vswp->instance);
2093 		goto fail_reconf;
2094 	} else {
2095 		uint64_t maddr = macaddr;
2096 		READ_ENTER(&vswp->if_lockrw);
2097 		for (i = ETHERADDRL - 1; i >= 0; i--) {
2098 			if (vswp->if_addr.ether_addr_octet[i]
2099 			    != (macaddr & 0xFF)) {
2100 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
2101 				    __func__, i,
2102 				    vswp->if_addr.ether_addr_octet[i],
2103 				    (macaddr & 0xFF));
2104 				updated |= MD_macaddr;
2105 				macaddr = maddr;
2106 				break;
2107 			}
2108 			macaddr >>= 8;
2109 		}
2110 		RW_EXIT(&vswp->if_lockrw);
2111 		if (updated & MD_macaddr) {
2112 			vsw_save_lmacaddr(vswp, macaddr);
2113 		}
2114 	}
2115 
2116 	/*
2117 	 * Check if switching modes have changed.
2118 	 */
2119 	if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) {
2120 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2121 		    vswp->instance, smode_propname);
2122 		goto fail_reconf;
2123 	} else {
2124 		if (new_smode != vswp->smode) {
2125 			D2(vswp, "%s: switching mode changed from %d to %d",
2126 			    __func__, vswp->smode, new_smode);
2127 
2128 			updated |= MD_smode;
2129 		}
2130 	}
2131 
2132 	/* Read the vlan ids */
2133 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2134 	    &nvids, NULL);
2135 
2136 	/* Determine if there are any vlan id updates */
2137 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2138 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2139 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2140 	    !vsw_cmp_vids(vids, vswp->vids, nvids))) {
2141 		updated |= MD_vlans;
2142 	}
2143 
2144 	/* Read mtu */
2145 	vsw_mtu_read(vswp, mdp, node, &mtu);
2146 	if (mtu != vswp->mtu) {
2147 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2148 			updated |= MD_mtu;
2149 		} else {
2150 			cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2151 			    " as the specified value:%d is invalid\n",
2152 			    vswp->instance, mtu);
2153 		}
2154 	}
2155 
2156 	/*
2157 	 * Read the 'linkprop' property.
2158 	 */
2159 	vsw_linkprop_read(vswp, mdp, node, &pls_update);
2160 	if (pls_update != vswp->pls_update) {
2161 		updated |= MD_pls;
2162 	}
2163 
2164 	/*
2165 	 * Now make any changes which are needed...
2166 	 */
2167 	if (updated & MD_pls) {
2168 
2169 		/* save the updated property. */
2170 		vswp->pls_update = pls_update;
2171 
2172 		if (pls_update == B_FALSE) {
2173 			/*
2174 			 * Phys link state update is now disabled for this vsw
2175 			 * interface. If we had previously reported a link-down
2176 			 * to the stack, undo that by sending a link-up.
2177 			 */
2178 			if (vswp->phys_link_state == LINK_STATE_DOWN) {
2179 				vsw_mac_link_update(vswp, LINK_STATE_UP);
2180 			}
2181 		} else {
2182 			/*
2183 			 * Phys link state update is now enabled. Send up an
2184 			 * update based on the current phys link state.
2185 			 */
2186 			vsw_mac_link_update(vswp, vswp->phys_link_state);
2187 		}
2188 
2189 	}
2190 
2191 	if (updated & (MD_physname | MD_smode | MD_mtu)) {
2192 
2193 		/*
2194 		 * Stop any pending thread to setup switching mode.
2195 		 */
2196 		vsw_setup_switching_stop(vswp);
2197 
2198 		/* Cleanup HybridIO */
2199 		vsw_hio_cleanup(vswp);
2200 
2201 		/*
2202 		 * Remove unicst, mcst addrs of vsw interface
2203 		 * and ports from the physdev. This also closes
2204 		 * the corresponding mac clients.
2205 		 */
2206 		vsw_unset_addrs(vswp);
2207 
2208 		/*
2209 		 * Stop, detach and close the old device..
2210 		 */
2211 		mutex_enter(&vswp->mac_lock);
2212 		vsw_mac_close(vswp);
2213 		mutex_exit(&vswp->mac_lock);
2214 
2215 		/*
2216 		 * Update phys name.
2217 		 */
2218 		if (updated & MD_physname) {
2219 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2220 			    vswp->instance, vswp->physname, physname);
2221 			(void) strncpy(vswp->physname,
2222 			    physname, strlen(physname) + 1);
2223 		}
2224 
2225 		/*
2226 		 * Update array with the new switch mode values.
2227 		 */
2228 		if (updated & MD_smode) {
2229 			vswp->smode = new_smode;
2230 		}
2231 
2232 		/* Update mtu */
2233 		if (updated & MD_mtu) {
2234 			rv = vsw_mtu_update(vswp, mtu);
2235 			if (rv != 0) {
2236 				goto fail_update;
2237 			}
2238 		}
2239 
2240 		/*
2241 		 * ..and attach, start the new device.
2242 		 */
2243 		rv = vsw_setup_switching(vswp);
2244 		if (rv == EAGAIN) {
2245 			/*
2246 			 * Unable to setup switching mode.
2247 			 * As the error is EAGAIN, schedule a thread to retry
2248 			 * and return. Programming addresses of ports and
2249 			 * vsw interface will be done by the thread when the
2250 			 * switching setup completes successfully.
2251 			 */
2252 			if (vsw_setup_switching_start(vswp) != 0) {
2253 				goto fail_update;
2254 			}
2255 			return;
2256 
2257 		} else if (rv) {
2258 			goto fail_update;
2259 		}
2260 
2261 		vsw_setup_layer2_post_process(vswp);
2262 	} else if (updated & MD_macaddr) {
2263 		/*
2264 		 * We enter here if only MD_macaddr is exclusively updated.
2265 		 * If MD_physname and/or MD_smode are also updated, then
2266 		 * as part of that, we would have implicitly processed
2267 		 * MD_macaddr update (above).
2268 		 */
2269 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2270 		    vswp->instance, macaddr);
2271 
2272 		READ_ENTER(&vswp->if_lockrw);
2273 		if (vswp->if_state & VSW_IF_UP) {
2274 			/* reconfigure with new address */
2275 			vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0);
2276 
2277 			/*
2278 			 * Notify the MAC layer of the changed address.
2279 			 */
2280 			mac_unicst_update(vswp->if_mh,
2281 			    (uint8_t *)&vswp->if_addr);
2282 
2283 		}
2284 		RW_EXIT(&vswp->if_lockrw);
2285 
2286 	}
2287 
2288 	if (updated & MD_vlans) {
2289 		/* Remove existing vlan ids from the hash table. */
2290 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2291 
2292 		if (vswp->if_state & VSW_IF_UP) {
2293 			vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids);
2294 		} else {
2295 			if (vswp->nvids != 0) {
2296 				kmem_free(vswp->vids,
2297 				    sizeof (vsw_vlanid_t) * vswp->nvids);
2298 			}
2299 			vswp->vids = vids;
2300 			vswp->nvids = nvids;
2301 			vswp->pvid = pvid;
2302 		}
2303 
2304 		/* add these new vlan ids into hash table */
2305 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2306 	} else {
2307 		if (nvids != 0) {
2308 			kmem_free(vids, sizeof (vsw_vlanid_t) * nvids);
2309 		}
2310 	}
2311 
2312 	return;
2313 
2314 fail_reconf:
2315 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2316 	return;
2317 
2318 fail_update:
2319 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2320 	    vswp->instance);
2321 }
2322 
2323 /*
2324  * Read the port's md properties.
2325  */
2326 static int
2327 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2328 	md_t *mdp, mde_cookie_t *node)
2329 {
2330 	uint64_t		ldc_id;
2331 	uint8_t			*addrp;
2332 	int			i, addrsz;
2333 	int			num_nodes = 0, nchan = 0;
2334 	int			listsz = 0;
2335 	mde_cookie_t		*listp = NULL;
2336 	struct ether_addr	ea;
2337 	uint64_t		macaddr;
2338 	uint64_t		inst = 0;
2339 	uint64_t		val;
2340 
2341 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2342 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2343 		    id_propname);
2344 		return (1);
2345 	}
2346 
2347 	/*
2348 	 * Find the channel endpoint node(s) (which should be under this
2349 	 * port node) which contain the channel id(s).
2350 	 */
2351 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2352 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2353 		    __func__, num_nodes);
2354 		return (1);
2355 	}
2356 
2357 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2358 
2359 	/* allocate enough space for node list */
2360 	listsz = num_nodes * sizeof (mde_cookie_t);
2361 	listp = kmem_zalloc(listsz, KM_SLEEP);
2362 
2363 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2364 	    md_find_name(mdp, "fwd"), listp);
2365 
2366 	if (nchan <= 0) {
2367 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2368 		kmem_free(listp, listsz);
2369 		return (1);
2370 	}
2371 
2372 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2373 
2374 	/* use property from first node found */
2375 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2376 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2377 		    id_propname);
2378 		kmem_free(listp, listsz);
2379 		return (1);
2380 	}
2381 
2382 	/* don't need list any more */
2383 	kmem_free(listp, listsz);
2384 
2385 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2386 
2387 	/* read mac-address property */
2388 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2389 	    &addrp, &addrsz)) {
2390 		DWARN(vswp, "%s: prop(%s) not found",
2391 		    __func__, remaddr_propname);
2392 		return (1);
2393 	}
2394 
2395 	if (addrsz < ETHERADDRL) {
2396 		DWARN(vswp, "%s: invalid address size", __func__);
2397 		return (1);
2398 	}
2399 
2400 	macaddr = *((uint64_t *)addrp);
2401 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2402 
2403 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2404 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2405 		macaddr >>= 8;
2406 	}
2407 
2408 	/* now update all properties into the port */
2409 	portp->p_vswp = vswp;
2410 	portp->p_instance = inst;
2411 	portp->addr_set = B_FALSE;
2412 	ether_copy(&ea, &portp->p_macaddr);
2413 	if (nchan > VSW_PORT_MAX_LDCS) {
2414 		D2(vswp, "%s: using first of %d ldc ids",
2415 		    __func__, nchan);
2416 		nchan = VSW_PORT_MAX_LDCS;
2417 	}
2418 	portp->num_ldcs = nchan;
2419 	portp->ldc_ids =
2420 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2421 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2422 
2423 	/* read vlan id properties of this port node */
2424 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2425 	    &portp->vids, &portp->nvids, NULL);
2426 
2427 	/* Check if hybrid property is present */
2428 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2429 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2430 		portp->p_hio_enabled = B_TRUE;
2431 	} else {
2432 		portp->p_hio_enabled = B_FALSE;
2433 	}
2434 	/*
2435 	 * Port hio capability determined after version
2436 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2437 	 */
2438 	portp->p_hio_capable = B_FALSE;
2439 	return (0);
2440 }
2441 
2442 /*
2443  * Add a new port to the system.
2444  *
2445  * Returns 0 on success, 1 on failure.
2446  */
2447 int
2448 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2449 {
2450 	vsw_port_t	*portp;
2451 	int		rv;
2452 
2453 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2454 
2455 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2456 	if (rv != 0) {
2457 		kmem_free(portp, sizeof (*portp));
2458 		return (1);
2459 	}
2460 
2461 	rv = vsw_port_attach(portp);
2462 	if (rv != 0) {
2463 		DERR(vswp, "%s: failed to attach port", __func__);
2464 		return (1);
2465 	}
2466 
2467 	return (0);
2468 }
2469 
2470 static int
2471 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2472 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2473 {
2474 	uint64_t	cport_num;
2475 	uint64_t	pport_num;
2476 	vsw_port_list_t	*plistp;
2477 	vsw_port_t	*portp;
2478 	boolean_t	updated_vlans = B_FALSE;
2479 	uint16_t	pvid;
2480 	vsw_vlanid_t	*vids;
2481 	uint16_t	nvids;
2482 	uint64_t	val;
2483 	boolean_t	hio_enabled = B_FALSE;
2484 
2485 	/*
2486 	 * For now, we get port updates only if vlan ids changed.
2487 	 * We read the port num and do some sanity check.
2488 	 */
2489 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2490 		return (1);
2491 	}
2492 
2493 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2494 		return (1);
2495 	}
2496 	if (cport_num != pport_num)
2497 		return (1);
2498 
2499 	plistp = &(vswp->plist);
2500 
2501 	READ_ENTER(&plistp->lockrw);
2502 
2503 	portp = vsw_lookup_port(vswp, cport_num);
2504 	if (portp == NULL) {
2505 		RW_EXIT(&plistp->lockrw);
2506 		return (1);
2507 	}
2508 
2509 	/* Read the vlan ids */
2510 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2511 	    &vids, &nvids, NULL);
2512 
2513 	/* Determine if there are any vlan id updates */
2514 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2515 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2516 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2517 	    !vsw_cmp_vids(vids, portp->vids, nvids))) {
2518 		updated_vlans = B_TRUE;
2519 	}
2520 
2521 	if (updated_vlans == B_TRUE) {
2522 
2523 		/* Remove existing vlan ids from the hash table. */
2524 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2525 
2526 		/* Reconfigure vlans with network device */
2527 		vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids);
2528 
2529 		/* add these new vlan ids into hash table */
2530 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2531 
2532 		/* reset the port if it is vlan unaware (ver < 1.3) */
2533 		vsw_vlan_unaware_port_reset(portp);
2534 	}
2535 
2536 	/* Check if hybrid property is present */
2537 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2538 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2539 		hio_enabled = B_TRUE;
2540 	}
2541 
2542 	if (portp->p_hio_enabled != hio_enabled) {
2543 		vsw_hio_port_update(portp, hio_enabled);
2544 	}
2545 
2546 	RW_EXIT(&plistp->lockrw);
2547 
2548 	return (0);
2549 }
2550 
2551 /*
2552  * vsw_mac_rx -- A common function to send packets to the interface.
2553  * By default this function check if the interface is UP or not, the
2554  * rest of the behaviour depends on the flags as below:
2555  *
2556  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2557  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2558  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2559  */
2560 void
2561 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2562     mblk_t *mp, vsw_macrx_flags_t flags)
2563 {
2564 	mblk_t		*mpt;
2565 
2566 	D1(vswp, "%s:enter\n", __func__);
2567 	READ_ENTER(&vswp->if_lockrw);
2568 	/* Check if the interface is up */
2569 	if (!(vswp->if_state & VSW_IF_UP)) {
2570 		RW_EXIT(&vswp->if_lockrw);
2571 		/* Free messages only if FREEMSG flag specified */
2572 		if (flags & VSW_MACRX_FREEMSG) {
2573 			freemsgchain(mp);
2574 		}
2575 		D1(vswp, "%s:exit\n", __func__);
2576 		return;
2577 	}
2578 	/*
2579 	 * If PROMISC flag is passed, then check if
2580 	 * the interface is in the PROMISC mode.
2581 	 * If not, drop the messages.
2582 	 */
2583 	if (flags & VSW_MACRX_PROMISC) {
2584 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2585 			RW_EXIT(&vswp->if_lockrw);
2586 			/* Free messages only if FREEMSG flag specified */
2587 			if (flags & VSW_MACRX_FREEMSG) {
2588 				freemsgchain(mp);
2589 			}
2590 			D1(vswp, "%s:exit\n", __func__);
2591 			return;
2592 		}
2593 	}
2594 	RW_EXIT(&vswp->if_lockrw);
2595 	/*
2596 	 * If COPYMSG flag is passed, then make a copy
2597 	 * of the message chain and send up the copy.
2598 	 */
2599 	if (flags & VSW_MACRX_COPYMSG) {
2600 		mp = copymsgchain(mp);
2601 		if (mp == NULL) {
2602 			D1(vswp, "%s:exit\n", __func__);
2603 			return;
2604 		}
2605 	}
2606 
2607 	D2(vswp, "%s: sending up stack", __func__);
2608 
2609 	mpt = NULL;
2610 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2611 	if (mp != NULL) {
2612 		mac_rx(vswp->if_mh, mrh, mp);
2613 	}
2614 	D1(vswp, "%s:exit\n", __func__);
2615 }
2616 
2617 /* copy mac address of vsw into soft state structure */
2618 static void
2619 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2620 {
2621 	int	i;
2622 
2623 	WRITE_ENTER(&vswp->if_lockrw);
2624 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2625 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2626 		macaddr >>= 8;
2627 	}
2628 	RW_EXIT(&vswp->if_lockrw);
2629 }
2630 
2631 /* Compare VLAN ids, array size expected to be same. */
2632 static boolean_t
2633 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids)
2634 {
2635 	int i, j;
2636 	uint16_t vid;
2637 
2638 	for (i = 0; i < nvids; i++) {
2639 		vid = vids1[i].vl_vid;
2640 		for (j = 0; j < nvids; j++) {
2641 			if (vid == vids2[i].vl_vid)
2642 				break;
2643 		}
2644 		if (j == nvids) {
2645 			return (B_FALSE);
2646 		}
2647 	}
2648 	return (B_TRUE);
2649 }
2650