xref: /titanic_44/usr/src/uts/sun4v/io/vsw.c (revision da14cebe459d3275048785f25bd869cb09b5307f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <sys/varargs.h>
53 #include <sys/machsystm.h>
54 #include <sys/modctl.h>
55 #include <sys/modhash.h>
56 #include <sys/mac_provider.h>
57 #include <sys/mac_ether.h>
58 #include <sys/taskq.h>
59 #include <sys/note.h>
60 #include <sys/mach_descrip.h>
61 #include <sys/mac_provider.h>
62 #include <sys/mdeg.h>
63 #include <sys/ldc.h>
64 #include <sys/vsw_fdb.h>
65 #include <sys/vsw.h>
66 #include <sys/vio_mailbox.h>
67 #include <sys/vnet_mailbox.h>
68 #include <sys/vnet_common.h>
69 #include <sys/vio_util.h>
70 #include <sys/sdt.h>
71 #include <sys/atomic.h>
72 #include <sys/callb.h>
73 #include <sys/vlan.h>
74 
75 /*
76  * Function prototypes.
77  */
78 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
79 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
80 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
81 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);
82 
83 /* MDEG routines */
84 static	int vsw_mdeg_register(vsw_t *vswp);
85 static	void vsw_mdeg_unregister(vsw_t *vswp);
86 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
87 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
88 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
89 static	int vsw_read_mdprops(vsw_t *vswp);
90 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
91 	mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
92 	uint16_t *nvidsp, uint16_t *default_idp);
93 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
94 	md_t *mdp, mde_cookie_t *node);
95 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
96 	mde_cookie_t node);
97 static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
98 	uint32_t *mtu);
99 static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
100 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
101 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
102 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
103 	vsw_vlanid_t *vids2, int nvids);
104 
105 /* Mac driver related routines */
106 static int vsw_mac_register(vsw_t *);
107 static int vsw_mac_unregister(vsw_t *);
108 static int vsw_m_stat(void *, uint_t, uint64_t *);
109 static void vsw_m_stop(void *arg);
110 static int vsw_m_start(void *arg);
111 static int vsw_m_unicst(void *arg, const uint8_t *);
112 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
113 static int vsw_m_promisc(void *arg, boolean_t);
114 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
115 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
116     mblk_t *mp, vsw_macrx_flags_t flags);
117 
118 /*
119  * Functions imported from other files.
120  */
121 extern void vsw_setup_switching_timeout(void *arg);
122 extern void vsw_stop_switching_timeout(vsw_t *vswp);
123 extern int vsw_setup_switching(vsw_t *);
124 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
125     vsw_port_t *port, mac_resource_handle_t mrh);
126 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
127 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
128 extern void vsw_del_mcst_vsw(vsw_t *);
129 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
130 extern int vsw_detach_ports(vsw_t *vswp);
131 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
132 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
133 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
134 	md_t *prev_mdp, mde_cookie_t prev_mdex);
135 extern	int vsw_port_attach(vsw_port_t *port);
136 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
137 extern int vsw_mac_open(vsw_t *vswp);
138 extern void vsw_mac_close(vsw_t *vswp);
139 extern void vsw_mac_cleanup_ports(vsw_t *vswp);
140 extern void vsw_unset_addrs(vsw_t *vswp);
141 extern void vsw_setup_layer2_post_process(vsw_t *vswp);
142 extern void vsw_create_vlans(void *arg, int type);
143 extern void vsw_destroy_vlans(void *arg, int type);
144 extern void vsw_vlan_add_ids(void *arg, int type);
145 extern void vsw_vlan_remove_ids(void *arg, int type);
146 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
147 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
148 	mblk_t **npt);
149 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
150 extern void vsw_hio_cleanup(vsw_t *vswp);
151 extern void vsw_hio_start_ports(vsw_t *vswp);
152 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
153 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
154 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
155 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
156     vsw_vlanid_t *new_vids, int new_nvids);
157 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
158 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
159 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
160     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
161 extern void vsw_reset_ports(vsw_t *vswp);
162 extern void vsw_port_reset(vsw_port_t *portp);
163 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
164 
/*
 * Internal tunables.
 *
 * These are plain (non-static) globals with default values; the comments
 * give the unit or meaning of each one.
 */
int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
int	vsw_wretries = 100;		/* # of write attempts */
int	vsw_desc_delay = 0;		/* delay in us */
int	vsw_read_attempts = 5;		/* # of reads of descriptor */
int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
					/* 300*3 = 900sec(15min) of max tmout */
int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */

uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */

/*
 * sw timeout for boot delay only. vsw_attach() hands this value to
 * drv_usectohz(), so it is interpreted as microseconds (vsw_attach()
 * describes the resulting first timeout as 0.1 sec).
 */
int vsw_setup_switching_boot_delay = 100 * MILLISEC;

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vsw_fdbe_refcnt_delay = 10;

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in all vnets connected to this vsw.
 */
uint16_t	vsw_default_vlan_id = 1;

/*
 * Workaround for a version handshake bug in obp's vnet.
 * If vsw initiates version negotiation starting from the highest version,
 * obp sends a nack and terminates version handshake. To workaround
 * this, we do not initiate version handshake when the channel comes up.
 * Instead, we wait for the peer to send its version info msg and go through
 * the version protocol exchange. If we successfully negotiate a version,
 * before sending the ack, we send our version info msg to the peer
 * using the <major,minor> version that we are about to ack.
 */
boolean_t vsw_obp_ver_proto_workaround = B_TRUE;

/*
 * In the absence of "priority-ether-types" property in MD, the following
 * internal tunable can be set to specify a single priority ethertype.
 */
uint64_t vsw_pri_eth_type = 0;

/*
 * Number of transmit priority buffers that are preallocated per device.
 * This number is chosen to be a small value to throttle transmission
 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
 */
uint32_t vsw_pri_tx_nmblks = 64;

/*
 * Number of RARP packets sent to announce macaddr to the physical switch,
 * after vsw's physical device is changed dynamically or after a guest (client
 * vnet) is live migrated in.
 */
uint32_t vsw_publish_macaddr_count = 3;

boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanup */
int vsw_hio_cleanup_delay = 10000;	/* 10ms */

/* Number of transmit descriptors -  must be power of 2 */
uint32_t vsw_ntxds = VSW_RING_NUM_EL;

/*
 * Max number of mblks received in one receive operation.
 * The initializer is a floating-point constant expression (60% of
 * VSW_NUM_MBLKS) that is converted to uint32_t.
 */
uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);

/*
 * Internal tunables for receive buffer pools, that is,  the size and number of
 * mblks for each pool. At least 3 sizes must be specified if these are used.
 * The sizes must be specified in increasing order. Non-zero value of the first
 * size will be used as a hint to use these values instead of the algorithm
 * that determines the sizes based on MTU.
 */
uint32_t vsw_mblk_size1 = 0;
uint32_t vsw_mblk_size2 = 0;
uint32_t vsw_mblk_size3 = 0;
uint32_t vsw_mblk_size4 = 0;
uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */

/*
 * Set this to non-zero to enable additional internal receive buffer pools
 * based on the MTU of the device for better performance at the cost of more
 * memory consumption. This is turned off by default, to use allocb(9F) for
 * receive buffer allocations of sizes > 2K.
 */
boolean_t vsw_jumbo_rxpools = B_FALSE;

/*
 * vsw_max_tx_qcount is the maximum # of packets that can be queued
 * before the tx worker thread begins processing the queue. Its value
 * is chosen to be 4x the default length of tx descriptor ring.
 */
uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
272 
/*
 * MAC callbacks
 *
 * vsw_mac_register() stores the address of this table in the m_callbacks
 * field of the mac_register_t it passes to mac_register().
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,	/* NOTE(review): presumably the optional-callback flags; confirm */
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	NULL,	/* the trailing entries are left unset */
	NULL,
	NULL
};

/*
 * Character/block entry points. Open and close are nulldev; the data
 * entry points (strategy, read, write, ioctl, mapping, async I/O) are nodev.
 */
static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};

/*
 * Device operations. Attach and detach are handled by vsw_attach() and
 * vsw_detach(); _init() and _fini() pass this table to mac_init_ops() and
 * mac_fini_ops().
 */
static	struct	dev_ops	vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};

/* Driver linkage record referenced from the modlinkage table. */
extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch",
	&vsw_ops,
};
331 
/*
 * Take (LDC_ENTER_LOCK) or drop (LDC_EXIT_LOCK) the three locks of a
 * channel: ldc_cblock, then ldc_rxlock, then ldc_txlock; they are released
 * in the reverse order.
 *
 * Each macro expands to a single do { } while (0) statement so that it can
 * safely be used as the body of an unbraced if/else; previously only the
 * first mutex operation would have been covered by the condition. Callers
 * terminate the invocation with a semicolon.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	} while (0)
340 
/*
 * Driver soft state ptr. Initialised by ddi_soft_state_init() in _init();
 * each instance's vsw_t is allocated from it in vsw_attach().
 */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 * vsw_attach() and vsw_detach() modify the list while holding vsw_rw
 * as writer.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;

/*
 * Property names
 */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";
static char pri_types_propname[] = "priority-ether-types";
static char vsw_pvid_propname[] = "port-vlan-id";
static char vsw_vid_propname[] = "vlan-id";
static char vsw_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
static char hybrid_propname[] = "hybrid";
static char vsw_mtu_propname[] = "mtu";

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,    "id"   },
	{ MDET_LIST_END,    NULL    }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
						vport_prop_match };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
 * by their 'name' and 'cfg-handle' properties.
 */
static md_prop_match_t vdev_prop_match[] = {
	{ MDET_PROP_STR,    "name"   },
	{ MDET_PROP_VAL,    "cfg-handle" },
	{ MDET_LIST_END,    NULL    }
};

static mdeg_node_match_t vdev_match = { "virtual-device",
						vdev_prop_match };


/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vsw instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 * The 'cfg-handle' entry is element [1] of the array.
 */
static mdeg_prop_spec_t vsw_prop_template[] = {
	{ MDET_PROP_STR,    "name",		vsw_propname },
	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
	{ MDET_LIST_END,    NULL,		NULL	}
};
411 
/*
 * Store 'val' in the ps_val member of element [1] of a property spec array.
 * The expansion is a parenthesised expression without a trailing semicolon,
 * so the macro behaves like an ordinary statement once the caller adds ';'
 * (including inside an unbraced if/else).
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
413 
#ifdef	DEBUG
/*
 * Print debug messages - set to 0x1f to enable all msgs
 * or 0x0 to turn all off.
 */
int vswdbg = 0x0;

/*
 * debug levels:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */

/*
 * Format a debug message and emit it through cmn_err(CE_CONT).
 *
 * vswp - instance the message relates to, or NULL; when non-NULL the
 *        message is prefixed with "vsw<instance>: ".
 * fmt  - printf-style format string, followed by its arguments.
 *
 * The message is formatted into a fixed 512-byte stack buffer. Formatting
 * is bounded with vsnprintf() so that an over-long message is truncated
 * instead of overrunning the buffer.
 */
void
vswdebug(vsw_t *vswp, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	if (vswp == NULL)
		cmn_err(CE_CONT, "%s\n", buf);
	else
		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
}

#endif	/* DEBUG */
447 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info(). It lists a single linkage record, the
 * driver's modldrv, followed by the terminating NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};
453 
454 int
455 _init(void)
456 {
457 	int status;
458 
459 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
460 
461 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
462 	if (status != 0) {
463 		return (status);
464 	}
465 
466 	mac_init_ops(&vsw_ops, DRV_NAME);
467 	status = mod_install(&modlinkage);
468 	if (status != 0) {
469 		ddi_soft_state_fini(&vsw_state);
470 	}
471 	return (status);
472 }
473 
474 int
475 _fini(void)
476 {
477 	int status;
478 
479 	status = mod_remove(&modlinkage);
480 	if (status != 0)
481 		return (status);
482 	mac_fini_ops(&vsw_ops);
483 	ddi_soft_state_fini(&vsw_state);
484 
485 	rw_destroy(&vsw_rw);
486 
487 	return (status);
488 }
489 
490 int
491 _info(struct modinfo *modinfop)
492 {
493 	return (mod_info(&modlinkage, modinfop));
494 }
495 
496 static int
497 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
498 {
499 	vsw_t		*vswp;
500 	int		instance;
501 	char		hashname[MAXNAMELEN];
502 	char		qname[TASKQ_NAMELEN];
503 	enum		{ PROG_init = 0x00,
504 				PROG_locks = 0x01,
505 				PROG_readmd = 0x02,
506 				PROG_fdb = 0x04,
507 				PROG_mfdb = 0x08,
508 				PROG_taskq = 0x10,
509 				PROG_swmode = 0x20,
510 				PROG_macreg = 0x40,
511 				PROG_mdreg = 0x80}
512 			progress;
513 
514 	progress = PROG_init;
515 	int		rv;
516 
517 	switch (cmd) {
518 	case DDI_ATTACH:
519 		break;
520 	case DDI_RESUME:
521 		/* nothing to do for this non-device */
522 		return (DDI_SUCCESS);
523 	case DDI_PM_RESUME:
524 	default:
525 		return (DDI_FAILURE);
526 	}
527 
528 	instance = ddi_get_instance(dip);
529 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
530 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
531 		return (DDI_FAILURE);
532 	}
533 	vswp = ddi_get_soft_state(vsw_state, instance);
534 
535 	if (vswp == NULL) {
536 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
537 		goto vsw_attach_fail;
538 	}
539 
540 	vswp->dip = dip;
541 	vswp->instance = instance;
542 	ddi_set_driver_private(dip, (caddr_t)vswp);
543 
544 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
545 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
546 	mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL);
547 	rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL);
548 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
549 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
550 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
551 
552 	progress |= PROG_locks;
553 
554 	rv = vsw_read_mdprops(vswp);
555 	if (rv != 0)
556 		goto vsw_attach_fail;
557 
558 	progress |= PROG_readmd;
559 
560 	/* setup the unicast forwarding database  */
561 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
562 	    vswp->instance);
563 	D2(vswp, "creating unicast hash table (%s)...", hashname);
564 	vswp->fdb_nchains = vsw_fdb_nchains;
565 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
566 	    mod_hash_null_valdtor, sizeof (void *));
567 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
568 	progress |= PROG_fdb;
569 
570 	/* setup the multicast fowarding database */
571 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
572 	    vswp->instance);
573 	D2(vswp, "creating multicast hash table %s)...", hashname);
574 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
575 	    mod_hash_null_valdtor, sizeof (void *));
576 
577 	progress |= PROG_mfdb;
578 
579 	/*
580 	 * Create the taskq which will process all the VIO
581 	 * control messages.
582 	 */
583 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
584 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
585 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
586 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
587 		    vswp->instance);
588 		goto vsw_attach_fail;
589 	}
590 
591 	progress |= PROG_taskq;
592 
593 	/* prevent auto-detaching */
594 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
595 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
596 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
597 		    "instance %u", DDI_NO_AUTODETACH, instance);
598 	}
599 
600 	/*
601 	 * The null switching function is set to avoid panic until
602 	 * switch mode is setup.
603 	 */
604 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
605 
606 	/*
607 	 * Setup the required switching mode,
608 	 * based on the mdprops that we read earlier.
609 	 * schedule a short timeout (0.1 sec) for the first time
610 	 * setup and avoid calling mac_open() directly here,
611 	 * others are regular timeout 3 secs.
612 	 */
613 	mutex_enter(&vswp->swtmout_lock);
614 
615 	vswp->swtmout_enabled = B_TRUE;
616 	vswp->swtmout_id = timeout(vsw_setup_switching_timeout, vswp,
617 	    drv_usectohz(vsw_setup_switching_boot_delay));
618 
619 	mutex_exit(&vswp->swtmout_lock);
620 
621 	progress |= PROG_swmode;
622 
623 	/* Register with mac layer as a provider */
624 	rv = vsw_mac_register(vswp);
625 	if (rv != 0)
626 		goto vsw_attach_fail;
627 
628 	progress |= PROG_macreg;
629 
630 	/*
631 	 * Now we have everything setup, register an interest in
632 	 * specific MD nodes.
633 	 *
634 	 * The callback is invoked in 2 cases, firstly if upon mdeg
635 	 * registration there are existing nodes which match our specified
636 	 * criteria, and secondly if the MD is changed (and again, there
637 	 * are nodes which we are interested in present within it. Note
638 	 * that our callback will be invoked even if our specified nodes
639 	 * have not actually changed).
640 	 *
641 	 */
642 	rv = vsw_mdeg_register(vswp);
643 	if (rv != 0)
644 		goto vsw_attach_fail;
645 
646 	progress |= PROG_mdreg;
647 
648 	WRITE_ENTER(&vsw_rw);
649 	vswp->next = vsw_head;
650 	vsw_head = vswp;
651 	RW_EXIT(&vsw_rw);
652 
653 	ddi_report_dev(vswp->dip);
654 	return (DDI_SUCCESS);
655 
656 vsw_attach_fail:
657 	DERR(NULL, "vsw_attach: failed");
658 
659 	if (progress & PROG_mdreg) {
660 		vsw_mdeg_unregister(vswp);
661 		(void) vsw_detach_ports(vswp);
662 	}
663 
664 	if (progress & PROG_macreg)
665 		(void) vsw_mac_unregister(vswp);
666 
667 	if (progress & PROG_swmode) {
668 		vsw_stop_switching_timeout(vswp);
669 		vsw_hio_cleanup(vswp);
670 		mutex_enter(&vswp->mac_lock);
671 		vsw_mac_close(vswp);
672 		mutex_exit(&vswp->mac_lock);
673 	}
674 
675 	if (progress & PROG_taskq)
676 		ddi_taskq_destroy(vswp->taskq_p);
677 
678 	if (progress & PROG_mfdb)
679 		mod_hash_destroy_hash(vswp->mfdb);
680 
681 	if (progress & PROG_fdb) {
682 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
683 		mod_hash_destroy_hash(vswp->fdb_hashp);
684 	}
685 
686 	if (progress & PROG_readmd) {
687 		if (VSW_PRI_ETH_DEFINED(vswp)) {
688 			kmem_free(vswp->pri_types,
689 			    sizeof (uint16_t) * vswp->pri_num_types);
690 		}
691 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
692 	}
693 
694 	if (progress & PROG_locks) {
695 		rw_destroy(&vswp->plist.lockrw);
696 		rw_destroy(&vswp->mfdbrw);
697 		rw_destroy(&vswp->if_lockrw);
698 		rw_destroy(&vswp->maccl_rwlock);
699 		mutex_destroy(&vswp->swtmout_lock);
700 		mutex_destroy(&vswp->mca_lock);
701 		mutex_destroy(&vswp->mac_lock);
702 	}
703 
704 	ddi_soft_state_free(vsw_state, instance);
705 	return (DDI_FAILURE);
706 }
707 
708 static int
709 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
710 {
711 	vio_mblk_pool_t		*poolp, *npoolp;
712 	vsw_t			**vswpp, *vswp;
713 	int 			instance;
714 
715 	instance = ddi_get_instance(dip);
716 	vswp = ddi_get_soft_state(vsw_state, instance);
717 
718 	if (vswp == NULL) {
719 		return (DDI_FAILURE);
720 	}
721 
722 	switch (cmd) {
723 	case DDI_DETACH:
724 		break;
725 	case DDI_SUSPEND:
726 	case DDI_PM_SUSPEND:
727 	default:
728 		return (DDI_FAILURE);
729 	}
730 
731 	D2(vswp, "detaching instance %d", instance);
732 
733 	/* Stop any pending timeout to setup switching mode. */
734 	vsw_stop_switching_timeout(vswp);
735 
736 	/* Cleanup the interface's mac client */
737 	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
738 
739 	if (vswp->if_state & VSW_IF_REG) {
740 		if (vsw_mac_unregister(vswp) != 0) {
741 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
742 			    "MAC layer", vswp->instance);
743 			return (DDI_FAILURE);
744 		}
745 	}
746 
747 	vsw_mdeg_unregister(vswp);
748 
749 	/* cleanup HybridIO */
750 	vsw_hio_cleanup(vswp);
751 
752 	if (vsw_detach_ports(vswp) != 0) {
753 		cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports",
754 		    vswp->instance);
755 		return (DDI_FAILURE);
756 	}
757 
758 	rw_destroy(&vswp->if_lockrw);
759 
760 	vsw_mac_cleanup_ports(vswp);
761 
762 	/*
763 	 * Now that the ports have been deleted, stop and close
764 	 * the physical device.
765 	 */
766 	mutex_enter(&vswp->mac_lock);
767 	vsw_mac_close(vswp);
768 	mutex_exit(&vswp->mac_lock);
769 
770 	mutex_destroy(&vswp->mac_lock);
771 	mutex_destroy(&vswp->swtmout_lock);
772 	rw_destroy(&vswp->maccl_rwlock);
773 
774 	/*
775 	 * Destroy any free pools that may still exist.
776 	 */
777 	poolp = vswp->rxh;
778 	while (poolp != NULL) {
779 		npoolp = vswp->rxh = poolp->nextp;
780 		if (vio_destroy_mblks(poolp) != 0) {
781 			vswp->rxh = poolp;
782 			return (DDI_FAILURE);
783 		}
784 		poolp = npoolp;
785 	}
786 
787 	/*
788 	 * Remove this instance from any entries it may be on in
789 	 * the hash table by using the list of addresses maintained
790 	 * in the vsw_t structure.
791 	 */
792 	vsw_del_mcst_vsw(vswp);
793 
794 	vswp->mcap = NULL;
795 	mutex_destroy(&vswp->mca_lock);
796 
797 	/*
798 	 * By now any pending tasks have finished and the underlying
799 	 * ldc's have been destroyed, so its safe to delete the control
800 	 * message taskq.
801 	 */
802 	if (vswp->taskq_p != NULL)
803 		ddi_taskq_destroy(vswp->taskq_p);
804 
805 	/*
806 	 * At this stage all the data pointers in the hash table
807 	 * should be NULL, as all the ports have been removed and will
808 	 * have deleted themselves from the port lists which the data
809 	 * pointers point to. Hence we can destroy the table using the
810 	 * default destructors.
811 	 */
812 	D2(vswp, "vsw_detach: destroying hash tables..");
813 	vsw_destroy_vlans(vswp, VSW_LOCALDEV);
814 	mod_hash_destroy_hash(vswp->fdb_hashp);
815 	vswp->fdb_hashp = NULL;
816 
817 	WRITE_ENTER(&vswp->mfdbrw);
818 	mod_hash_destroy_hash(vswp->mfdb);
819 	vswp->mfdb = NULL;
820 	RW_EXIT(&vswp->mfdbrw);
821 	rw_destroy(&vswp->mfdbrw);
822 
823 	/* free pri_types table */
824 	if (VSW_PRI_ETH_DEFINED(vswp)) {
825 		kmem_free(vswp->pri_types,
826 		    sizeof (uint16_t) * vswp->pri_num_types);
827 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
828 	}
829 
830 	ddi_remove_minor_node(dip, NULL);
831 
832 	rw_destroy(&vswp->plist.lockrw);
833 	WRITE_ENTER(&vsw_rw);
834 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
835 		if (*vswpp == vswp) {
836 			*vswpp = vswp->next;
837 			break;
838 		}
839 	}
840 	RW_EXIT(&vsw_rw);
841 	ddi_soft_state_free(vsw_state, instance);
842 
843 	return (DDI_SUCCESS);
844 }
845 
846 /*
847  * Get the value of the "vsw-phys-dev" property in the specified
848  * node. This property is the name of the physical device that
849  * the virtual switch will use to talk to the outside world.
850  *
851  * Note it is valid for this property to be NULL (but the property
852  * itself must exist). Callers of this routine should verify that
853  * the value returned is what they expected (i.e. either NULL or non NULL).
854  *
855  * On success returns value of the property in region pointed to by
856  * the 'name' argument, and with return value of 0. Otherwise returns 1.
857  */
858 static int
859 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
860 {
861 	int		len = 0;
862 	int		instance;
863 	char		*physname = NULL;
864 	char		*dev;
865 	const char	*dev_name;
866 	char		myname[MAXNAMELEN];
867 
868 	dev_name = ddi_driver_name(vswp->dip);
869 	instance = ddi_get_instance(vswp->dip);
870 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
871 
872 	if (md_get_prop_data(mdp, node, physdev_propname,
873 	    (uint8_t **)(&physname), &len) != 0) {
874 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
875 		    "device(s) from MD", vswp->instance);
876 		return (1);
877 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
878 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
879 		    vswp->instance, physname);
880 		return (1);
881 	} else if (strcmp(myname, physname) == 0) {
882 		/*
883 		 * Prevent the vswitch from opening itself as the
884 		 * network device.
885 		 */
886 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
887 		    vswp->instance, physname);
888 		return (1);
889 	} else {
890 		(void) strncpy(name, physname, strlen(physname) + 1);
891 		D2(vswp, "%s: using first device specified (%s)",
892 		    __func__, physname);
893 	}
894 
895 #ifdef DEBUG
896 	/*
897 	 * As a temporary measure to aid testing we check to see if there
898 	 * is a vsw.conf file present. If there is we use the value of the
899 	 * vsw_physname property in the file as the name of the physical
900 	 * device, overriding the value from the MD.
901 	 *
902 	 * There may be multiple devices listed, but for the moment
903 	 * we just use the first one.
904 	 */
905 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
906 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
907 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
908 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
909 			    vswp->instance, dev);
910 			ddi_prop_free(dev);
911 			return (1);
912 		} else {
913 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
914 			    "config file", vswp->instance, dev);
915 
916 			(void) strncpy(name, dev, strlen(dev) + 1);
917 		}
918 
919 		ddi_prop_free(dev);
920 	}
921 #endif
922 
923 	return (0);
924 }
925 
926 /*
927  * Read the 'vsw-switch-mode' property from the specified MD node.
928  *
929  * Returns 0 on success, otherwise returns 1.
930  */
931 static int
932 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode)
933 {
934 	int		len = 0;
935 	char		*smode = NULL;
936 	char		*curr_mode = NULL;
937 
938 	D1(vswp, "%s: enter", __func__);
939 
940 	/*
941 	 * Get the switch-mode property. The modes are listed in
942 	 * decreasing order of preference, i.e. prefered mode is
943 	 * first item in list.
944 	 */
945 	len = 0;
946 	if (md_get_prop_data(mdp, node, smode_propname,
947 	    (uint8_t **)(&smode), &len) != 0) {
948 		/*
949 		 * Unable to get switch-mode property from MD, nothing
950 		 * more we can do.
951 		 */
952 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
953 		    " from the MD", vswp->instance);
954 		return (1);
955 	}
956 
957 	curr_mode = smode;
958 	/*
959 	 * Modes of operation:
960 	 * 'switched'	 - layer 2 switching, underlying HW in
961 	 *			programmed mode.
962 	 * 'promiscuous' - layer 2 switching, underlying HW in
963 	 *			promiscuous mode.
964 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
965 	 *			in non-promiscuous mode.
966 	 */
967 	while (curr_mode < (smode + len)) {
968 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
969 		if (strcmp(curr_mode, "switched") == 0) {
970 			*mode = VSW_LAYER2;
971 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
972 			*mode = VSW_LAYER2 | VSW_LAYER2_PROMISC;
973 		} else if (strcmp(curr_mode, "routed") == 0) {
974 			*mode = VSW_LAYER3;
975 		} else {
976 			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
977 			    "setting to default switched mode",
978 			    vswp->instance, curr_mode);
979 			*mode = VSW_LAYER2;
980 		}
981 		curr_mode += strlen(curr_mode) + 1;
982 	}
983 
984 	D2(vswp, "%s: %d mode", __func__, *mode);
985 
986 	D1(vswp, "%s: exit", __func__);
987 
988 	return (0);
989 }
990 
991 /*
992  * Register with the MAC layer as a network device, so we
993  * can be plumbed if necessary.
994  */
995 static int
996 vsw_mac_register(vsw_t *vswp)
997 {
998 	mac_register_t	*macp;
999 	int		rv;
1000 
1001 	D1(vswp, "%s: enter", __func__);
1002 
1003 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1004 		return (EINVAL);
1005 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1006 	macp->m_driver = vswp;
1007 	macp->m_dip = vswp->dip;
1008 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1009 	macp->m_callbacks = &vsw_m_callbacks;
1010 	macp->m_min_sdu = 0;
1011 	macp->m_max_sdu = vswp->mtu;
1012 	macp->m_margin = VLAN_TAGSZ;
1013 	rv = mac_register(macp, &vswp->if_mh);
1014 	mac_free(macp);
1015 	if (rv != 0) {
1016 		/*
1017 		 * Treat this as a non-fatal error as we may be
1018 		 * able to operate in some other mode.
1019 		 */
1020 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1021 		    "a provider with MAC layer", vswp->instance);
1022 		return (rv);
1023 	}
1024 
1025 	vswp->if_state |= VSW_IF_REG;
1026 
1027 	D1(vswp, "%s: exit", __func__);
1028 
1029 	return (rv);
1030 }
1031 
1032 static int
1033 vsw_mac_unregister(vsw_t *vswp)
1034 {
1035 	int		rv = 0;
1036 
1037 	D1(vswp, "%s: enter", __func__);
1038 
1039 	WRITE_ENTER(&vswp->if_lockrw);
1040 
1041 	if (vswp->if_state & VSW_IF_REG) {
1042 		rv = mac_unregister(vswp->if_mh);
1043 		if (rv != 0) {
1044 			DWARN(vswp, "%s: unable to unregister from MAC "
1045 			    "framework", __func__);
1046 
1047 			RW_EXIT(&vswp->if_lockrw);
1048 			D1(vswp, "%s: fail exit", __func__);
1049 			return (rv);
1050 		}
1051 
1052 		/* mark i/f as down and unregistered */
1053 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1054 	}
1055 	RW_EXIT(&vswp->if_lockrw);
1056 
1057 	D1(vswp, "%s: exit", __func__);
1058 
1059 	return (rv);
1060 }
1061 
1062 static int
1063 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1064 {
1065 	vsw_t			*vswp = (vsw_t *)arg;
1066 
1067 	D1(vswp, "%s: enter", __func__);
1068 
1069 	mutex_enter(&vswp->mac_lock);
1070 	if (vswp->mh == NULL) {
1071 		mutex_exit(&vswp->mac_lock);
1072 		return (EINVAL);
1073 	}
1074 
1075 	/* return stats from underlying device */
1076 	*val = mac_stat_get(vswp->mh, stat);
1077 
1078 	mutex_exit(&vswp->mac_lock);
1079 
1080 	return (0);
1081 }
1082 
1083 static void
1084 vsw_m_stop(void *arg)
1085 {
1086 	vsw_t	*vswp = (vsw_t *)arg;
1087 
1088 	D1(vswp, "%s: enter", __func__);
1089 
1090 	WRITE_ENTER(&vswp->if_lockrw);
1091 	vswp->if_state &= ~VSW_IF_UP;
1092 	RW_EXIT(&vswp->if_lockrw);
1093 
1094 	/* Cleanup and close the mac client */
1095 	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
1096 
1097 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1098 }
1099 
1100 static int
1101 vsw_m_start(void *arg)
1102 {
1103 	int		rv;
1104 	vsw_t		*vswp = (vsw_t *)arg;
1105 
1106 	D1(vswp, "%s: enter", __func__);
1107 
1108 	WRITE_ENTER(&vswp->if_lockrw);
1109 
1110 	vswp->if_state |= VSW_IF_UP;
1111 
1112 	if (vswp->switching_setup_done == B_FALSE) {
1113 		/*
1114 		 * If the switching mode has not been setup yet, just
1115 		 * return. The unicast address will be programmed
1116 		 * after the physical device is successfully setup by the
1117 		 * timeout handler.
1118 		 */
1119 		RW_EXIT(&vswp->if_lockrw);
1120 		return (0);
1121 	}
1122 
1123 	/* if in layer2 mode, program unicast address. */
1124 	if (vswp->mh != NULL) {
1125 		/* Init a mac client and program addresses */
1126 		rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
1127 		if (rv != 0) {
1128 			cmn_err(CE_NOTE,
1129 			    "!vsw%d: failed to program interface "
1130 			    "unicast address\n", vswp->instance);
1131 		}
1132 	}
1133 
1134 	RW_EXIT(&vswp->if_lockrw);
1135 
1136 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1137 	return (0);
1138 }
1139 
1140 /*
1141  * Change the local interface address.
1142  *
1143  * Note: we don't support this entry point. The local
1144  * mac address of the switch can only be changed via its
1145  * MD node properties.
1146  */
1147 static int
1148 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1149 {
1150 	_NOTE(ARGUNUSED(arg, macaddr))
1151 
1152 	return (DDI_FAILURE);
1153 }
1154 
1155 static int
1156 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1157 {
1158 	vsw_t		*vswp = (vsw_t *)arg;
1159 	mcst_addr_t	*mcst_p = NULL;
1160 	uint64_t	addr = 0x0;
1161 	int		i, ret = 0;
1162 
1163 	D1(vswp, "%s: enter", __func__);
1164 
1165 	/*
1166 	 * Convert address into form that can be used
1167 	 * as hash table key.
1168 	 */
1169 	for (i = 0; i < ETHERADDRL; i++) {
1170 		addr = (addr << 8) | mca[i];
1171 	}
1172 
1173 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1174 
1175 	if (add) {
1176 		D2(vswp, "%s: adding multicast", __func__);
1177 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1178 			/*
1179 			 * Update the list of multicast addresses
1180 			 * contained within the vsw_t structure to
1181 			 * include this new one.
1182 			 */
1183 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1184 			if (mcst_p == NULL) {
1185 				DERR(vswp, "%s unable to alloc mem", __func__);
1186 				(void) vsw_del_mcst(vswp,
1187 				    VSW_LOCALDEV, addr, NULL);
1188 				return (1);
1189 			}
1190 			mcst_p->addr = addr;
1191 			ether_copy(mca, &mcst_p->mca);
1192 
1193 			/*
1194 			 * Call into the underlying driver to program the
1195 			 * address into HW.
1196 			 */
1197 			ret = vsw_mac_multicast_add(vswp, NULL, mcst_p,
1198 			    VSW_LOCALDEV);
1199 			if (ret != 0) {
1200 				(void) vsw_del_mcst(vswp,
1201 				    VSW_LOCALDEV, addr, NULL);
1202 				kmem_free(mcst_p, sizeof (*mcst_p));
1203 				return (ret);
1204 			}
1205 
1206 			mutex_enter(&vswp->mca_lock);
1207 			mcst_p->nextp = vswp->mcap;
1208 			vswp->mcap = mcst_p;
1209 			mutex_exit(&vswp->mca_lock);
1210 		} else {
1211 			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
1212 			    "address", vswp->instance);
1213 		}
1214 		return (ret);
1215 	}
1216 
1217 	D2(vswp, "%s: removing multicast", __func__);
1218 	/*
1219 	 * Remove the address from the hash table..
1220 	 */
1221 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1222 
1223 		/*
1224 		 * ..and then from the list maintained in the
1225 		 * vsw_t structure.
1226 		 */
1227 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1228 		ASSERT(mcst_p != NULL);
1229 
1230 		vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV);
1231 		kmem_free(mcst_p, sizeof (*mcst_p));
1232 	}
1233 
1234 	D1(vswp, "%s: exit", __func__);
1235 
1236 	return (0);
1237 }
1238 
1239 static int
1240 vsw_m_promisc(void *arg, boolean_t on)
1241 {
1242 	vsw_t		*vswp = (vsw_t *)arg;
1243 
1244 	D1(vswp, "%s: enter", __func__);
1245 
1246 	WRITE_ENTER(&vswp->if_lockrw);
1247 	if (on)
1248 		vswp->if_state |= VSW_IF_PROMISC;
1249 	else
1250 		vswp->if_state &= ~VSW_IF_PROMISC;
1251 	RW_EXIT(&vswp->if_lockrw);
1252 
1253 	D1(vswp, "%s: exit", __func__);
1254 
1255 	return (0);
1256 }
1257 
1258 static mblk_t *
1259 vsw_m_tx(void *arg, mblk_t *mp)
1260 {
1261 	vsw_t		*vswp = (vsw_t *)arg;
1262 
1263 	D1(vswp, "%s: enter", __func__);
1264 
1265 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1266 
1267 	if (mp == NULL) {
1268 		return (NULL);
1269 	}
1270 
1271 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1272 
1273 	D1(vswp, "%s: exit", __func__);
1274 
1275 	return (NULL);
1276 }
1277 
1278 /*
1279  * Register for machine description (MD) updates.
1280  *
1281  * Returns 0 on success, 1 on failure.
1282  */
1283 static int
1284 vsw_mdeg_register(vsw_t *vswp)
1285 {
1286 	mdeg_prop_spec_t	*pspecp;
1287 	mdeg_node_spec_t	*inst_specp;
1288 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1289 	size_t			templatesz;
1290 	int			rv;
1291 
1292 	D1(vswp, "%s: enter", __func__);
1293 
1294 	/*
1295 	 * Allocate and initialize a per-instance copy
1296 	 * of the global property spec array that will
1297 	 * uniquely identify this vsw instance.
1298 	 */
1299 	templatesz = sizeof (vsw_prop_template);
1300 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1301 
1302 	bcopy(vsw_prop_template, pspecp, templatesz);
1303 
1304 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1305 
1306 	/* initialize the complete prop spec structure */
1307 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1308 	inst_specp->namep = "virtual-device";
1309 	inst_specp->specp = pspecp;
1310 
1311 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1312 	    vswp->regprop);
1313 	/*
1314 	 * Register an interest in 'virtual-device' nodes with a
1315 	 * 'name' property of 'virtual-network-switch'
1316 	 */
1317 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1318 	    (void *)vswp, &mdeg_hdl);
1319 	if (rv != MDEG_SUCCESS) {
1320 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1321 		    __func__, rv);
1322 		goto mdeg_reg_fail;
1323 	}
1324 
1325 	/*
1326 	 * Register an interest in 'vsw-port' nodes.
1327 	 */
1328 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1329 	    (void *)vswp, &mdeg_port_hdl);
1330 	if (rv != MDEG_SUCCESS) {
1331 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1332 		(void) mdeg_unregister(mdeg_hdl);
1333 		goto mdeg_reg_fail;
1334 	}
1335 
1336 	/* save off data that will be needed later */
1337 	vswp->inst_spec = inst_specp;
1338 	vswp->mdeg_hdl = mdeg_hdl;
1339 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1340 
1341 	D1(vswp, "%s: exit", __func__);
1342 	return (0);
1343 
1344 mdeg_reg_fail:
1345 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1346 	    vswp->instance);
1347 	kmem_free(pspecp, templatesz);
1348 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1349 
1350 	vswp->mdeg_hdl = NULL;
1351 	vswp->mdeg_port_hdl = NULL;
1352 
1353 	return (1);
1354 }
1355 
1356 static void
1357 vsw_mdeg_unregister(vsw_t *vswp)
1358 {
1359 	D1(vswp, "vsw_mdeg_unregister: enter");
1360 
1361 	if (vswp->mdeg_hdl != NULL)
1362 		(void) mdeg_unregister(vswp->mdeg_hdl);
1363 
1364 	if (vswp->mdeg_port_hdl != NULL)
1365 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1366 
1367 	if (vswp->inst_spec != NULL) {
1368 		if (vswp->inst_spec->specp != NULL) {
1369 			(void) kmem_free(vswp->inst_spec->specp,
1370 			    sizeof (vsw_prop_template));
1371 			vswp->inst_spec->specp = NULL;
1372 		}
1373 
1374 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1375 		vswp->inst_spec = NULL;
1376 	}
1377 
1378 	D1(vswp, "vsw_mdeg_unregister: exit");
1379 }
1380 
1381 /*
1382  * Mdeg callback invoked for the vsw node itself.
1383  */
1384 static int
1385 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1386 {
1387 	vsw_t		*vswp;
1388 	md_t		*mdp;
1389 	mde_cookie_t	node;
1390 	uint64_t	inst;
1391 	char		*node_name = NULL;
1392 
1393 	if (resp == NULL)
1394 		return (MDEG_FAILURE);
1395 
1396 	vswp = (vsw_t *)cb_argp;
1397 
1398 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1399 	    " : prev matched %d", __func__, resp->added.nelem,
1400 	    resp->removed.nelem, resp->match_curr.nelem,
1401 	    resp->match_prev.nelem);
1402 
1403 	/*
1404 	 * We get an initial callback for this node as 'added'
1405 	 * after registering with mdeg. Note that we would have
1406 	 * already gathered information about this vsw node by
1407 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1408 	 * So, there is a window where the properties of this
1409 	 * node might have changed when we get this initial 'added'
1410 	 * callback. We handle this as if an update occured
1411 	 * and invoke the same function which handles updates to
1412 	 * the properties of this vsw-node if any.
1413 	 *
1414 	 * A non-zero 'match' value indicates that the MD has been
1415 	 * updated and that a virtual-network-switch node is
1416 	 * present which may or may not have been updated. It is
1417 	 * up to the clients to examine their own nodes and
1418 	 * determine if they have changed.
1419 	 */
1420 	if (resp->added.nelem != 0) {
1421 
1422 		if (resp->added.nelem != 1) {
1423 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1424 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1425 			return (MDEG_FAILURE);
1426 		}
1427 
1428 		mdp = resp->added.mdp;
1429 		node = resp->added.mdep[0];
1430 
1431 	} else if (resp->match_curr.nelem != 0) {
1432 
1433 		if (resp->match_curr.nelem != 1) {
1434 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1435 			    "invalid: %d\n", vswp->instance,
1436 			    resp->match_curr.nelem);
1437 			return (MDEG_FAILURE);
1438 		}
1439 
1440 		mdp = resp->match_curr.mdp;
1441 		node = resp->match_curr.mdep[0];
1442 
1443 	} else {
1444 		return (MDEG_FAILURE);
1445 	}
1446 
1447 	/* Validate name and instance */
1448 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1449 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1450 		return (MDEG_FAILURE);
1451 	}
1452 
1453 	/* is this a virtual-network-switch? */
1454 	if (strcmp(node_name, vsw_propname) != 0) {
1455 		DERR(vswp, "%s: Invalid node name: %s\n",
1456 		    __func__, node_name);
1457 		return (MDEG_FAILURE);
1458 	}
1459 
1460 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1461 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1462 		    __func__);
1463 		return (MDEG_FAILURE);
1464 	}
1465 
1466 	/* is this the right instance of vsw? */
1467 	if (inst != vswp->regprop) {
1468 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1469 		    __func__, inst);
1470 		return (MDEG_FAILURE);
1471 	}
1472 
1473 	vsw_update_md_prop(vswp, mdp, node);
1474 
1475 	return (MDEG_SUCCESS);
1476 }
1477 
1478 /*
1479  * Mdeg callback invoked for changes to the vsw-port nodes
1480  * under the vsw node.
1481  */
1482 static int
1483 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1484 {
1485 	vsw_t		*vswp;
1486 	int		idx;
1487 	md_t		*mdp;
1488 	mde_cookie_t	node;
1489 	uint64_t	inst;
1490 	int		rv;
1491 
1492 	if ((resp == NULL) || (cb_argp == NULL))
1493 		return (MDEG_FAILURE);
1494 
1495 	vswp = (vsw_t *)cb_argp;
1496 
1497 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1498 	    " : prev matched %d", __func__, resp->added.nelem,
1499 	    resp->removed.nelem, resp->match_curr.nelem,
1500 	    resp->match_prev.nelem);
1501 
1502 	/* process added ports */
1503 	for (idx = 0; idx < resp->added.nelem; idx++) {
1504 		mdp = resp->added.mdp;
1505 		node = resp->added.mdep[idx];
1506 
1507 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1508 
1509 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1510 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1511 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1512 		}
1513 	}
1514 
1515 	/* process removed ports */
1516 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1517 		mdp = resp->removed.mdp;
1518 		node = resp->removed.mdep[idx];
1519 
1520 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1521 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1522 			    __func__, id_propname, idx);
1523 			continue;
1524 		}
1525 
1526 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1527 
1528 		if (vsw_port_detach(vswp, inst) != 0) {
1529 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1530 			    vswp->instance, inst);
1531 		}
1532 	}
1533 
1534 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1535 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1536 		    resp->match_curr.mdep[idx],
1537 		    resp->match_prev.mdp,
1538 		    resp->match_prev.mdep[idx]);
1539 	}
1540 
1541 	D1(vswp, "%s: exit", __func__);
1542 
1543 	return (MDEG_SUCCESS);
1544 }
1545 
1546 /*
1547  * Scan the machine description for this instance of vsw
1548  * and read its properties. Called only from vsw_attach().
1549  * Returns: 0 on success, 1 on failure.
1550  */
1551 static int
1552 vsw_read_mdprops(vsw_t *vswp)
1553 {
1554 	md_t		*mdp = NULL;
1555 	mde_cookie_t	rootnode;
1556 	mde_cookie_t	*listp = NULL;
1557 	uint64_t	inst;
1558 	uint64_t	cfgh;
1559 	char		*name;
1560 	int		rv = 1;
1561 	int		num_nodes = 0;
1562 	int		num_devs = 0;
1563 	int		listsz = 0;
1564 	int		i;
1565 
1566 	/*
1567 	 * In each 'virtual-device' node in the MD there is a
1568 	 * 'cfg-handle' property which is the MD's concept of
1569 	 * an instance number (this may be completely different from
1570 	 * the device drivers instance #). OBP reads that value and
1571 	 * stores it in the 'reg' property of the appropriate node in
1572 	 * the device tree. We first read this reg property and use this
1573 	 * to compare against the 'cfg-handle' property of vsw nodes
1574 	 * in MD to get to this specific vsw instance and then read
1575 	 * other properties that we are interested in.
1576 	 * We also cache the value of 'reg' property and use it later
1577 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1578 	 */
1579 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1580 	    DDI_PROP_DONTPASS, reg_propname, -1);
1581 	if (inst == -1) {
1582 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1583 		    "OBP device tree", vswp->instance, reg_propname);
1584 		return (rv);
1585 	}
1586 
1587 	vswp->regprop = inst;
1588 
1589 	if ((mdp = md_get_handle()) == NULL) {
1590 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1591 		return (rv);
1592 	}
1593 
1594 	num_nodes = md_node_count(mdp);
1595 	ASSERT(num_nodes > 0);
1596 
1597 	listsz = num_nodes * sizeof (mde_cookie_t);
1598 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1599 
1600 	rootnode = md_root_node(mdp);
1601 
1602 	/* search for all "virtual_device" nodes */
1603 	num_devs = md_scan_dag(mdp, rootnode,
1604 	    md_find_name(mdp, vdev_propname),
1605 	    md_find_name(mdp, "fwd"), listp);
1606 	if (num_devs <= 0) {
1607 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1608 		goto vsw_readmd_exit;
1609 	}
1610 
1611 	/*
1612 	 * Now loop through the list of virtual-devices looking for
1613 	 * devices with name "virtual-network-switch" and for each
1614 	 * such device compare its instance with what we have from
1615 	 * the 'reg' property to find the right node in MD and then
1616 	 * read all its properties.
1617 	 */
1618 	for (i = 0; i < num_devs; i++) {
1619 
1620 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1621 			DWARN(vswp, "%s: name property not found\n",
1622 			    __func__);
1623 			goto vsw_readmd_exit;
1624 		}
1625 
1626 		/* is this a virtual-network-switch? */
1627 		if (strcmp(name, vsw_propname) != 0)
1628 			continue;
1629 
1630 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1631 			DWARN(vswp, "%s: cfg-handle property not found\n",
1632 			    __func__);
1633 			goto vsw_readmd_exit;
1634 		}
1635 
1636 		/* is this the required instance of vsw? */
1637 		if (inst != cfgh)
1638 			continue;
1639 
1640 		/* now read all properties of this vsw instance */
1641 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1642 		break;
1643 	}
1644 
1645 vsw_readmd_exit:
1646 
1647 	kmem_free(listp, listsz);
1648 	(void) md_fini_handle(mdp);
1649 	return (rv);
1650 }
1651 
1652 /*
1653  * Read the initial start-of-day values from the specified MD node.
1654  */
1655 static int
1656 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1657 {
1658 	uint64_t	macaddr = 0;
1659 
1660 	D1(vswp, "%s: enter", __func__);
1661 
1662 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1663 		return (1);
1664 	}
1665 
1666 	/* mac address for vswitch device itself */
1667 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1668 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1669 		    vswp->instance);
1670 		return (1);
1671 	}
1672 
1673 	vsw_save_lmacaddr(vswp, macaddr);
1674 
1675 	if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) {
1676 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1677 		    "defaulting to 'switched' mode",
1678 		    __func__, smode_propname);
1679 
1680 		vswp->smode = VSW_LAYER2;
1681 	}
1682 
1683 	/* read mtu */
1684 	vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
1685 	if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
1686 		vswp->mtu = ETHERMTU;
1687 	}
1688 	vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
1689 	    VLAN_TAGSZ;
1690 
1691 	/* read vlan id properties of this vsw instance */
1692 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1693 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1694 
1695 	/* read priority-ether-types */
1696 	vsw_read_pri_eth_types(vswp, mdp, node);
1697 
1698 	D1(vswp, "%s: exit", __func__);
1699 	return (0);
1700 }
1701 
1702 /*
1703  * Read vlan id properties of the given MD node.
1704  * Arguments:
1705  *   arg:          device argument(vsw device or a port)
1706  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1707  *   mdp:          machine description
1708  *   node:         md node cookie
1709  *
1710  * Returns:
1711  *   pvidp:        port-vlan-id of the node
1712  *   vidspp:       list of vlan-ids of the node
1713  *   nvidsp:       # of vlan-ids in the list
1714  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1715  */
1716 static void
1717 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1718 	uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp,
1719 	uint16_t *default_idp)
1720 {
1721 	vsw_t		*vswp;
1722 	vsw_port_t	*portp;
1723 	char		*pvid_propname;
1724 	char		*vid_propname;
1725 	uint_t		nvids = 0;
1726 	uint32_t	vids_size;
1727 	int		rv;
1728 	int		i;
1729 	uint64_t	*data;
1730 	uint64_t	val;
1731 	int		size;
1732 	int		inst;
1733 
1734 	if (type == VSW_LOCALDEV) {
1735 
1736 		vswp = (vsw_t *)arg;
1737 		pvid_propname = vsw_pvid_propname;
1738 		vid_propname = vsw_vid_propname;
1739 		inst = vswp->instance;
1740 
1741 	} else if (type == VSW_VNETPORT) {
1742 
1743 		portp = (vsw_port_t *)arg;
1744 		vswp = portp->p_vswp;
1745 		pvid_propname = port_pvid_propname;
1746 		vid_propname = port_vid_propname;
1747 		inst = portp->p_instance;
1748 
1749 	} else {
1750 		return;
1751 	}
1752 
1753 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1754 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1755 		if (rv != 0) {
1756 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1757 			    vsw_dvid_propname);
1758 
1759 			*default_idp = vsw_default_vlan_id;
1760 		} else {
1761 			*default_idp = val & 0xFFF;
1762 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1763 			    vsw_dvid_propname, inst, *default_idp);
1764 		}
1765 	}
1766 
1767 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1768 	if (rv != 0) {
1769 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1770 		*pvidp = vsw_default_vlan_id;
1771 	} else {
1772 
1773 		*pvidp = val & 0xFFF;
1774 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1775 		    pvid_propname, inst, *pvidp);
1776 	}
1777 
1778 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1779 	    &size);
1780 	if (rv != 0) {
1781 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1782 		size = 0;
1783 	} else {
1784 		size /= sizeof (uint64_t);
1785 	}
1786 	nvids = size;
1787 
1788 	if (nvids != 0) {
1789 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1790 		vids_size = sizeof (vsw_vlanid_t) * nvids;
1791 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1792 		for (i = 0; i < nvids; i++) {
1793 			(*vidspp)[i].vl_vid = data[i] & 0xFFFF;
1794 			(*vidspp)[i].vl_set = B_FALSE;
1795 			D2(vswp, " %d ", (*vidspp)[i].vl_vid);
1796 		}
1797 		D2(vswp, "\n");
1798 	}
1799 
1800 	*nvidsp = nvids;
1801 }
1802 
1803 /*
1804  * This function reads "priority-ether-types" property from md. This property
1805  * is used to enable support for priority frames. Applications which need
1806  * guaranteed and timely delivery of certain high priority frames to/from
1807  * a vnet or vsw within ldoms, should configure this property by providing
1808  * the ether type(s) for which the priority facility is needed.
1809  * Normal data frames are delivered over a ldc channel using the descriptor
1810  * ring mechanism which is constrained by factors such as descriptor ring size,
1811  * the rate at which the ring is processed at the peer ldc end point, etc.
1812  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1813  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1814  * descriptor ring path and enables a more reliable and timely delivery of
1815  * frames to the peer.
1816  */
1817 static void
1818 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1819 {
1820 	int		rv;
1821 	uint16_t	*types;
1822 	uint64_t	*data;
1823 	int		size;
1824 	int		i;
1825 	size_t		mblk_sz;
1826 
1827 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1828 	    (uint8_t **)&data, &size);
1829 	if (rv != 0) {
1830 		/*
1831 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1832 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1833 		 */
1834 		if (vsw_pri_eth_type != 0) {
1835 			size = sizeof (vsw_pri_eth_type);
1836 			data = &vsw_pri_eth_type;
1837 		} else {
1838 			D3(vswp, "%s: prop(%s) not found", __func__,
1839 			    pri_types_propname);
1840 			size = 0;
1841 		}
1842 	}
1843 
1844 	if (size == 0) {
1845 		vswp->pri_num_types = 0;
1846 		return;
1847 	}
1848 
1849 	/*
1850 	 * we have some priority-ether-types defined;
1851 	 * allocate a table of these types and also
1852 	 * allocate a pool of mblks to transmit these
1853 	 * priority packets.
1854 	 */
1855 	size /= sizeof (uint64_t);
1856 	vswp->pri_num_types = size;
1857 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1858 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1859 		types[i] = data[i] & 0xFFFF;
1860 	}
1861 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1862 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1863 }
1864 
1865 static void
1866 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1867 {
1868 	int		rv;
1869 	int		inst;
1870 	uint64_t	val;
1871 	char		*mtu_propname;
1872 
1873 	mtu_propname = vsw_mtu_propname;
1874 	inst = vswp->instance;
1875 
1876 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1877 	if (rv != 0) {
1878 		D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
1879 		*mtu = vsw_ethermtu;
1880 	} else {
1881 
1882 		*mtu = val & 0xFFFF;
1883 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1884 		    mtu_propname, inst, *mtu);
1885 	}
1886 }
1887 
1888 /*
1889  * Update the mtu of the vsw device. We first check if the device has been
1890  * plumbed and if so fail the mtu update. Otherwise, we continue to update the
1891  * new mtu and reset all ports to initiate handshake re-negotiation with peers
1892  * using the new mtu.
1893  */
1894 static int
1895 vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
1896 {
1897 	int	rv;
1898 
1899 	WRITE_ENTER(&vswp->if_lockrw);
1900 
1901 	if (vswp->if_state & VSW_IF_UP) {
1902 
1903 		RW_EXIT(&vswp->if_lockrw);
1904 
1905 		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
1906 		    " as the device is plumbed\n", vswp->instance);
1907 		return (EBUSY);
1908 
1909 	} else {
1910 
1911 		D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
1912 		    __func__, vswp->mtu, mtu);
1913 
1914 		vswp->mtu = mtu;
1915 		vswp->max_frame_size = vswp->mtu +
1916 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1917 
1918 		rv = mac_maxsdu_update(vswp->if_mh, mtu);
1919 		if (rv != 0) {
1920 			cmn_err(CE_NOTE,
1921 			    "!vsw%d: Unable to update mtu with mac"
1922 			    " layer\n", vswp->instance);
1923 		}
1924 
1925 		RW_EXIT(&vswp->if_lockrw);
1926 
1927 		/* Reset ports to renegotiate with the new mtu */
1928 		vsw_reset_ports(vswp);
1929 
1930 	}
1931 
1932 	return (0);
1933 }
1934 
1935 /*
1936  * Check to see if the relevant properties in the specified node have
1937  * changed, and if so take the appropriate action.
1938  *
1939  * If any of the properties are missing or invalid we don't take
1940  * any action, as this function should only be invoked when modifications
1941  * have been made to what we assume is a working configuration, which
1942  * we leave active.
1943  *
1944  * Note it is legal for this routine to be invoked even if none of the
1945  * properties in the port node within the MD have actually changed.
1946  */
1947 static void
1948 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1949 {
1950 	char		physname[LIFNAMSIZ];
1951 	char		drv[LIFNAMSIZ];
1952 	uint_t		ddi_instance;
1953 	uint8_t		new_smode;
1954 	int		i;
1955 	uint64_t 	macaddr = 0;
1956 	enum		{MD_init = 0x1,
1957 				MD_physname = 0x2,
1958 				MD_macaddr = 0x4,
1959 				MD_smode = 0x8,
1960 				MD_vlans = 0x10,
1961 				MD_mtu = 0x20} updated;
1962 	int		rv;
1963 	uint16_t	pvid;
1964 	vsw_vlanid_t	*vids;
1965 	uint16_t	nvids;
1966 	uint32_t	mtu;
1967 
1968 	updated = MD_init;
1969 
1970 	D1(vswp, "%s: enter", __func__);
1971 
1972 	/*
1973 	 * Check if name of physical device in MD has changed.
1974 	 */
1975 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
1976 		/*
1977 		 * Do basic sanity check on new device name/instance,
1978 		 * if its non NULL. It is valid for the device name to
1979 		 * have changed from a non NULL to a NULL value, i.e.
1980 		 * the vsw is being changed to 'routed' mode.
1981 		 */
1982 		if ((strlen(physname) != 0) &&
1983 		    (ddi_parse(physname, drv,
1984 		    &ddi_instance) != DDI_SUCCESS)) {
1985 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
1986 			    " a valid device name/instance",
1987 			    vswp->instance, physname);
1988 			goto fail_reconf;
1989 		}
1990 
1991 		if (strcmp(physname, vswp->physname)) {
1992 			D2(vswp, "%s: device name changed from %s to %s",
1993 			    __func__, vswp->physname, physname);
1994 
1995 			updated |= MD_physname;
1996 		} else {
1997 			D2(vswp, "%s: device name unchanged at %s",
1998 			    __func__, vswp->physname);
1999 		}
2000 	} else {
2001 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
2002 		    "device from updated MD.", vswp->instance);
2003 		goto fail_reconf;
2004 	}
2005 
2006 	/*
2007 	 * Check if MAC address has changed.
2008 	 */
2009 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
2010 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
2011 		    vswp->instance);
2012 		goto fail_reconf;
2013 	} else {
2014 		uint64_t maddr = macaddr;
2015 		READ_ENTER(&vswp->if_lockrw);
2016 		for (i = ETHERADDRL - 1; i >= 0; i--) {
2017 			if (vswp->if_addr.ether_addr_octet[i]
2018 			    != (macaddr & 0xFF)) {
2019 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
2020 				    __func__, i,
2021 				    vswp->if_addr.ether_addr_octet[i],
2022 				    (macaddr & 0xFF));
2023 				updated |= MD_macaddr;
2024 				macaddr = maddr;
2025 				break;
2026 			}
2027 			macaddr >>= 8;
2028 		}
2029 		RW_EXIT(&vswp->if_lockrw);
2030 		if (updated & MD_macaddr) {
2031 			vsw_save_lmacaddr(vswp, macaddr);
2032 		}
2033 	}
2034 
2035 	/*
2036 	 * Check if switching modes have changed.
2037 	 */
2038 	if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) {
2039 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2040 		    vswp->instance, smode_propname);
2041 		goto fail_reconf;
2042 	} else {
2043 		if (new_smode != vswp->smode) {
2044 			D2(vswp, "%s: switching mode changed from %d to %d",
2045 			    __func__, vswp->smode, new_smode);
2046 
2047 			updated |= MD_smode;
2048 		}
2049 	}
2050 
2051 	/* Read the vlan ids */
2052 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2053 	    &nvids, NULL);
2054 
2055 	/* Determine if there are any vlan id updates */
2056 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2057 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2058 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2059 	    !vsw_cmp_vids(vids, vswp->vids, nvids))) {
2060 		updated |= MD_vlans;
2061 	}
2062 
2063 	/* Read mtu */
2064 	vsw_mtu_read(vswp, mdp, node, &mtu);
2065 	if (mtu != vswp->mtu) {
2066 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2067 			updated |= MD_mtu;
2068 		} else {
2069 			cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2070 			    " as the specified value:%d is invalid\n",
2071 			    vswp->instance, mtu);
2072 		}
2073 	}
2074 
2075 	/*
2076 	 * Now make any changes which are needed...
2077 	 */
2078 
2079 	if (updated & (MD_physname | MD_smode | MD_mtu)) {
2080 
2081 		/*
2082 		 * Stop any pending timeout to setup switching mode.
2083 		 */
2084 		vsw_stop_switching_timeout(vswp);
2085 
2086 		/* Cleanup HybridIO */
2087 		vsw_hio_cleanup(vswp);
2088 
2089 		/*
2090 		 * Remove unicst, mcst addrs of vsw interface
2091 		 * and ports from the physdev. This also closes
2092 		 * the corresponding mac clients.
2093 		 */
2094 		vsw_unset_addrs(vswp);
2095 
2096 		/*
2097 		 * Stop, detach and close the old device..
2098 		 */
2099 		mutex_enter(&vswp->mac_lock);
2100 		vsw_mac_close(vswp);
2101 		mutex_exit(&vswp->mac_lock);
2102 
2103 		/*
2104 		 * Update phys name.
2105 		 */
2106 		if (updated & MD_physname) {
2107 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2108 			    vswp->instance, vswp->physname, physname);
2109 			(void) strncpy(vswp->physname,
2110 			    physname, strlen(physname) + 1);
2111 		}
2112 
2113 		/*
2114 		 * Update array with the new switch mode values.
2115 		 */
2116 		if (updated & MD_smode) {
2117 			vswp->smode = new_smode;
2118 		}
2119 
2120 		/* Update mtu */
2121 		if (updated & MD_mtu) {
2122 			rv = vsw_mtu_update(vswp, mtu);
2123 			if (rv != 0) {
2124 				goto fail_update;
2125 			}
2126 		}
2127 
2128 		/*
2129 		 * ..and attach, start the new device.
2130 		 */
2131 		rv = vsw_setup_switching(vswp);
2132 		if (rv == EAGAIN) {
2133 			/*
2134 			 * Unable to setup switching mode.
2135 			 * As the error is EAGAIN, schedule a timeout to retry
2136 			 * and return. Programming addresses of ports and
2137 			 * vsw interface will be done when the timeout handler
2138 			 * completes successfully.
2139 			 */
2140 			mutex_enter(&vswp->swtmout_lock);
2141 
2142 			vswp->swtmout_enabled = B_TRUE;
2143 			vswp->swtmout_id =
2144 			    timeout(vsw_setup_switching_timeout, vswp,
2145 			    (vsw_setup_switching_delay *
2146 			    drv_usectohz(MICROSEC)));
2147 
2148 			mutex_exit(&vswp->swtmout_lock);
2149 
2150 			return;
2151 
2152 		} else if (rv) {
2153 			goto fail_update;
2154 		}
2155 
2156 		vsw_setup_layer2_post_process(vswp);
2157 	} else if (updated & MD_macaddr) {
2158 		/*
2159 		 * We enter here if only MD_macaddr is exclusively updated.
2160 		 * If MD_physname and/or MD_smode are also updated, then
2161 		 * as part of that, we would have implicitly processed
2162 		 * MD_macaddr update (above).
2163 		 */
2164 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2165 		    vswp->instance, macaddr);
2166 
2167 		READ_ENTER(&vswp->if_lockrw);
2168 		if (vswp->if_state & VSW_IF_UP) {
2169 			/* reconfigure with new address */
2170 			vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0);
2171 
2172 			/*
2173 			 * Notify the MAC layer of the changed address.
2174 			 */
2175 			mac_unicst_update(vswp->if_mh,
2176 			    (uint8_t *)&vswp->if_addr);
2177 
2178 		}
2179 		RW_EXIT(&vswp->if_lockrw);
2180 
2181 	}
2182 
2183 	if (updated & MD_vlans) {
2184 		/* Remove existing vlan ids from the hash table. */
2185 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2186 
2187 		if (vswp->if_state & VSW_IF_UP) {
2188 			vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids);
2189 		} else {
2190 			if (vswp->nvids != 0) {
2191 				kmem_free(vswp->vids,
2192 				    sizeof (vsw_vlanid_t) * vswp->nvids);
2193 			}
2194 			vswp->vids = vids;
2195 			vswp->nvids = nvids;
2196 			vswp->pvid = pvid;
2197 		}
2198 
2199 		/* add these new vlan ids into hash table */
2200 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2201 	} else {
2202 		if (nvids != 0) {
2203 			kmem_free(vids, sizeof (vsw_vlanid_t) * nvids);
2204 		}
2205 	}
2206 
2207 	return;
2208 
2209 fail_reconf:
2210 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2211 	return;
2212 
2213 fail_update:
2214 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2215 	    vswp->instance);
2216 }
2217 
2218 /*
2219  * Read the port's md properties.
2220  */
2221 static int
2222 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2223 	md_t *mdp, mde_cookie_t *node)
2224 {
2225 	uint64_t		ldc_id;
2226 	uint8_t			*addrp;
2227 	int			i, addrsz;
2228 	int			num_nodes = 0, nchan = 0;
2229 	int			listsz = 0;
2230 	mde_cookie_t		*listp = NULL;
2231 	struct ether_addr	ea;
2232 	uint64_t		macaddr;
2233 	uint64_t		inst = 0;
2234 	uint64_t		val;
2235 
2236 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2237 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2238 		    id_propname);
2239 		return (1);
2240 	}
2241 
2242 	/*
2243 	 * Find the channel endpoint node(s) (which should be under this
2244 	 * port node) which contain the channel id(s).
2245 	 */
2246 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2247 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2248 		    __func__, num_nodes);
2249 		return (1);
2250 	}
2251 
2252 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2253 
2254 	/* allocate enough space for node list */
2255 	listsz = num_nodes * sizeof (mde_cookie_t);
2256 	listp = kmem_zalloc(listsz, KM_SLEEP);
2257 
2258 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2259 	    md_find_name(mdp, "fwd"), listp);
2260 
2261 	if (nchan <= 0) {
2262 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2263 		kmem_free(listp, listsz);
2264 		return (1);
2265 	}
2266 
2267 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2268 
2269 	/* use property from first node found */
2270 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2271 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2272 		    id_propname);
2273 		kmem_free(listp, listsz);
2274 		return (1);
2275 	}
2276 
2277 	/* don't need list any more */
2278 	kmem_free(listp, listsz);
2279 
2280 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2281 
2282 	/* read mac-address property */
2283 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2284 	    &addrp, &addrsz)) {
2285 		DWARN(vswp, "%s: prop(%s) not found",
2286 		    __func__, remaddr_propname);
2287 		return (1);
2288 	}
2289 
2290 	if (addrsz < ETHERADDRL) {
2291 		DWARN(vswp, "%s: invalid address size", __func__);
2292 		return (1);
2293 	}
2294 
2295 	macaddr = *((uint64_t *)addrp);
2296 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2297 
2298 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2299 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2300 		macaddr >>= 8;
2301 	}
2302 
2303 	/* now update all properties into the port */
2304 	portp->p_vswp = vswp;
2305 	portp->p_instance = inst;
2306 	portp->addr_set = B_FALSE;
2307 	ether_copy(&ea, &portp->p_macaddr);
2308 	if (nchan > VSW_PORT_MAX_LDCS) {
2309 		D2(vswp, "%s: using first of %d ldc ids",
2310 		    __func__, nchan);
2311 		nchan = VSW_PORT_MAX_LDCS;
2312 	}
2313 	portp->num_ldcs = nchan;
2314 	portp->ldc_ids =
2315 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2316 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2317 
2318 	/* read vlan id properties of this port node */
2319 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2320 	    &portp->vids, &portp->nvids, NULL);
2321 
2322 	/* Check if hybrid property is present */
2323 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2324 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2325 		portp->p_hio_enabled = B_TRUE;
2326 	} else {
2327 		portp->p_hio_enabled = B_FALSE;
2328 	}
2329 	/*
2330 	 * Port hio capability determined after version
2331 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2332 	 */
2333 	portp->p_hio_capable = B_FALSE;
2334 	return (0);
2335 }
2336 
2337 /*
2338  * Add a new port to the system.
2339  *
2340  * Returns 0 on success, 1 on failure.
2341  */
2342 int
2343 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2344 {
2345 	vsw_port_t	*portp;
2346 	int		rv;
2347 
2348 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2349 
2350 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2351 	if (rv != 0) {
2352 		kmem_free(portp, sizeof (*portp));
2353 		return (1);
2354 	}
2355 
2356 	rv = vsw_port_attach(portp);
2357 	if (rv != 0) {
2358 		DERR(vswp, "%s: failed to attach port", __func__);
2359 		return (1);
2360 	}
2361 
2362 	return (0);
2363 }
2364 
2365 static int
2366 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2367 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2368 {
2369 	uint64_t	cport_num;
2370 	uint64_t	pport_num;
2371 	vsw_port_list_t	*plistp;
2372 	vsw_port_t	*portp;
2373 	boolean_t	updated_vlans = B_FALSE;
2374 	uint16_t	pvid;
2375 	vsw_vlanid_t	*vids;
2376 	uint16_t	nvids;
2377 	uint64_t	val;
2378 	boolean_t	hio_enabled = B_FALSE;
2379 
2380 	/*
2381 	 * For now, we get port updates only if vlan ids changed.
2382 	 * We read the port num and do some sanity check.
2383 	 */
2384 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2385 		return (1);
2386 	}
2387 
2388 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2389 		return (1);
2390 	}
2391 	if (cport_num != pport_num)
2392 		return (1);
2393 
2394 	plistp = &(vswp->plist);
2395 
2396 	READ_ENTER(&plistp->lockrw);
2397 
2398 	portp = vsw_lookup_port(vswp, cport_num);
2399 	if (portp == NULL) {
2400 		RW_EXIT(&plistp->lockrw);
2401 		return (1);
2402 	}
2403 
2404 	/* Read the vlan ids */
2405 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2406 	    &vids, &nvids, NULL);
2407 
2408 	/* Determine if there are any vlan id updates */
2409 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2410 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2411 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2412 	    !vsw_cmp_vids(vids, portp->vids, nvids))) {
2413 		updated_vlans = B_TRUE;
2414 	}
2415 
2416 	if (updated_vlans == B_TRUE) {
2417 
2418 		/* Remove existing vlan ids from the hash table. */
2419 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2420 
2421 		/* Reconfigure vlans with network device */
2422 		vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids);
2423 
2424 		/* add these new vlan ids into hash table */
2425 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2426 
2427 		/* reset the port if it is vlan unaware (ver < 1.3) */
2428 		vsw_vlan_unaware_port_reset(portp);
2429 	}
2430 
2431 	/* Check if hybrid property is present */
2432 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2433 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2434 		hio_enabled = B_TRUE;
2435 	}
2436 
2437 	if (portp->p_hio_enabled != hio_enabled) {
2438 		vsw_hio_port_update(portp, hio_enabled);
2439 	}
2440 
2441 	RW_EXIT(&plistp->lockrw);
2442 
2443 	return (0);
2444 }
2445 
2446 /*
2447  * vsw_mac_rx -- A common function to send packets to the interface.
2448  * By default this function check if the interface is UP or not, the
2449  * rest of the behaviour depends on the flags as below:
2450  *
2451  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2452  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2453  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2454  */
2455 void
2456 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2457     mblk_t *mp, vsw_macrx_flags_t flags)
2458 {
2459 	mblk_t		*mpt;
2460 
2461 	D1(vswp, "%s:enter\n", __func__);
2462 	READ_ENTER(&vswp->if_lockrw);
2463 	/* Check if the interface is up */
2464 	if (!(vswp->if_state & VSW_IF_UP)) {
2465 		RW_EXIT(&vswp->if_lockrw);
2466 		/* Free messages only if FREEMSG flag specified */
2467 		if (flags & VSW_MACRX_FREEMSG) {
2468 			freemsgchain(mp);
2469 		}
2470 		D1(vswp, "%s:exit\n", __func__);
2471 		return;
2472 	}
2473 	/*
2474 	 * If PROMISC flag is passed, then check if
2475 	 * the interface is in the PROMISC mode.
2476 	 * If not, drop the messages.
2477 	 */
2478 	if (flags & VSW_MACRX_PROMISC) {
2479 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2480 			RW_EXIT(&vswp->if_lockrw);
2481 			/* Free messages only if FREEMSG flag specified */
2482 			if (flags & VSW_MACRX_FREEMSG) {
2483 				freemsgchain(mp);
2484 			}
2485 			D1(vswp, "%s:exit\n", __func__);
2486 			return;
2487 		}
2488 	}
2489 	RW_EXIT(&vswp->if_lockrw);
2490 	/*
2491 	 * If COPYMSG flag is passed, then make a copy
2492 	 * of the message chain and send up the copy.
2493 	 */
2494 	if (flags & VSW_MACRX_COPYMSG) {
2495 		mp = copymsgchain(mp);
2496 		if (mp == NULL) {
2497 			D1(vswp, "%s:exit\n", __func__);
2498 			return;
2499 		}
2500 	}
2501 
2502 	D2(vswp, "%s: sending up stack", __func__);
2503 
2504 	mpt = NULL;
2505 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2506 	if (mp != NULL) {
2507 		mac_rx(vswp->if_mh, mrh, mp);
2508 	}
2509 	D1(vswp, "%s:exit\n", __func__);
2510 }
2511 
2512 /* copy mac address of vsw into soft state structure */
2513 static void
2514 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2515 {
2516 	int	i;
2517 
2518 	WRITE_ENTER(&vswp->if_lockrw);
2519 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2520 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2521 		macaddr >>= 8;
2522 	}
2523 	RW_EXIT(&vswp->if_lockrw);
2524 }
2525 
2526 /* Compare VLAN ids, array size expected to be same. */
2527 static boolean_t
2528 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids)
2529 {
2530 	int i, j;
2531 	uint16_t vid;
2532 
2533 	for (i = 0; i < nvids; i++) {
2534 		vid = vids1[i].vl_vid;
2535 		for (j = 0; j < nvids; j++) {
2536 			if (vid == vids2[i].vl_vid)
2537 				break;
2538 		}
2539 		if (j == nvids) {
2540 			return (B_FALSE);
2541 		}
2542 	}
2543 	return (B_TRUE);
2544 }
2545