xref: /illumos-gate/usr/src/uts/sun4v/io/vsw.c (revision cd11837edb943ce20ca539d505e60b469f89bf20)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <sys/varargs.h>
53 #include <sys/machsystm.h>
54 #include <sys/modctl.h>
55 #include <sys/modhash.h>
56 #include <sys/mac.h>
57 #include <sys/mac_ether.h>
58 #include <sys/taskq.h>
59 #include <sys/note.h>
60 #include <sys/mach_descrip.h>
61 #include <sys/mac.h>
62 #include <sys/mdeg.h>
63 #include <sys/ldc.h>
64 #include <sys/vsw_fdb.h>
65 #include <sys/vsw.h>
66 #include <sys/vio_mailbox.h>
67 #include <sys/vnet_mailbox.h>
68 #include <sys/vnet_common.h>
69 #include <sys/vio_util.h>
70 #include <sys/sdt.h>
71 #include <sys/atomic.h>
72 #include <sys/callb.h>
73 #include <sys/vlan.h>
74 
75 /*
76  * Function prototypes.
77  */
78 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
79 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
80 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
81 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);
82 
83 /* MDEG routines */
84 static	int vsw_mdeg_register(vsw_t *vswp);
85 static	void vsw_mdeg_unregister(vsw_t *vswp);
86 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
87 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
88 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
89 static	int vsw_read_mdprops(vsw_t *vswp);
90 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
91 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
92 	uint16_t *nvidsp, uint16_t *default_idp);
93 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
94 	md_t *mdp, mde_cookie_t *node);
95 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
96 	mde_cookie_t node);
97 static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
98 	uint32_t *mtu);
99 static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
100 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
101 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
102 
103 /* Mac driver related routines */
104 static int vsw_mac_register(vsw_t *);
105 static int vsw_mac_unregister(vsw_t *);
106 static int vsw_m_stat(void *, uint_t, uint64_t *);
107 static void vsw_m_stop(void *arg);
108 static int vsw_m_start(void *arg);
109 static int vsw_m_unicst(void *arg, const uint8_t *);
110 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
111 static int vsw_m_promisc(void *arg, boolean_t);
112 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
113 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
114     mblk_t *mp, vsw_macrx_flags_t flags);
115 
116 /*
117  * Functions imported from other files.
118  */
119 extern void vsw_setup_switching_timeout(void *arg);
120 extern void vsw_stop_switching_timeout(vsw_t *vswp);
121 extern int vsw_setup_switching(vsw_t *);
122 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
123     vsw_port_t *port, mac_resource_handle_t mrh);
124 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
125 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
126 extern void vsw_del_mcst_vsw(vsw_t *);
127 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
128 extern int vsw_detach_ports(vsw_t *vswp);
129 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
130 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
131 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
132 	md_t *prev_mdp, mde_cookie_t prev_mdex);
133 extern	int vsw_port_attach(vsw_port_t *port);
134 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
135 extern int vsw_mac_attach(vsw_t *vswp);
136 extern void vsw_mac_detach(vsw_t *vswp);
137 extern int vsw_mac_open(vsw_t *vswp);
138 extern void vsw_mac_close(vsw_t *vswp);
139 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
140 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
141 extern void vsw_reconfig_hw(vsw_t *);
142 extern void vsw_unset_addrs(vsw_t *vswp);
143 extern void vsw_set_addrs(vsw_t *vswp);
144 extern void vsw_create_vlans(void *arg, int type);
145 extern void vsw_destroy_vlans(void *arg, int type);
146 extern void vsw_vlan_add_ids(void *arg, int type);
147 extern void vsw_vlan_remove_ids(void *arg, int type);
148 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
149 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
150 	mblk_t **npt);
151 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
152 extern void vsw_hio_cleanup(vsw_t *vswp);
153 extern void vsw_hio_start_ports(vsw_t *vswp);
154 extern void vsw_reset_ports(vsw_t *vswp);
155 extern void vsw_port_reset(vsw_port_t *portp);
156 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
157 
158 /*
159  * Internal tunables.
160  */
161 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
162 int	vsw_wretries = 100;		/* # of write attempts */
163 int	vsw_desc_delay = 0;		/* delay in us */
164 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
165 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
166 int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
167 					/* 300*3 = 900sec(15min) of max tmout */
168 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
169 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
170 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
171 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
172 
173 uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
174 uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
175 uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
176 
177 /* sw timeout for boot delay only, in milliseconds */
178 int vsw_setup_switching_boot_delay = 100 * MILLISEC;
179 
180 /* delay in usec to wait for all references on a fdb entry to be dropped */
181 uint32_t vsw_fdbe_refcnt_delay = 10;
182 
183 /*
184  * Default vlan id. This is only used internally when the "default-vlan-id"
185  * property is not present in the MD device node. Therefore, this should not be
186  * used as a tunable; if this value is changed, the corresponding variable
187  * should be updated to the same value in all vnets connected to this vsw.
188  */
189 uint16_t	vsw_default_vlan_id = 1;
190 
191 /*
192  * Workaround for a version handshake bug in obp's vnet.
193  * If vsw initiates version negotiation starting from the highest version,
194  * obp sends a nack and terminates version handshake. To workaround
195  * this, we do not initiate version handshake when the channel comes up.
196  * Instead, we wait for the peer to send its version info msg and go through
197  * the version protocol exchange. If we successfully negotiate a version,
198  * before sending the ack, we send our version info msg to the peer
199  * using the <major,minor> version that we are about to ack.
200  */
201 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
202 
203 /*
204  * In the absence of "priority-ether-types" property in MD, the following
205  * internal tunable can be set to specify a single priority ethertype.
206  */
207 uint64_t vsw_pri_eth_type = 0;
208 
209 /*
210  * Number of transmit priority buffers that are preallocated per device.
211  * This number is chosen to be a small value to throttle transmission
212  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
213  */
214 uint32_t vsw_pri_tx_nmblks = 64;
215 
216 /*
217  * Number of RARP packets sent to announce macaddr to the physical switch,
218  * after vsw's physical device is changed dynamically or after a guest (client
219  * vnet) is live migrated in.
220  */
221 uint32_t vsw_publish_macaddr_count = 3;
222 
223 boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
224 int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanp */
225 int vsw_hio_cleanup_delay = 10000;	/* 10ms */
226 
227 /*
228  * External tunables.
229  */
230 /*
231  * Enable/disable thread per ring. This is a mode selection
232  * that is done a vsw driver attach time.
233  */
234 boolean_t vsw_multi_ring_enable = B_FALSE;
235 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;
236 
237 /* Number of transmit descriptors -  must be power of 2 */
238 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
239 
240 /*
241  * Max number of mblks received in one receive operation.
242  */
243 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
244 
245 /*
246  * Internal tunables for receive buffer pools, that is,  the size and number of
247  * mblks for each pool. At least 3 sizes must be specified if these are used.
248  * The sizes must be specified in increasing order. Non-zero value of the first
249  * size will be used as a hint to use these values instead of the algorithm
250  * that determines the sizes based on MTU.
251  */
252 uint32_t vsw_mblk_size1 = 0;
253 uint32_t vsw_mblk_size2 = 0;
254 uint32_t vsw_mblk_size3 = 0;
255 uint32_t vsw_mblk_size4 = 0;
256 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
257 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
258 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
259 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */
260 
261 /*
262  * Set this to non-zero to enable additional internal receive buffer pools
263  * based on the MTU of the device for better performance at the cost of more
264  * memory consumption. This is turned off by default, to use allocb(9F) for
265  * receive buffer allocations of sizes > 2K.
266  */
267 boolean_t vsw_jumbo_rxpools = B_FALSE;
268 
269 /*
270  * vsw_max_tx_qcount is the maximum # of packets that can be queued
271  * before the tx worker thread begins processing the queue. Its value
272  * is chosen to be 4x the default length of tx descriptor ring.
273  */
274 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
275 
276 /*
277  * MAC callbacks
278  */
279 static	mac_callbacks_t	vsw_m_callbacks = {
280 	0,
281 	vsw_m_stat,
282 	vsw_m_start,
283 	vsw_m_stop,
284 	vsw_m_promisc,
285 	vsw_m_multicst,
286 	vsw_m_unicst,
287 	vsw_m_tx,
288 	NULL,
289 	NULL,
290 	NULL
291 };
292 
293 static	struct	cb_ops	vsw_cb_ops = {
294 	nulldev,			/* cb_open */
295 	nulldev,			/* cb_close */
296 	nodev,				/* cb_strategy */
297 	nodev,				/* cb_print */
298 	nodev,				/* cb_dump */
299 	nodev,				/* cb_read */
300 	nodev,				/* cb_write */
301 	nodev,				/* cb_ioctl */
302 	nodev,				/* cb_devmap */
303 	nodev,				/* cb_mmap */
304 	nodev,				/* cb_segmap */
305 	nochpoll,			/* cb_chpoll */
306 	ddi_prop_op,			/* cb_prop_op */
307 	NULL,				/* cb_stream */
308 	D_MP,				/* cb_flag */
309 	CB_REV,				/* rev */
310 	nodev,				/* int (*cb_aread)() */
311 	nodev				/* int (*cb_awrite)() */
312 };
313 
314 static	struct	dev_ops	vsw_ops = {
315 	DEVO_REV,		/* devo_rev */
316 	0,			/* devo_refcnt */
317 	NULL,			/* devo_getinfo */
318 	nulldev,		/* devo_identify */
319 	nulldev,		/* devo_probe */
320 	vsw_attach,		/* devo_attach */
321 	vsw_detach,		/* devo_detach */
322 	nodev,			/* devo_reset */
323 	&vsw_cb_ops,		/* devo_cb_ops */
324 	(struct bus_ops *)NULL,	/* devo_bus_ops */
325 	ddi_power		/* devo_power */
326 };
327 
328 extern	struct	mod_ops	mod_driverops;
329 static struct modldrv vswmodldrv = {
330 	&mod_driverops,
331 	"sun4v Virtual Switch",
332 	&vsw_ops,
333 };
334 
/*
 * Take / drop all three locks of an LDC channel structure.
 *
 * Acquisition order is ldc_cblock, ldc_rxlock, ldc_txlock; release is the
 * exact reverse. Each macro is wrapped in do { } while (0) so that it
 * expands to a single statement and remains correct when used as the
 * body of an unbraced if/else or loop. Callers supply the terminating
 * semicolon. Note that 'ldcp' is evaluated more than once.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	_NOTE(CONSTCOND) } while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	_NOTE(CONSTCOND) } while (0)
343 
344 /* Driver soft state ptr  */
345 static void	*vsw_state;
346 
347 /*
348  * Linked list of "vsw_t" structures - one per instance.
349  */
350 vsw_t		*vsw_head = NULL;
351 krwlock_t	vsw_rw;
352 
/*
 * Names of the machine description (MD) properties and nodes used by
 * this driver.
 */

/* node names and basic device identity */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";

/* channel / port identification */
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";

/* priority frames */
static char pri_types_propname[] = "priority-ether-types";

/* vlan ids of the vsw device itself and of its ports */
static char vsw_pvid_propname[] = "port-vlan-id";
static char vsw_vid_propname[] = "vlan-id";
static char vsw_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";

/* miscellaneous */
static char hybrid_propname[] = "hybrid";
static char vsw_mtu_propname[] = "mtu";
374 
375 /*
376  * Matching criteria passed to the MDEG to register interest
377  * in changes to 'virtual-device-port' nodes identified by their
378  * 'id' property.
379  */
380 static md_prop_match_t vport_prop_match[] = {
381 	{ MDET_PROP_VAL,    "id"   },
382 	{ MDET_LIST_END,    NULL    }
383 };
384 
385 static mdeg_node_match_t vport_match = { "virtual-device-port",
386 						vport_prop_match };
387 
388 /*
389  * Matching criteria passed to the MDEG to register interest
390  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
391  * by their 'name' and 'cfg-handle' properties.
392  */
393 static md_prop_match_t vdev_prop_match[] = {
394 	{ MDET_PROP_STR,    "name"   },
395 	{ MDET_PROP_VAL,    "cfg-handle" },
396 	{ MDET_LIST_END,    NULL    }
397 };
398 
399 static mdeg_node_match_t vdev_match = { "virtual-device",
400 						vdev_prop_match };
401 
402 
403 /*
404  * Specification of an MD node passed to the MDEG to filter any
405  * 'vport' nodes that do not belong to the specified node. This
406  * template is copied for each vsw instance and filled in with
407  * the appropriate 'cfg-handle' value before being passed to the MDEG.
408  */
409 static mdeg_prop_spec_t vsw_prop_template[] = {
410 	{ MDET_PROP_STR,    "name",		vsw_propname },
411 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
412 	{ MDET_LIST_END,    NULL,		NULL	}
413 };
414 
/*
 * Store 'val' as the value of entry [1] (the "cfg-handle" slot) of a prop
 * spec array copied from vsw_prop_template. Expands to a parenthesized
 * expression with no trailing semicolon, so it is safe as the body of an
 * unbraced if/else; callers supply the semicolon.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
416 
#ifdef	DEBUG
/*
 * Print debug messages - set to 0x1f to enable all msgs
 * or 0x0 to turn all off.
 */
int vswdbg = 0x0;

/*
 * debug levels:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */

/*
 * Format a debug message and emit it through cmn_err(CE_CONT).
 *
 * vswp - instance the message relates to, or NULL; when non-NULL the
 *        message is prefixed with "vsw<instance>: ".
 * fmt  - printf-style format string, followed by its arguments.
 *
 * The message is formatted into a fixed 512 byte stack buffer. The
 * bounded vsnprintf() is used so that an over-long message is truncated
 * rather than written past the end of the buffer.
 */
void
vswdebug(vsw_t *vswp, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	if (vswp == NULL)
		cmn_err(CE_CONT, "%s\n", buf);
	else
		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
}

#endif	/* DEBUG */
450 
451 static struct modlinkage modlinkage = {
452 	MODREV_1,
453 	&vswmodldrv,
454 	NULL
455 };
456 
457 int
458 _init(void)
459 {
460 	int status;
461 
462 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
463 
464 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
465 	if (status != 0) {
466 		return (status);
467 	}
468 
469 	mac_init_ops(&vsw_ops, DRV_NAME);
470 	status = mod_install(&modlinkage);
471 	if (status != 0) {
472 		ddi_soft_state_fini(&vsw_state);
473 	}
474 	return (status);
475 }
476 
477 int
478 _fini(void)
479 {
480 	int status;
481 
482 	status = mod_remove(&modlinkage);
483 	if (status != 0)
484 		return (status);
485 	mac_fini_ops(&vsw_ops);
486 	ddi_soft_state_fini(&vsw_state);
487 
488 	rw_destroy(&vsw_rw);
489 
490 	return (status);
491 }
492 
493 int
494 _info(struct modinfo *modinfop)
495 {
496 	return (mod_info(&modlinkage, modinfop));
497 }
498 
499 static int
500 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
501 {
502 	vsw_t		*vswp;
503 	int		instance;
504 	char		hashname[MAXNAMELEN];
505 	char		qname[TASKQ_NAMELEN];
506 	enum		{ PROG_init = 0x00,
507 				PROG_locks = 0x01,
508 				PROG_readmd = 0x02,
509 				PROG_fdb = 0x04,
510 				PROG_mfdb = 0x08,
511 				PROG_taskq = 0x10,
512 				PROG_swmode = 0x20,
513 				PROG_macreg = 0x40,
514 				PROG_mdreg = 0x80}
515 			progress;
516 
517 	progress = PROG_init;
518 	int		rv;
519 
520 	switch (cmd) {
521 	case DDI_ATTACH:
522 		break;
523 	case DDI_RESUME:
524 		/* nothing to do for this non-device */
525 		return (DDI_SUCCESS);
526 	case DDI_PM_RESUME:
527 	default:
528 		return (DDI_FAILURE);
529 	}
530 
531 	instance = ddi_get_instance(dip);
532 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
533 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
534 		return (DDI_FAILURE);
535 	}
536 	vswp = ddi_get_soft_state(vsw_state, instance);
537 
538 	if (vswp == NULL) {
539 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
540 		goto vsw_attach_fail;
541 	}
542 
543 	vswp->dip = dip;
544 	vswp->instance = instance;
545 	ddi_set_driver_private(dip, (caddr_t)vswp);
546 
547 	mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL);
548 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
549 	mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL);
550 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
551 	rw_init(&vswp->mac_rwlock, NULL, RW_DRIVER, NULL);
552 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
553 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
554 
555 	progress |= PROG_locks;
556 
557 	rv = vsw_read_mdprops(vswp);
558 	if (rv != 0)
559 		goto vsw_attach_fail;
560 
561 	progress |= PROG_readmd;
562 
563 	/* setup the unicast forwarding database  */
564 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
565 	    vswp->instance);
566 	D2(vswp, "creating unicast hash table (%s)...", hashname);
567 	vswp->fdb_nchains = vsw_fdb_nchains;
568 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
569 	    mod_hash_null_valdtor, sizeof (void *));
570 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
571 	progress |= PROG_fdb;
572 
573 	/* setup the multicast fowarding database */
574 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
575 	    vswp->instance);
576 	D2(vswp, "creating multicast hash table %s)...", hashname);
577 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
578 	    mod_hash_null_valdtor, sizeof (void *));
579 
580 	progress |= PROG_mfdb;
581 
582 	/*
583 	 * Create the taskq which will process all the VIO
584 	 * control messages.
585 	 */
586 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
587 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
588 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
589 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
590 		    vswp->instance);
591 		goto vsw_attach_fail;
592 	}
593 
594 	progress |= PROG_taskq;
595 
596 	/* prevent auto-detaching */
597 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
598 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
599 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
600 		    "instance %u", DDI_NO_AUTODETACH, instance);
601 	}
602 
603 	/*
604 	 * The null switching function is set to avoid panic until
605 	 * switch mode is setup.
606 	 */
607 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
608 
609 	/*
610 	 * Setup the required switching mode,
611 	 * based on the mdprops that we read earlier.
612 	 * schedule a short timeout (0.1 sec) for the first time
613 	 * setup and avoid calling mac_open() directly here,
614 	 * others are regular timeout 3 secs.
615 	 */
616 	mutex_enter(&vswp->swtmout_lock);
617 
618 	vswp->swtmout_enabled = B_TRUE;
619 	vswp->swtmout_id = timeout(vsw_setup_switching_timeout, vswp,
620 	    drv_usectohz(vsw_setup_switching_boot_delay));
621 
622 	mutex_exit(&vswp->swtmout_lock);
623 
624 	progress |= PROG_swmode;
625 
626 	/* Register with mac layer as a provider */
627 	rv = vsw_mac_register(vswp);
628 	if (rv != 0)
629 		goto vsw_attach_fail;
630 
631 	progress |= PROG_macreg;
632 
633 	/*
634 	 * Now we have everything setup, register an interest in
635 	 * specific MD nodes.
636 	 *
637 	 * The callback is invoked in 2 cases, firstly if upon mdeg
638 	 * registration there are existing nodes which match our specified
639 	 * criteria, and secondly if the MD is changed (and again, there
640 	 * are nodes which we are interested in present within it. Note
641 	 * that our callback will be invoked even if our specified nodes
642 	 * have not actually changed).
643 	 *
644 	 */
645 	rv = vsw_mdeg_register(vswp);
646 	if (rv != 0)
647 		goto vsw_attach_fail;
648 
649 	progress |= PROG_mdreg;
650 
651 	WRITE_ENTER(&vsw_rw);
652 	vswp->next = vsw_head;
653 	vsw_head = vswp;
654 	RW_EXIT(&vsw_rw);
655 
656 	ddi_report_dev(vswp->dip);
657 	return (DDI_SUCCESS);
658 
659 vsw_attach_fail:
660 	DERR(NULL, "vsw_attach: failed");
661 
662 	if (progress & PROG_mdreg) {
663 		vsw_mdeg_unregister(vswp);
664 		(void) vsw_detach_ports(vswp);
665 	}
666 
667 	if (progress & PROG_macreg)
668 		(void) vsw_mac_unregister(vswp);
669 
670 	if (progress & PROG_swmode) {
671 		vsw_stop_switching_timeout(vswp);
672 		vsw_hio_cleanup(vswp);
673 		WRITE_ENTER(&vswp->mac_rwlock);
674 		vsw_mac_detach(vswp);
675 		vsw_mac_close(vswp);
676 		RW_EXIT(&vswp->mac_rwlock);
677 	}
678 
679 	if (progress & PROG_taskq)
680 		ddi_taskq_destroy(vswp->taskq_p);
681 
682 	if (progress & PROG_mfdb)
683 		mod_hash_destroy_hash(vswp->mfdb);
684 
685 	if (progress & PROG_fdb) {
686 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
687 		mod_hash_destroy_hash(vswp->fdb_hashp);
688 	}
689 
690 	if (progress & PROG_readmd) {
691 		if (VSW_PRI_ETH_DEFINED(vswp)) {
692 			kmem_free(vswp->pri_types,
693 			    sizeof (uint16_t) * vswp->pri_num_types);
694 		}
695 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
696 	}
697 
698 	if (progress & PROG_locks) {
699 		rw_destroy(&vswp->plist.lockrw);
700 		rw_destroy(&vswp->mfdbrw);
701 		rw_destroy(&vswp->mac_rwlock);
702 		rw_destroy(&vswp->if_lockrw);
703 		mutex_destroy(&vswp->swtmout_lock);
704 		mutex_destroy(&vswp->mca_lock);
705 		mutex_destroy(&vswp->hw_lock);
706 	}
707 
708 	ddi_soft_state_free(vsw_state, instance);
709 	return (DDI_FAILURE);
710 }
711 
712 static int
713 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
714 {
715 	vio_mblk_pool_t		*poolp, *npoolp;
716 	vsw_t			**vswpp, *vswp;
717 	int 			instance;
718 
719 	instance = ddi_get_instance(dip);
720 	vswp = ddi_get_soft_state(vsw_state, instance);
721 
722 	if (vswp == NULL) {
723 		return (DDI_FAILURE);
724 	}
725 
726 	switch (cmd) {
727 	case DDI_DETACH:
728 		break;
729 	case DDI_SUSPEND:
730 	case DDI_PM_SUSPEND:
731 	default:
732 		return (DDI_FAILURE);
733 	}
734 
735 	D2(vswp, "detaching instance %d", instance);
736 
737 	/* Stop any pending timeout to setup switching mode. */
738 	vsw_stop_switching_timeout(vswp);
739 
740 	if (vswp->if_state & VSW_IF_REG) {
741 		if (vsw_mac_unregister(vswp) != 0) {
742 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
743 			    "MAC layer", vswp->instance);
744 			return (DDI_FAILURE);
745 		}
746 	}
747 
748 	vsw_mdeg_unregister(vswp);
749 
750 	/* remove mac layer callback */
751 	WRITE_ENTER(&vswp->mac_rwlock);
752 	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
753 		mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
754 		vswp->mrh = NULL;
755 	}
756 	RW_EXIT(&vswp->mac_rwlock);
757 
758 	if (vsw_detach_ports(vswp) != 0) {
759 		cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports",
760 		    vswp->instance);
761 		return (DDI_FAILURE);
762 	}
763 
764 	rw_destroy(&vswp->if_lockrw);
765 
766 	/* cleanup HybridIO */
767 	vsw_hio_cleanup(vswp);
768 
769 	mutex_destroy(&vswp->hw_lock);
770 
771 	/*
772 	 * Now that the ports have been deleted, stop and close
773 	 * the physical device.
774 	 */
775 	WRITE_ENTER(&vswp->mac_rwlock);
776 
777 	vsw_mac_detach(vswp);
778 	vsw_mac_close(vswp);
779 
780 	RW_EXIT(&vswp->mac_rwlock);
781 
782 	rw_destroy(&vswp->mac_rwlock);
783 	mutex_destroy(&vswp->swtmout_lock);
784 
785 	/*
786 	 * Destroy any free pools that may still exist.
787 	 */
788 	poolp = vswp->rxh;
789 	while (poolp != NULL) {
790 		npoolp = vswp->rxh = poolp->nextp;
791 		if (vio_destroy_mblks(poolp) != 0) {
792 			vswp->rxh = poolp;
793 			return (DDI_FAILURE);
794 		}
795 		poolp = npoolp;
796 	}
797 
798 	/*
799 	 * Remove this instance from any entries it may be on in
800 	 * the hash table by using the list of addresses maintained
801 	 * in the vsw_t structure.
802 	 */
803 	vsw_del_mcst_vsw(vswp);
804 
805 	vswp->mcap = NULL;
806 	mutex_destroy(&vswp->mca_lock);
807 
808 	/*
809 	 * By now any pending tasks have finished and the underlying
810 	 * ldc's have been destroyed, so its safe to delete the control
811 	 * message taskq.
812 	 */
813 	if (vswp->taskq_p != NULL)
814 		ddi_taskq_destroy(vswp->taskq_p);
815 
816 	/*
817 	 * At this stage all the data pointers in the hash table
818 	 * should be NULL, as all the ports have been removed and will
819 	 * have deleted themselves from the port lists which the data
820 	 * pointers point to. Hence we can destroy the table using the
821 	 * default destructors.
822 	 */
823 	D2(vswp, "vsw_detach: destroying hash tables..");
824 	vsw_destroy_vlans(vswp, VSW_LOCALDEV);
825 	mod_hash_destroy_hash(vswp->fdb_hashp);
826 	vswp->fdb_hashp = NULL;
827 
828 	WRITE_ENTER(&vswp->mfdbrw);
829 	mod_hash_destroy_hash(vswp->mfdb);
830 	vswp->mfdb = NULL;
831 	RW_EXIT(&vswp->mfdbrw);
832 	rw_destroy(&vswp->mfdbrw);
833 
834 	/* free pri_types table */
835 	if (VSW_PRI_ETH_DEFINED(vswp)) {
836 		kmem_free(vswp->pri_types,
837 		    sizeof (uint16_t) * vswp->pri_num_types);
838 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
839 	}
840 
841 	ddi_remove_minor_node(dip, NULL);
842 
843 	rw_destroy(&vswp->plist.lockrw);
844 	WRITE_ENTER(&vsw_rw);
845 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
846 		if (*vswpp == vswp) {
847 			*vswpp = vswp->next;
848 			break;
849 		}
850 	}
851 	RW_EXIT(&vsw_rw);
852 	ddi_soft_state_free(vsw_state, instance);
853 
854 	return (DDI_SUCCESS);
855 }
856 
857 /*
858  * Get the value of the "vsw-phys-dev" property in the specified
859  * node. This property is the name of the physical device that
860  * the virtual switch will use to talk to the outside world.
861  *
862  * Note it is valid for this property to be NULL (but the property
863  * itself must exist). Callers of this routine should verify that
864  * the value returned is what they expected (i.e. either NULL or non NULL).
865  *
866  * On success returns value of the property in region pointed to by
867  * the 'name' argument, and with return value of 0. Otherwise returns 1.
868  */
869 static int
870 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
871 {
872 	int		len = 0;
873 	int		instance;
874 	char		*physname = NULL;
875 	char		*dev;
876 	const char	*dev_name;
877 	char		myname[MAXNAMELEN];
878 
879 	dev_name = ddi_driver_name(vswp->dip);
880 	instance = ddi_get_instance(vswp->dip);
881 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
882 
883 	if (md_get_prop_data(mdp, node, physdev_propname,
884 	    (uint8_t **)(&physname), &len) != 0) {
885 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
886 		    "device(s) from MD", vswp->instance);
887 		return (1);
888 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
889 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
890 		    vswp->instance, physname);
891 		return (1);
892 	} else if (strcmp(myname, physname) == 0) {
893 		/*
894 		 * Prevent the vswitch from opening itself as the
895 		 * network device.
896 		 */
897 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
898 		    vswp->instance, physname);
899 		return (1);
900 	} else {
901 		(void) strncpy(name, physname, strlen(physname) + 1);
902 		D2(vswp, "%s: using first device specified (%s)",
903 		    __func__, physname);
904 	}
905 
906 #ifdef DEBUG
907 	/*
908 	 * As a temporary measure to aid testing we check to see if there
909 	 * is a vsw.conf file present. If there is we use the value of the
910 	 * vsw_physname property in the file as the name of the physical
911 	 * device, overriding the value from the MD.
912 	 *
913 	 * There may be multiple devices listed, but for the moment
914 	 * we just use the first one.
915 	 */
916 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
917 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
918 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
919 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
920 			    vswp->instance, dev);
921 			ddi_prop_free(dev);
922 			return (1);
923 		} else {
924 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
925 			    "config file", vswp->instance, dev);
926 
927 			(void) strncpy(name, dev, strlen(dev) + 1);
928 		}
929 
930 		ddi_prop_free(dev);
931 	}
932 #endif
933 
934 	return (0);
935 }
936 
937 /*
938  * Read the 'vsw-switch-mode' property from the specified MD node.
939  *
940  * Returns 0 on success and the number of modes found in 'found',
941  * otherwise returns 1.
942  */
943 static int
944 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
945 						uint8_t *modes, int *found)
946 {
947 	int		len = 0;
948 	int		smode_num = 0;
949 	char		*smode = NULL;
950 	char		*curr_mode = NULL;
951 
952 	D1(vswp, "%s: enter", __func__);
953 
954 	/*
955 	 * Get the switch-mode property. The modes are listed in
956 	 * decreasing order of preference, i.e. prefered mode is
957 	 * first item in list.
958 	 */
959 	len = 0;
960 	smode_num = 0;
961 	if (md_get_prop_data(mdp, node, smode_propname,
962 	    (uint8_t **)(&smode), &len) != 0) {
963 		/*
964 		 * Unable to get switch-mode property from MD, nothing
965 		 * more we can do.
966 		 */
967 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
968 		    " from the MD", vswp->instance);
969 		*found = 0;
970 		return (1);
971 	}
972 
973 	curr_mode = smode;
974 	/*
975 	 * Modes of operation:
976 	 * 'switched'	 - layer 2 switching, underlying HW in
977 	 *			programmed mode.
978 	 * 'promiscuous' - layer 2 switching, underlying HW in
979 	 *			promiscuous mode.
980 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
981 	 *			in non-promiscuous mode.
982 	 */
983 	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
984 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
985 		if (strcmp(curr_mode, "switched") == 0) {
986 			modes[smode_num++] = VSW_LAYER2;
987 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
988 			modes[smode_num++] = VSW_LAYER2_PROMISC;
989 		} else if (strcmp(curr_mode, "routed") == 0) {
990 			modes[smode_num++] = VSW_LAYER3;
991 		} else {
992 			DWARN(vswp, "%s: Unknown switch mode %s, "
993 			    "setting to default 'switched' mode",
994 			    __func__, curr_mode);
995 			modes[smode_num++] = VSW_LAYER2;
996 		}
997 		curr_mode += strlen(curr_mode) + 1;
998 	}
999 	*found = smode_num;
1000 
1001 	D2(vswp, "%s: %d modes found", __func__, smode_num);
1002 
1003 	D1(vswp, "%s: exit", __func__);
1004 
1005 	return (0);
1006 }
1007 
1008 /*
1009  * Register with the MAC layer as a network device, so we
1010  * can be plumbed if necessary.
1011  */
1012 static int
1013 vsw_mac_register(vsw_t *vswp)
1014 {
1015 	mac_register_t	*macp;
1016 	int		rv;
1017 
1018 	D1(vswp, "%s: enter", __func__);
1019 
1020 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1021 		return (EINVAL);
1022 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1023 	macp->m_driver = vswp;
1024 	macp->m_dip = vswp->dip;
1025 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1026 	macp->m_callbacks = &vsw_m_callbacks;
1027 	macp->m_min_sdu = 0;
1028 	macp->m_max_sdu = vswp->mtu;
1029 	macp->m_margin = VLAN_TAGSZ;
1030 	rv = mac_register(macp, &vswp->if_mh);
1031 	mac_free(macp);
1032 	if (rv != 0) {
1033 		/*
1034 		 * Treat this as a non-fatal error as we may be
1035 		 * able to operate in some other mode.
1036 		 */
1037 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1038 		    "a provider with MAC layer", vswp->instance);
1039 		return (rv);
1040 	}
1041 
1042 	vswp->if_state |= VSW_IF_REG;
1043 
1044 	D1(vswp, "%s: exit", __func__);
1045 
1046 	return (rv);
1047 }
1048 
1049 static int
1050 vsw_mac_unregister(vsw_t *vswp)
1051 {
1052 	int		rv = 0;
1053 
1054 	D1(vswp, "%s: enter", __func__);
1055 
1056 	WRITE_ENTER(&vswp->if_lockrw);
1057 
1058 	if (vswp->if_state & VSW_IF_REG) {
1059 		rv = mac_unregister(vswp->if_mh);
1060 		if (rv != 0) {
1061 			DWARN(vswp, "%s: unable to unregister from MAC "
1062 			    "framework", __func__);
1063 
1064 			RW_EXIT(&vswp->if_lockrw);
1065 			D1(vswp, "%s: fail exit", __func__);
1066 			return (rv);
1067 		}
1068 
1069 		/* mark i/f as down and unregistered */
1070 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1071 	}
1072 	RW_EXIT(&vswp->if_lockrw);
1073 
1074 	D1(vswp, "%s: exit", __func__);
1075 
1076 	return (rv);
1077 }
1078 
1079 static int
1080 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1081 {
1082 	vsw_t			*vswp = (vsw_t *)arg;
1083 
1084 	D1(vswp, "%s: enter", __func__);
1085 
1086 	WRITE_ENTER(&vswp->mac_rwlock);
1087 	if (vswp->mh == NULL) {
1088 		RW_EXIT(&vswp->mac_rwlock);
1089 		return (EINVAL);
1090 	}
1091 
1092 	/* return stats from underlying device */
1093 	*val = mac_stat_get(vswp->mh, stat);
1094 
1095 	RW_EXIT(&vswp->mac_rwlock);
1096 
1097 	return (0);
1098 }
1099 
1100 static void
1101 vsw_m_stop(void *arg)
1102 {
1103 	vsw_t	*vswp = (vsw_t *)arg;
1104 
1105 	D1(vswp, "%s: enter", __func__);
1106 
1107 	WRITE_ENTER(&vswp->if_lockrw);
1108 	vswp->if_state &= ~VSW_IF_UP;
1109 	RW_EXIT(&vswp->if_lockrw);
1110 
1111 	mutex_enter(&vswp->hw_lock);
1112 
1113 	(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1114 
1115 	if (vswp->recfg_reqd)
1116 		vsw_reconfig_hw(vswp);
1117 
1118 	mutex_exit(&vswp->hw_lock);
1119 
1120 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1121 }
1122 
1123 static int
1124 vsw_m_start(void *arg)
1125 {
1126 	vsw_t		*vswp = (vsw_t *)arg;
1127 
1128 	D1(vswp, "%s: enter", __func__);
1129 
1130 	WRITE_ENTER(&vswp->if_lockrw);
1131 
1132 	vswp->if_state |= VSW_IF_UP;
1133 
1134 	if (vswp->switching_setup_done == B_FALSE) {
1135 		/*
1136 		 * If the switching mode has not been setup yet, just
1137 		 * return. The unicast address will be programmed
1138 		 * after the physical device is successfully setup by the
1139 		 * timeout handler.
1140 		 */
1141 		RW_EXIT(&vswp->if_lockrw);
1142 		return (0);
1143 	}
1144 
1145 	/* if in layer2 mode, program unicast address. */
1146 	if (vswp->mh != NULL) {
1147 		mutex_enter(&vswp->hw_lock);
1148 		(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1149 		mutex_exit(&vswp->hw_lock);
1150 	}
1151 
1152 	RW_EXIT(&vswp->if_lockrw);
1153 
1154 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1155 	return (0);
1156 }
1157 
1158 /*
1159  * Change the local interface address.
1160  *
1161  * Note: we don't support this entry point. The local
1162  * mac address of the switch can only be changed via its
1163  * MD node properties.
1164  */
1165 static int
1166 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1167 {
1168 	_NOTE(ARGUNUSED(arg, macaddr))
1169 
1170 	return (DDI_FAILURE);
1171 }
1172 
1173 static int
1174 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1175 {
1176 	vsw_t		*vswp = (vsw_t *)arg;
1177 	mcst_addr_t	*mcst_p = NULL;
1178 	uint64_t	addr = 0x0;
1179 	int		i, ret = 0;
1180 
1181 	D1(vswp, "%s: enter", __func__);
1182 
1183 	/*
1184 	 * Convert address into form that can be used
1185 	 * as hash table key.
1186 	 */
1187 	for (i = 0; i < ETHERADDRL; i++) {
1188 		addr = (addr << 8) | mca[i];
1189 	}
1190 
1191 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1192 
1193 	if (add) {
1194 		D2(vswp, "%s: adding multicast", __func__);
1195 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1196 			/*
1197 			 * Update the list of multicast addresses
1198 			 * contained within the vsw_t structure to
1199 			 * include this new one.
1200 			 */
1201 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1202 			if (mcst_p == NULL) {
1203 				DERR(vswp, "%s unable to alloc mem", __func__);
1204 				(void) vsw_del_mcst(vswp,
1205 				    VSW_LOCALDEV, addr, NULL);
1206 				return (1);
1207 			}
1208 			mcst_p->addr = addr;
1209 			ether_copy(mca, &mcst_p->mca);
1210 
1211 			/*
1212 			 * Call into the underlying driver to program the
1213 			 * address into HW.
1214 			 */
1215 			WRITE_ENTER(&vswp->mac_rwlock);
1216 			if (vswp->mh != NULL) {
1217 				ret = mac_multicst_add(vswp->mh, mca);
1218 				if (ret != 0) {
1219 					cmn_err(CE_NOTE, "!vsw%d: unable to "
1220 					    "add multicast address",
1221 					    vswp->instance);
1222 					RW_EXIT(&vswp->mac_rwlock);
1223 					(void) vsw_del_mcst(vswp,
1224 					    VSW_LOCALDEV, addr, NULL);
1225 					kmem_free(mcst_p, sizeof (*mcst_p));
1226 					return (ret);
1227 				}
1228 				mcst_p->mac_added = B_TRUE;
1229 			}
1230 			RW_EXIT(&vswp->mac_rwlock);
1231 
1232 			mutex_enter(&vswp->mca_lock);
1233 			mcst_p->nextp = vswp->mcap;
1234 			vswp->mcap = mcst_p;
1235 			mutex_exit(&vswp->mca_lock);
1236 		} else {
1237 			cmn_err(CE_NOTE, "!vsw%d: unable to add multicast "
1238 			    "address", vswp->instance);
1239 		}
1240 		return (ret);
1241 	}
1242 
1243 	D2(vswp, "%s: removing multicast", __func__);
1244 	/*
1245 	 * Remove the address from the hash table..
1246 	 */
1247 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1248 
1249 		/*
1250 		 * ..and then from the list maintained in the
1251 		 * vsw_t structure.
1252 		 */
1253 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1254 		ASSERT(mcst_p != NULL);
1255 
1256 		WRITE_ENTER(&vswp->mac_rwlock);
1257 		if (vswp->mh != NULL && mcst_p->mac_added) {
1258 			(void) mac_multicst_remove(vswp->mh, mca);
1259 			mcst_p->mac_added = B_FALSE;
1260 		}
1261 		RW_EXIT(&vswp->mac_rwlock);
1262 		kmem_free(mcst_p, sizeof (*mcst_p));
1263 	}
1264 
1265 	D1(vswp, "%s: exit", __func__);
1266 
1267 	return (0);
1268 }
1269 
1270 static int
1271 vsw_m_promisc(void *arg, boolean_t on)
1272 {
1273 	vsw_t		*vswp = (vsw_t *)arg;
1274 
1275 	D1(vswp, "%s: enter", __func__);
1276 
1277 	WRITE_ENTER(&vswp->if_lockrw);
1278 	if (on)
1279 		vswp->if_state |= VSW_IF_PROMISC;
1280 	else
1281 		vswp->if_state &= ~VSW_IF_PROMISC;
1282 	RW_EXIT(&vswp->if_lockrw);
1283 
1284 	D1(vswp, "%s: exit", __func__);
1285 
1286 	return (0);
1287 }
1288 
1289 static mblk_t *
1290 vsw_m_tx(void *arg, mblk_t *mp)
1291 {
1292 	vsw_t		*vswp = (vsw_t *)arg;
1293 
1294 	D1(vswp, "%s: enter", __func__);
1295 
1296 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1297 
1298 	if (mp == NULL) {
1299 		return (NULL);
1300 	}
1301 
1302 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1303 
1304 	D1(vswp, "%s: exit", __func__);
1305 
1306 	return (NULL);
1307 }
1308 
1309 /*
1310  * Register for machine description (MD) updates.
1311  *
1312  * Returns 0 on success, 1 on failure.
1313  */
1314 static int
1315 vsw_mdeg_register(vsw_t *vswp)
1316 {
1317 	mdeg_prop_spec_t	*pspecp;
1318 	mdeg_node_spec_t	*inst_specp;
1319 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1320 	size_t			templatesz;
1321 	int			rv;
1322 
1323 	D1(vswp, "%s: enter", __func__);
1324 
1325 	/*
1326 	 * Allocate and initialize a per-instance copy
1327 	 * of the global property spec array that will
1328 	 * uniquely identify this vsw instance.
1329 	 */
1330 	templatesz = sizeof (vsw_prop_template);
1331 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1332 
1333 	bcopy(vsw_prop_template, pspecp, templatesz);
1334 
1335 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1336 
1337 	/* initialize the complete prop spec structure */
1338 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1339 	inst_specp->namep = "virtual-device";
1340 	inst_specp->specp = pspecp;
1341 
1342 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1343 	    vswp->regprop);
1344 	/*
1345 	 * Register an interest in 'virtual-device' nodes with a
1346 	 * 'name' property of 'virtual-network-switch'
1347 	 */
1348 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1349 	    (void *)vswp, &mdeg_hdl);
1350 	if (rv != MDEG_SUCCESS) {
1351 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1352 		    __func__, rv);
1353 		goto mdeg_reg_fail;
1354 	}
1355 
1356 	/*
1357 	 * Register an interest in 'vsw-port' nodes.
1358 	 */
1359 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1360 	    (void *)vswp, &mdeg_port_hdl);
1361 	if (rv != MDEG_SUCCESS) {
1362 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1363 		(void) mdeg_unregister(mdeg_hdl);
1364 		goto mdeg_reg_fail;
1365 	}
1366 
1367 	/* save off data that will be needed later */
1368 	vswp->inst_spec = inst_specp;
1369 	vswp->mdeg_hdl = mdeg_hdl;
1370 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1371 
1372 	D1(vswp, "%s: exit", __func__);
1373 	return (0);
1374 
1375 mdeg_reg_fail:
1376 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1377 	    vswp->instance);
1378 	kmem_free(pspecp, templatesz);
1379 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1380 
1381 	vswp->mdeg_hdl = NULL;
1382 	vswp->mdeg_port_hdl = NULL;
1383 
1384 	return (1);
1385 }
1386 
1387 static void
1388 vsw_mdeg_unregister(vsw_t *vswp)
1389 {
1390 	D1(vswp, "vsw_mdeg_unregister: enter");
1391 
1392 	if (vswp->mdeg_hdl != NULL)
1393 		(void) mdeg_unregister(vswp->mdeg_hdl);
1394 
1395 	if (vswp->mdeg_port_hdl != NULL)
1396 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1397 
1398 	if (vswp->inst_spec != NULL) {
1399 		if (vswp->inst_spec->specp != NULL) {
1400 			(void) kmem_free(vswp->inst_spec->specp,
1401 			    sizeof (vsw_prop_template));
1402 			vswp->inst_spec->specp = NULL;
1403 		}
1404 
1405 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1406 		vswp->inst_spec = NULL;
1407 	}
1408 
1409 	D1(vswp, "vsw_mdeg_unregister: exit");
1410 }
1411 
1412 /*
1413  * Mdeg callback invoked for the vsw node itself.
1414  */
1415 static int
1416 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1417 {
1418 	vsw_t		*vswp;
1419 	md_t		*mdp;
1420 	mde_cookie_t	node;
1421 	uint64_t	inst;
1422 	char		*node_name = NULL;
1423 
1424 	if (resp == NULL)
1425 		return (MDEG_FAILURE);
1426 
1427 	vswp = (vsw_t *)cb_argp;
1428 
1429 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1430 	    " : prev matched %d", __func__, resp->added.nelem,
1431 	    resp->removed.nelem, resp->match_curr.nelem,
1432 	    resp->match_prev.nelem);
1433 
1434 	/*
1435 	 * We get an initial callback for this node as 'added'
1436 	 * after registering with mdeg. Note that we would have
1437 	 * already gathered information about this vsw node by
1438 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1439 	 * So, there is a window where the properties of this
1440 	 * node might have changed when we get this initial 'added'
1441 	 * callback. We handle this as if an update occured
1442 	 * and invoke the same function which handles updates to
1443 	 * the properties of this vsw-node if any.
1444 	 *
1445 	 * A non-zero 'match' value indicates that the MD has been
1446 	 * updated and that a virtual-network-switch node is
1447 	 * present which may or may not have been updated. It is
1448 	 * up to the clients to examine their own nodes and
1449 	 * determine if they have changed.
1450 	 */
1451 	if (resp->added.nelem != 0) {
1452 
1453 		if (resp->added.nelem != 1) {
1454 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1455 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1456 			return (MDEG_FAILURE);
1457 		}
1458 
1459 		mdp = resp->added.mdp;
1460 		node = resp->added.mdep[0];
1461 
1462 	} else if (resp->match_curr.nelem != 0) {
1463 
1464 		if (resp->match_curr.nelem != 1) {
1465 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1466 			    "invalid: %d\n", vswp->instance,
1467 			    resp->match_curr.nelem);
1468 			return (MDEG_FAILURE);
1469 		}
1470 
1471 		mdp = resp->match_curr.mdp;
1472 		node = resp->match_curr.mdep[0];
1473 
1474 	} else {
1475 		return (MDEG_FAILURE);
1476 	}
1477 
1478 	/* Validate name and instance */
1479 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1480 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1481 		return (MDEG_FAILURE);
1482 	}
1483 
1484 	/* is this a virtual-network-switch? */
1485 	if (strcmp(node_name, vsw_propname) != 0) {
1486 		DERR(vswp, "%s: Invalid node name: %s\n",
1487 		    __func__, node_name);
1488 		return (MDEG_FAILURE);
1489 	}
1490 
1491 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1492 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1493 		    __func__);
1494 		return (MDEG_FAILURE);
1495 	}
1496 
1497 	/* is this the right instance of vsw? */
1498 	if (inst != vswp->regprop) {
1499 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1500 		    __func__, inst);
1501 		return (MDEG_FAILURE);
1502 	}
1503 
1504 	vsw_update_md_prop(vswp, mdp, node);
1505 
1506 	return (MDEG_SUCCESS);
1507 }
1508 
1509 /*
1510  * Mdeg callback invoked for changes to the vsw-port nodes
1511  * under the vsw node.
1512  */
1513 static int
1514 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1515 {
1516 	vsw_t		*vswp;
1517 	int		idx;
1518 	md_t		*mdp;
1519 	mde_cookie_t	node;
1520 	uint64_t	inst;
1521 	int		rv;
1522 
1523 	if ((resp == NULL) || (cb_argp == NULL))
1524 		return (MDEG_FAILURE);
1525 
1526 	vswp = (vsw_t *)cb_argp;
1527 
1528 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1529 	    " : prev matched %d", __func__, resp->added.nelem,
1530 	    resp->removed.nelem, resp->match_curr.nelem,
1531 	    resp->match_prev.nelem);
1532 
1533 	/* process added ports */
1534 	for (idx = 0; idx < resp->added.nelem; idx++) {
1535 		mdp = resp->added.mdp;
1536 		node = resp->added.mdep[idx];
1537 
1538 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1539 
1540 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1541 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1542 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1543 		}
1544 	}
1545 
1546 	/* process removed ports */
1547 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1548 		mdp = resp->removed.mdp;
1549 		node = resp->removed.mdep[idx];
1550 
1551 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1552 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1553 			    __func__, id_propname, idx);
1554 			continue;
1555 		}
1556 
1557 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1558 
1559 		if (vsw_port_detach(vswp, inst) != 0) {
1560 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1561 			    vswp->instance, inst);
1562 		}
1563 	}
1564 
1565 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1566 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1567 		    resp->match_curr.mdep[idx],
1568 		    resp->match_prev.mdp,
1569 		    resp->match_prev.mdep[idx]);
1570 	}
1571 
1572 	D1(vswp, "%s: exit", __func__);
1573 
1574 	return (MDEG_SUCCESS);
1575 }
1576 
1577 /*
1578  * Scan the machine description for this instance of vsw
1579  * and read its properties. Called only from vsw_attach().
1580  * Returns: 0 on success, 1 on failure.
1581  */
1582 static int
1583 vsw_read_mdprops(vsw_t *vswp)
1584 {
1585 	md_t		*mdp = NULL;
1586 	mde_cookie_t	rootnode;
1587 	mde_cookie_t	*listp = NULL;
1588 	uint64_t	inst;
1589 	uint64_t	cfgh;
1590 	char		*name;
1591 	int		rv = 1;
1592 	int		num_nodes = 0;
1593 	int		num_devs = 0;
1594 	int		listsz = 0;
1595 	int		i;
1596 
1597 	/*
1598 	 * In each 'virtual-device' node in the MD there is a
1599 	 * 'cfg-handle' property which is the MD's concept of
1600 	 * an instance number (this may be completely different from
1601 	 * the device drivers instance #). OBP reads that value and
1602 	 * stores it in the 'reg' property of the appropriate node in
1603 	 * the device tree. We first read this reg property and use this
1604 	 * to compare against the 'cfg-handle' property of vsw nodes
1605 	 * in MD to get to this specific vsw instance and then read
1606 	 * other properties that we are interested in.
1607 	 * We also cache the value of 'reg' property and use it later
1608 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1609 	 */
1610 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1611 	    DDI_PROP_DONTPASS, reg_propname, -1);
1612 	if (inst == -1) {
1613 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1614 		    "OBP device tree", vswp->instance, reg_propname);
1615 		return (rv);
1616 	}
1617 
1618 	vswp->regprop = inst;
1619 
1620 	if ((mdp = md_get_handle()) == NULL) {
1621 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1622 		return (rv);
1623 	}
1624 
1625 	num_nodes = md_node_count(mdp);
1626 	ASSERT(num_nodes > 0);
1627 
1628 	listsz = num_nodes * sizeof (mde_cookie_t);
1629 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1630 
1631 	rootnode = md_root_node(mdp);
1632 
1633 	/* search for all "virtual_device" nodes */
1634 	num_devs = md_scan_dag(mdp, rootnode,
1635 	    md_find_name(mdp, vdev_propname),
1636 	    md_find_name(mdp, "fwd"), listp);
1637 	if (num_devs <= 0) {
1638 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1639 		goto vsw_readmd_exit;
1640 	}
1641 
1642 	/*
1643 	 * Now loop through the list of virtual-devices looking for
1644 	 * devices with name "virtual-network-switch" and for each
1645 	 * such device compare its instance with what we have from
1646 	 * the 'reg' property to find the right node in MD and then
1647 	 * read all its properties.
1648 	 */
1649 	for (i = 0; i < num_devs; i++) {
1650 
1651 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1652 			DWARN(vswp, "%s: name property not found\n",
1653 			    __func__);
1654 			goto vsw_readmd_exit;
1655 		}
1656 
1657 		/* is this a virtual-network-switch? */
1658 		if (strcmp(name, vsw_propname) != 0)
1659 			continue;
1660 
1661 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1662 			DWARN(vswp, "%s: cfg-handle property not found\n",
1663 			    __func__);
1664 			goto vsw_readmd_exit;
1665 		}
1666 
1667 		/* is this the required instance of vsw? */
1668 		if (inst != cfgh)
1669 			continue;
1670 
1671 		/* now read all properties of this vsw instance */
1672 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1673 		break;
1674 	}
1675 
1676 vsw_readmd_exit:
1677 
1678 	kmem_free(listp, listsz);
1679 	(void) md_fini_handle(mdp);
1680 	return (rv);
1681 }
1682 
1683 /*
1684  * Read the initial start-of-day values from the specified MD node.
1685  */
1686 static int
1687 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1688 {
1689 	int		i;
1690 	uint64_t 	macaddr = 0;
1691 
1692 	D1(vswp, "%s: enter", __func__);
1693 
1694 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1695 		return (1);
1696 	}
1697 
1698 	/* mac address for vswitch device itself */
1699 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1700 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1701 		    vswp->instance);
1702 		return (1);
1703 	}
1704 
1705 	vsw_save_lmacaddr(vswp, macaddr);
1706 
1707 	if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) {
1708 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1709 		    "defaulting to 'switched' mode",
1710 		    __func__, smode_propname);
1711 
1712 		for (i = 0; i < NUM_SMODES; i++)
1713 			vswp->smode[i] = VSW_LAYER2;
1714 
1715 		vswp->smode_num = NUM_SMODES;
1716 	} else {
1717 		ASSERT(vswp->smode_num != 0);
1718 	}
1719 
1720 	/* read mtu */
1721 	vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
1722 	if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
1723 		vswp->mtu = ETHERMTU;
1724 	}
1725 	vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
1726 	    VLAN_TAGSZ;
1727 
1728 	/* read vlan id properties of this vsw instance */
1729 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1730 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1731 
1732 	/* read priority-ether-types */
1733 	vsw_read_pri_eth_types(vswp, mdp, node);
1734 
1735 	D1(vswp, "%s: exit", __func__);
1736 	return (0);
1737 }
1738 
1739 /*
1740  * Read vlan id properties of the given MD node.
1741  * Arguments:
1742  *   arg:          device argument(vsw device or a port)
1743  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1744  *   mdp:          machine description
1745  *   node:         md node cookie
1746  *
1747  * Returns:
1748  *   pvidp:        port-vlan-id of the node
1749  *   vidspp:       list of vlan-ids of the node
1750  *   nvidsp:       # of vlan-ids in the list
1751  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1752  */
1753 static void
1754 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1755 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1756 	uint16_t *default_idp)
1757 {
1758 	vsw_t		*vswp;
1759 	vsw_port_t	*portp;
1760 	char		*pvid_propname;
1761 	char		*vid_propname;
1762 	uint_t		nvids = 0;
1763 	uint32_t	vids_size;
1764 	int		rv;
1765 	int		i;
1766 	uint64_t	*data;
1767 	uint64_t	val;
1768 	int		size;
1769 	int		inst;
1770 
1771 	if (type == VSW_LOCALDEV) {
1772 
1773 		vswp = (vsw_t *)arg;
1774 		pvid_propname = vsw_pvid_propname;
1775 		vid_propname = vsw_vid_propname;
1776 		inst = vswp->instance;
1777 
1778 	} else if (type == VSW_VNETPORT) {
1779 
1780 		portp = (vsw_port_t *)arg;
1781 		vswp = portp->p_vswp;
1782 		pvid_propname = port_pvid_propname;
1783 		vid_propname = port_vid_propname;
1784 		inst = portp->p_instance;
1785 
1786 	} else {
1787 		return;
1788 	}
1789 
1790 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1791 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1792 		if (rv != 0) {
1793 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1794 			    vsw_dvid_propname);
1795 
1796 			*default_idp = vsw_default_vlan_id;
1797 		} else {
1798 			*default_idp = val & 0xFFF;
1799 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1800 			    vsw_dvid_propname, inst, *default_idp);
1801 		}
1802 	}
1803 
1804 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1805 	if (rv != 0) {
1806 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1807 		*pvidp = vsw_default_vlan_id;
1808 	} else {
1809 
1810 		*pvidp = val & 0xFFF;
1811 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1812 		    pvid_propname, inst, *pvidp);
1813 	}
1814 
1815 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1816 	    &size);
1817 	if (rv != 0) {
1818 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1819 		size = 0;
1820 	} else {
1821 		size /= sizeof (uint64_t);
1822 	}
1823 	nvids = size;
1824 
1825 	if (nvids != 0) {
1826 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1827 		vids_size = sizeof (uint16_t) * nvids;
1828 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1829 		for (i = 0; i < nvids; i++) {
1830 			(*vidspp)[i] = data[i] & 0xFFFF;
1831 			D2(vswp, " %d ", (*vidspp)[i]);
1832 		}
1833 		D2(vswp, "\n");
1834 	}
1835 
1836 	*nvidsp = nvids;
1837 }
1838 
1839 /*
1840  * This function reads "priority-ether-types" property from md. This property
1841  * is used to enable support for priority frames. Applications which need
1842  * guaranteed and timely delivery of certain high priority frames to/from
1843  * a vnet or vsw within ldoms, should configure this property by providing
1844  * the ether type(s) for which the priority facility is needed.
1845  * Normal data frames are delivered over a ldc channel using the descriptor
1846  * ring mechanism which is constrained by factors such as descriptor ring size,
1847  * the rate at which the ring is processed at the peer ldc end point, etc.
1848  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1849  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1850  * descriptor ring path and enables a more reliable and timely delivery of
1851  * frames to the peer.
1852  */
1853 static void
1854 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1855 {
1856 	int		rv;
1857 	uint16_t	*types;
1858 	uint64_t	*data;
1859 	int		size;
1860 	int		i;
1861 	size_t		mblk_sz;
1862 
1863 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1864 	    (uint8_t **)&data, &size);
1865 	if (rv != 0) {
1866 		/*
1867 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1868 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1869 		 */
1870 		if (vsw_pri_eth_type != 0) {
1871 			size = sizeof (vsw_pri_eth_type);
1872 			data = &vsw_pri_eth_type;
1873 		} else {
1874 			D3(vswp, "%s: prop(%s) not found", __func__,
1875 			    pri_types_propname);
1876 			size = 0;
1877 		}
1878 	}
1879 
1880 	if (size == 0) {
1881 		vswp->pri_num_types = 0;
1882 		return;
1883 	}
1884 
1885 	/*
1886 	 * we have some priority-ether-types defined;
1887 	 * allocate a table of these types and also
1888 	 * allocate a pool of mblks to transmit these
1889 	 * priority packets.
1890 	 */
1891 	size /= sizeof (uint64_t);
1892 	vswp->pri_num_types = size;
1893 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1894 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1895 		types[i] = data[i] & 0xFFFF;
1896 	}
1897 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1898 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1899 }
1900 
1901 static void
1902 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1903 {
1904 	int		rv;
1905 	int		inst;
1906 	uint64_t	val;
1907 	char		*mtu_propname;
1908 
1909 	mtu_propname = vsw_mtu_propname;
1910 	inst = vswp->instance;
1911 
1912 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1913 	if (rv != 0) {
1914 		D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
1915 		*mtu = vsw_ethermtu;
1916 	} else {
1917 
1918 		*mtu = val & 0xFFFF;
1919 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1920 		    mtu_propname, inst, *mtu);
1921 	}
1922 }
1923 
1924 /*
1925  * Update the mtu of the vsw device. We first check if the device has been
1926  * plumbed and if so fail the mtu update. Otherwise, we continue to update the
1927  * new mtu and reset all ports to initiate handshake re-negotiation with peers
1928  * using the new mtu.
1929  */
1930 static int
1931 vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
1932 {
1933 	int	rv;
1934 
1935 	WRITE_ENTER(&vswp->if_lockrw);
1936 
1937 	if (vswp->if_state & VSW_IF_UP) {
1938 
1939 		RW_EXIT(&vswp->if_lockrw);
1940 
1941 		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
1942 		    " as the device is plumbed\n", vswp->instance);
1943 		return (EBUSY);
1944 
1945 	} else {
1946 
1947 		D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
1948 		    __func__, vswp->mtu, mtu);
1949 
1950 		vswp->mtu = mtu;
1951 		vswp->max_frame_size = vswp->mtu +
1952 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1953 
1954 		rv = mac_maxsdu_update(vswp->if_mh, mtu);
1955 		if (rv != 0) {
1956 			cmn_err(CE_NOTE,
1957 			    "!vsw%d: Unable to update mtu with mac"
1958 			    " layer\n", vswp->instance);
1959 		}
1960 
1961 		RW_EXIT(&vswp->if_lockrw);
1962 
1963 		WRITE_ENTER(&vswp->mac_rwlock);
1964 
1965 		if (vswp->mh == 0) {
1966 			/*
1967 			 * Physical device is not available yet; mtu will be
1968 			 * updated after we open it successfully, as we have
1969 			 * saved the new mtu.
1970 			 */
1971 			D2(vswp, "%s: Physical device:%s is not "
1972 			    "available yet; can't update its mtu\n",
1973 			    __func__, vswp->physname);
1974 
1975 		} else {
1976 
1977 			/*
1978 			 * Stop and restart to enable the
1979 			 * new mtu in the physical device.
1980 			 */
1981 			vsw_mac_detach(vswp);
1982 			rv = vsw_mac_attach(vswp);
1983 			if (rv != 0) {
1984 				RW_EXIT(&vswp->mac_rwlock);
1985 				return (EIO);
1986 			}
1987 
1988 		}
1989 
1990 		RW_EXIT(&vswp->mac_rwlock);
1991 
1992 		/* Reset ports to renegotiate with the new mtu */
1993 		vsw_reset_ports(vswp);
1994 
1995 	}
1996 
1997 	return (0);
1998 }
1999 
2000 /*
2001  * Check to see if the relevant properties in the specified node have
2002  * changed, and if so take the appropriate action.
2003  *
2004  * If any of the properties are missing or invalid we don't take
2005  * any action, as this function should only be invoked when modifications
2006  * have been made to what we assume is a working configuration, which
2007  * we leave active.
2008  *
2009  * Note it is legal for this routine to be invoked even if none of the
2010  * properties in the port node within the MD have actually changed.
2011  */
2012 static void
2013 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
2014 {
2015 	char		physname[LIFNAMSIZ];
2016 	char		drv[LIFNAMSIZ];
2017 	uint_t		ddi_instance;
2018 	uint8_t		new_smode[NUM_SMODES];
2019 	int		i, smode_num = 0;
2020 	uint64_t 	macaddr = 0;
2021 	enum		{MD_init = 0x1,
2022 				MD_physname = 0x2,
2023 				MD_macaddr = 0x4,
2024 				MD_smode = 0x8,
2025 				MD_vlans = 0x10,
2026 				MD_mtu = 0x20} updated;
2027 	int		rv;
2028 	uint16_t	pvid;
2029 	uint16_t	*vids;
2030 	uint16_t	nvids;
2031 	uint32_t	mtu;
2032 
2033 	updated = MD_init;
2034 
2035 	D1(vswp, "%s: enter", __func__);
2036 
2037 	/*
2038 	 * Check if name of physical device in MD has changed.
2039 	 */
2040 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
2041 		/*
2042 		 * Do basic sanity check on new device name/instance,
2043 		 * if its non NULL. It is valid for the device name to
2044 		 * have changed from a non NULL to a NULL value, i.e.
2045 		 * the vsw is being changed to 'routed' mode.
2046 		 */
2047 		if ((strlen(physname) != 0) &&
2048 		    (ddi_parse(physname, drv,
2049 		    &ddi_instance) != DDI_SUCCESS)) {
2050 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
2051 			    " a valid device name/instance",
2052 			    vswp->instance, physname);
2053 			goto fail_reconf;
2054 		}
2055 
2056 		if (strcmp(physname, vswp->physname)) {
2057 			D2(vswp, "%s: device name changed from %s to %s",
2058 			    __func__, vswp->physname, physname);
2059 
2060 			updated |= MD_physname;
2061 		} else {
2062 			D2(vswp, "%s: device name unchanged at %s",
2063 			    __func__, vswp->physname);
2064 		}
2065 	} else {
2066 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
2067 		    "device from updated MD.", vswp->instance);
2068 		goto fail_reconf;
2069 	}
2070 
2071 	/*
2072 	 * Check if MAC address has changed.
2073 	 */
2074 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
2075 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
2076 		    vswp->instance);
2077 		goto fail_reconf;
2078 	} else {
2079 		uint64_t maddr = macaddr;
2080 		READ_ENTER(&vswp->if_lockrw);
2081 		for (i = ETHERADDRL - 1; i >= 0; i--) {
2082 			if (vswp->if_addr.ether_addr_octet[i]
2083 			    != (macaddr & 0xFF)) {
2084 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
2085 				    __func__, i,
2086 				    vswp->if_addr.ether_addr_octet[i],
2087 				    (macaddr & 0xFF));
2088 				updated |= MD_macaddr;
2089 				macaddr = maddr;
2090 				break;
2091 			}
2092 			macaddr >>= 8;
2093 		}
2094 		RW_EXIT(&vswp->if_lockrw);
2095 		if (updated & MD_macaddr) {
2096 			vsw_save_lmacaddr(vswp, macaddr);
2097 		}
2098 	}
2099 
2100 	/*
2101 	 * Check if switching modes have changed.
2102 	 */
2103 	if (vsw_get_md_smodes(vswp, mdp, node,
2104 	    new_smode, &smode_num)) {
2105 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2106 		    vswp->instance, smode_propname);
2107 		goto fail_reconf;
2108 	} else {
2109 		ASSERT(smode_num != 0);
2110 		if (smode_num != vswp->smode_num) {
2111 			D2(vswp, "%s: number of modes changed from %d to %d",
2112 			    __func__, vswp->smode_num, smode_num);
2113 		}
2114 
2115 		for (i = 0; i < smode_num; i++) {
2116 			if (new_smode[i] != vswp->smode[i]) {
2117 				D2(vswp, "%s: mode changed from %d to %d",
2118 				    __func__, vswp->smode[i], new_smode[i]);
2119 				updated |= MD_smode;
2120 				break;
2121 			}
2122 		}
2123 	}
2124 
2125 	/* Read the vlan ids */
2126 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2127 	    &nvids, NULL);
2128 
2129 	/* Determine if there are any vlan id updates */
2130 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2131 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2132 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2133 	    bcmp(vids, vswp->vids, sizeof (uint16_t) * nvids))) {
2134 		updated |= MD_vlans;
2135 	}
2136 
2137 	/* Read mtu */
2138 	vsw_mtu_read(vswp, mdp, node, &mtu);
2139 	if (mtu != vswp->mtu) {
2140 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2141 			updated |= MD_mtu;
2142 		} else {
2143 			cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2144 			    " as the specified value:%d is invalid\n",
2145 			    vswp->instance, mtu);
2146 		}
2147 	}
2148 
2149 	/*
2150 	 * Now make any changes which are needed...
2151 	 */
2152 
2153 	if (updated & (MD_physname | MD_smode)) {
2154 
2155 		/*
2156 		 * Stop any pending timeout to setup switching mode.
2157 		 */
2158 		vsw_stop_switching_timeout(vswp);
2159 
2160 		/* Cleanup HybridIO */
2161 		vsw_hio_cleanup(vswp);
2162 
2163 		/*
2164 		 * Remove unicst, mcst addrs of vsw interface
2165 		 * and ports from the physdev.
2166 		 */
2167 		vsw_unset_addrs(vswp);
2168 
2169 		/*
2170 		 * Stop, detach and close the old device..
2171 		 */
2172 		WRITE_ENTER(&vswp->mac_rwlock);
2173 
2174 		vsw_mac_detach(vswp);
2175 		vsw_mac_close(vswp);
2176 
2177 		RW_EXIT(&vswp->mac_rwlock);
2178 
2179 		/*
2180 		 * Update phys name.
2181 		 */
2182 		if (updated & MD_physname) {
2183 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2184 			    vswp->instance, vswp->physname, physname);
2185 			(void) strncpy(vswp->physname,
2186 			    physname, strlen(physname) + 1);
2187 		}
2188 
2189 		/*
2190 		 * Update array with the new switch mode values.
2191 		 */
2192 		if (updated & MD_smode) {
2193 			for (i = 0; i < smode_num; i++)
2194 				vswp->smode[i] = new_smode[i];
2195 
2196 			vswp->smode_num = smode_num;
2197 			vswp->smode_idx = 0;
2198 		}
2199 
2200 		/*
2201 		 * ..and attach, start the new device.
2202 		 */
2203 		rv = vsw_setup_switching(vswp);
2204 		if (rv == EAGAIN) {
2205 			/*
2206 			 * Unable to setup switching mode.
2207 			 * As the error is EAGAIN, schedule a timeout to retry
2208 			 * and return. Programming addresses of ports and
2209 			 * vsw interface will be done when the timeout handler
2210 			 * completes successfully.
2211 			 */
2212 			mutex_enter(&vswp->swtmout_lock);
2213 
2214 			vswp->swtmout_enabled = B_TRUE;
2215 			vswp->swtmout_id =
2216 			    timeout(vsw_setup_switching_timeout, vswp,
2217 			    (vsw_setup_switching_delay *
2218 			    drv_usectohz(MICROSEC)));
2219 
2220 			mutex_exit(&vswp->swtmout_lock);
2221 
2222 			return;
2223 
2224 		} else if (rv) {
2225 			goto fail_update;
2226 		}
2227 
2228 		/*
2229 		 * program unicst, mcst addrs of vsw interface
2230 		 * and ports in the physdev.
2231 		 */
2232 		vsw_set_addrs(vswp);
2233 
2234 		/* Start HIO for ports that have already connected */
2235 		vsw_hio_start_ports(vswp);
2236 
2237 	} else if (updated & MD_macaddr) {
2238 		/*
2239 		 * We enter here if only MD_macaddr is exclusively updated.
2240 		 * If MD_physname and/or MD_smode are also updated, then
2241 		 * as part of that, we would have implicitly processed
2242 		 * MD_macaddr update (above).
2243 		 */
2244 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2245 		    vswp->instance, macaddr);
2246 
2247 		READ_ENTER(&vswp->if_lockrw);
2248 		if (vswp->if_state & VSW_IF_UP) {
2249 
2250 			mutex_enter(&vswp->hw_lock);
2251 			/*
2252 			 * Remove old mac address of vsw interface
2253 			 * from the physdev
2254 			 */
2255 			(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
2256 			/*
2257 			 * Program new mac address of vsw interface
2258 			 * in the physdev
2259 			 */
2260 			rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
2261 			mutex_exit(&vswp->hw_lock);
2262 			if (rv != 0) {
2263 				cmn_err(CE_NOTE,
2264 				    "!vsw%d: failed to program interface "
2265 				    "unicast address\n", vswp->instance);
2266 			}
2267 			/*
2268 			 * Notify the MAC layer of the changed address.
2269 			 */
2270 			mac_unicst_update(vswp->if_mh,
2271 			    (uint8_t *)&vswp->if_addr);
2272 
2273 		}
2274 		RW_EXIT(&vswp->if_lockrw);
2275 
2276 	}
2277 
2278 	if (updated & MD_vlans) {
2279 		/* Remove existing vlan ids from the hash table. */
2280 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2281 
2282 		/* save the new vlan ids */
2283 		vswp->pvid = pvid;
2284 		if (vswp->nvids != 0) {
2285 			kmem_free(vswp->vids, sizeof (uint16_t) * vswp->nvids);
2286 			vswp->nvids = 0;
2287 		}
2288 		if (nvids != 0) {
2289 			vswp->nvids = nvids;
2290 			vswp->vids = vids;
2291 		}
2292 
2293 		/* add these new vlan ids into hash table */
2294 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2295 	} else {
2296 		if (nvids != 0) {
2297 			kmem_free(vids, sizeof (uint16_t) * nvids);
2298 		}
2299 	}
2300 
2301 	if (updated & MD_mtu) {
2302 
2303 		rv = vsw_mtu_update(vswp, mtu);
2304 		if (rv != 0) {
2305 			goto fail_update;
2306 		}
2307 
2308 	}
2309 
2310 	return;
2311 
2312 fail_reconf:
2313 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2314 	return;
2315 
2316 fail_update:
2317 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2318 	    vswp->instance);
2319 }
2320 
2321 /*
2322  * Read the port's md properties.
2323  */
2324 static int
2325 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2326 	md_t *mdp, mde_cookie_t *node)
2327 {
2328 	uint64_t		ldc_id;
2329 	uint8_t			*addrp;
2330 	int			i, addrsz;
2331 	int			num_nodes = 0, nchan = 0;
2332 	int			listsz = 0;
2333 	mde_cookie_t		*listp = NULL;
2334 	struct ether_addr	ea;
2335 	uint64_t		macaddr;
2336 	uint64_t		inst = 0;
2337 	uint64_t		val;
2338 
2339 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2340 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2341 		    id_propname);
2342 		return (1);
2343 	}
2344 
2345 	/*
2346 	 * Find the channel endpoint node(s) (which should be under this
2347 	 * port node) which contain the channel id(s).
2348 	 */
2349 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2350 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2351 		    __func__, num_nodes);
2352 		return (1);
2353 	}
2354 
2355 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2356 
2357 	/* allocate enough space for node list */
2358 	listsz = num_nodes * sizeof (mde_cookie_t);
2359 	listp = kmem_zalloc(listsz, KM_SLEEP);
2360 
2361 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2362 	    md_find_name(mdp, "fwd"), listp);
2363 
2364 	if (nchan <= 0) {
2365 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2366 		kmem_free(listp, listsz);
2367 		return (1);
2368 	}
2369 
2370 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2371 
2372 	/* use property from first node found */
2373 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2374 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2375 		    id_propname);
2376 		kmem_free(listp, listsz);
2377 		return (1);
2378 	}
2379 
2380 	/* don't need list any more */
2381 	kmem_free(listp, listsz);
2382 
2383 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2384 
2385 	/* read mac-address property */
2386 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2387 	    &addrp, &addrsz)) {
2388 		DWARN(vswp, "%s: prop(%s) not found",
2389 		    __func__, remaddr_propname);
2390 		return (1);
2391 	}
2392 
2393 	if (addrsz < ETHERADDRL) {
2394 		DWARN(vswp, "%s: invalid address size", __func__);
2395 		return (1);
2396 	}
2397 
2398 	macaddr = *((uint64_t *)addrp);
2399 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2400 
2401 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2402 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2403 		macaddr >>= 8;
2404 	}
2405 
2406 	/* now update all properties into the port */
2407 	portp->p_vswp = vswp;
2408 	portp->p_instance = inst;
2409 	portp->addr_set = VSW_ADDR_UNSET;
2410 	ether_copy(&ea, &portp->p_macaddr);
2411 	if (nchan > VSW_PORT_MAX_LDCS) {
2412 		D2(vswp, "%s: using first of %d ldc ids",
2413 		    __func__, nchan);
2414 		nchan = VSW_PORT_MAX_LDCS;
2415 	}
2416 	portp->num_ldcs = nchan;
2417 	portp->ldc_ids =
2418 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2419 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2420 
2421 	/* read vlan id properties of this port node */
2422 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2423 	    &portp->vids, &portp->nvids, NULL);
2424 
2425 	/* Check if hybrid property is present */
2426 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2427 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2428 		portp->p_hio_enabled = B_TRUE;
2429 	} else {
2430 		portp->p_hio_enabled = B_FALSE;
2431 	}
2432 	/*
2433 	 * Port hio capability determined after version
2434 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2435 	 */
2436 	portp->p_hio_capable = B_FALSE;
2437 	return (0);
2438 }
2439 
2440 /*
2441  * Add a new port to the system.
2442  *
2443  * Returns 0 on success, 1 on failure.
2444  */
2445 int
2446 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2447 {
2448 	vsw_port_t	*portp;
2449 	int		rv;
2450 
2451 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2452 
2453 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2454 	if (rv != 0) {
2455 		kmem_free(portp, sizeof (*portp));
2456 		return (1);
2457 	}
2458 
2459 	rv = vsw_port_attach(portp);
2460 	if (rv != 0) {
2461 		DERR(vswp, "%s: failed to attach port", __func__);
2462 		return (1);
2463 	}
2464 
2465 	return (0);
2466 }
2467 
2468 static int
2469 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2470 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2471 {
2472 	uint64_t	cport_num;
2473 	uint64_t	pport_num;
2474 	vsw_port_list_t	*plistp;
2475 	vsw_port_t	*portp;
2476 	boolean_t	updated_vlans = B_FALSE;
2477 	uint16_t	pvid;
2478 	uint16_t	*vids;
2479 	uint16_t	nvids;
2480 	uint64_t	val;
2481 	boolean_t	hio_enabled = B_FALSE;
2482 
2483 	/*
2484 	 * For now, we get port updates only if vlan ids changed.
2485 	 * We read the port num and do some sanity check.
2486 	 */
2487 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2488 		return (1);
2489 	}
2490 
2491 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2492 		return (1);
2493 	}
2494 	if (cport_num != pport_num)
2495 		return (1);
2496 
2497 	plistp = &(vswp->plist);
2498 
2499 	READ_ENTER(&plistp->lockrw);
2500 
2501 	portp = vsw_lookup_port(vswp, cport_num);
2502 	if (portp == NULL) {
2503 		RW_EXIT(&plistp->lockrw);
2504 		return (1);
2505 	}
2506 
2507 	/* Read the vlan ids */
2508 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2509 	    &vids, &nvids, NULL);
2510 
2511 	/* Determine if there are any vlan id updates */
2512 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2513 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2514 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2515 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2516 		updated_vlans = B_TRUE;
2517 	}
2518 
2519 	if (updated_vlans == B_TRUE) {
2520 
2521 		/* Remove existing vlan ids from the hash table. */
2522 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2523 
2524 		/* save the new vlan ids */
2525 		portp->pvid = pvid;
2526 		if (portp->nvids != 0) {
2527 			kmem_free(portp->vids,
2528 			    sizeof (uint16_t) * portp->nvids);
2529 			portp->nvids = 0;
2530 		}
2531 		if (nvids != 0) {
2532 			portp->vids = kmem_zalloc(sizeof (uint16_t) *
2533 			    nvids, KM_SLEEP);
2534 			bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2535 			portp->nvids = nvids;
2536 			kmem_free(vids, sizeof (uint16_t) * nvids);
2537 		}
2538 
2539 		/* add these new vlan ids into hash table */
2540 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2541 
2542 		/* reset the port if it is vlan unaware (ver < 1.3) */
2543 		vsw_vlan_unaware_port_reset(portp);
2544 	}
2545 
2546 	/* Check if hybrid property is present */
2547 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2548 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2549 		hio_enabled = B_TRUE;
2550 	}
2551 
2552 	if (portp->p_hio_enabled != hio_enabled) {
2553 		vsw_hio_port_update(portp, hio_enabled);
2554 	}
2555 
2556 	RW_EXIT(&plistp->lockrw);
2557 
2558 	return (0);
2559 }
2560 
2561 /*
2562  * vsw_mac_rx -- A common function to send packets to the interface.
2563  * By default this function check if the interface is UP or not, the
2564  * rest of the behaviour depends on the flags as below:
2565  *
2566  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2567  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2568  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2569  */
2570 void
2571 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2572     mblk_t *mp, vsw_macrx_flags_t flags)
2573 {
2574 	mblk_t		*mpt;
2575 
2576 	D1(vswp, "%s:enter\n", __func__);
2577 	READ_ENTER(&vswp->if_lockrw);
2578 	/* Check if the interface is up */
2579 	if (!(vswp->if_state & VSW_IF_UP)) {
2580 		RW_EXIT(&vswp->if_lockrw);
2581 		/* Free messages only if FREEMSG flag specified */
2582 		if (flags & VSW_MACRX_FREEMSG) {
2583 			freemsgchain(mp);
2584 		}
2585 		D1(vswp, "%s:exit\n", __func__);
2586 		return;
2587 	}
2588 	/*
2589 	 * If PROMISC flag is passed, then check if
2590 	 * the interface is in the PROMISC mode.
2591 	 * If not, drop the messages.
2592 	 */
2593 	if (flags & VSW_MACRX_PROMISC) {
2594 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2595 			RW_EXIT(&vswp->if_lockrw);
2596 			/* Free messages only if FREEMSG flag specified */
2597 			if (flags & VSW_MACRX_FREEMSG) {
2598 				freemsgchain(mp);
2599 			}
2600 			D1(vswp, "%s:exit\n", __func__);
2601 			return;
2602 		}
2603 	}
2604 	RW_EXIT(&vswp->if_lockrw);
2605 	/*
2606 	 * If COPYMSG flag is passed, then make a copy
2607 	 * of the message chain and send up the copy.
2608 	 */
2609 	if (flags & VSW_MACRX_COPYMSG) {
2610 		mp = copymsgchain(mp);
2611 		if (mp == NULL) {
2612 			D1(vswp, "%s:exit\n", __func__);
2613 			return;
2614 		}
2615 	}
2616 
2617 	D2(vswp, "%s: sending up stack", __func__);
2618 
2619 	mpt = NULL;
2620 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2621 	if (mp != NULL) {
2622 		mac_rx(vswp->if_mh, mrh, mp);
2623 	}
2624 	D1(vswp, "%s:exit\n", __func__);
2625 }
2626 
2627 /* copy mac address of vsw into soft state structure */
2628 static void
2629 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2630 {
2631 	int	i;
2632 
2633 	WRITE_ENTER(&vswp->if_lockrw);
2634 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2635 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2636 		macaddr >>= 8;
2637 	}
2638 	RW_EXIT(&vswp->if_lockrw);
2639 }
2640