xref: /titanic_51/usr/src/uts/sun4v/io/vsw.c (revision 342440ec94087b8c751c580ab9ed6c693d31d418)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <sys/varargs.h>
53 #include <sys/machsystm.h>
54 #include <sys/modctl.h>
55 #include <sys/modhash.h>
56 #include <sys/mac.h>
57 #include <sys/mac_ether.h>
58 #include <sys/taskq.h>
59 #include <sys/note.h>
60 #include <sys/mach_descrip.h>
61 #include <sys/mac.h>
62 #include <sys/mdeg.h>
63 #include <sys/ldc.h>
64 #include <sys/vsw_fdb.h>
65 #include <sys/vsw.h>
66 #include <sys/vio_mailbox.h>
67 #include <sys/vnet_mailbox.h>
68 #include <sys/vnet_common.h>
69 #include <sys/vio_util.h>
70 #include <sys/sdt.h>
71 #include <sys/atomic.h>
72 #include <sys/callb.h>
73 #include <sys/vlan.h>
74 
75 /*
76  * Function prototypes.
77  */
78 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
79 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
80 static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
81 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
82 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);
83 
84 /* MDEG routines */
85 static	int vsw_mdeg_register(vsw_t *vswp);
86 static	void vsw_mdeg_unregister(vsw_t *vswp);
87 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
88 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
89 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
90 static	int vsw_read_mdprops(vsw_t *vswp);
91 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
92 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
93 	uint16_t *nvidsp, uint16_t *default_idp);
94 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
95 	md_t *mdp, mde_cookie_t *node);
96 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
97 	mde_cookie_t node);
98 static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
99 	uint32_t *mtu);
100 static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
101 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
102 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
103 
104 /* Mac driver related routines */
105 static int vsw_mac_register(vsw_t *);
106 static int vsw_mac_unregister(vsw_t *);
107 static int vsw_m_stat(void *, uint_t, uint64_t *);
108 static void vsw_m_stop(void *arg);
109 static int vsw_m_start(void *arg);
110 static int vsw_m_unicst(void *arg, const uint8_t *);
111 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
112 static int vsw_m_promisc(void *arg, boolean_t);
113 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
114 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
115     mblk_t *mp, vsw_macrx_flags_t flags);
116 
117 /*
118  * Functions imported from other files.
119  */
120 extern void vsw_setup_switching_timeout(void *arg);
121 extern void vsw_stop_switching_timeout(vsw_t *vswp);
122 extern int vsw_setup_switching(vsw_t *);
123 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
124     vsw_port_t *port, mac_resource_handle_t mrh);
125 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
126 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
127 extern void vsw_del_mcst_vsw(vsw_t *);
128 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
129 extern int vsw_detach_ports(vsw_t *vswp);
130 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
131 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
132 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
133 	md_t *prev_mdp, mde_cookie_t prev_mdex);
134 extern	int vsw_port_attach(vsw_port_t *port);
135 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
136 extern int vsw_mac_attach(vsw_t *vswp);
137 extern void vsw_mac_detach(vsw_t *vswp);
138 extern int vsw_mac_open(vsw_t *vswp);
139 extern void vsw_mac_close(vsw_t *vswp);
140 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
141 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
142 extern void vsw_reconfig_hw(vsw_t *);
143 extern void vsw_unset_addrs(vsw_t *vswp);
144 extern void vsw_set_addrs(vsw_t *vswp);
145 extern void vsw_create_vlans(void *arg, int type);
146 extern void vsw_destroy_vlans(void *arg, int type);
147 extern void vsw_vlan_add_ids(void *arg, int type);
148 extern void vsw_vlan_remove_ids(void *arg, int type);
149 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
150 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
151 	mblk_t **npt);
152 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
153 extern void vsw_hio_cleanup(vsw_t *vswp);
154 extern void vsw_hio_start_ports(vsw_t *vswp);
155 extern void vsw_reset_ports(vsw_t *vswp);
156 extern void vsw_port_reset(vsw_port_t *portp);
157 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
158 
159 /*
160  * Internal tunables.
161  */
162 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
163 int	vsw_wretries = 100;		/* # of write attempts */
164 int	vsw_desc_delay = 0;		/* delay in us */
165 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
166 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
167 int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
168 					/* 300*3 = 900sec(15min) of max tmout */
169 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
170 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
171 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
172 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
173 
174 uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
175 uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
176 uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
177 
/*
 * sw timeout for boot delay only. The value is handed to drv_usectohz(),
 * so it is expressed in microseconds (100 * MILLISEC usec = 0.1 sec).
 */
179 int vsw_setup_switching_boot_delay = 100 * MILLISEC;
180 
181 /* delay in usec to wait for all references on a fdb entry to be dropped */
182 uint32_t vsw_fdbe_refcnt_delay = 10;
183 
184 /*
185  * Default vlan id. This is only used internally when the "default-vlan-id"
186  * property is not present in the MD device node. Therefore, this should not be
187  * used as a tunable; if this value is changed, the corresponding variable
188  * should be updated to the same value in all vnets connected to this vsw.
189  */
190 uint16_t	vsw_default_vlan_id = 1;
191 
/*
 * Workaround for a version handshake bug in obp's vnet.
 * If vsw initiates version negotiation starting from the highest version,
 * obp sends a nack and terminates the version handshake. To work around
 * this, we do not initiate the version handshake when the channel comes up.
 * Instead, we wait for the peer to send its version info msg and go through
 * the version protocol exchange. If we successfully negotiate a version,
 * before sending the ack, we send our version info msg to the peer
 * using the <major,minor> version that we are about to ack.
 */
202 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
203 
204 /*
205  * In the absence of "priority-ether-types" property in MD, the following
206  * internal tunable can be set to specify a single priority ethertype.
207  */
208 uint64_t vsw_pri_eth_type = 0;
209 
210 /*
211  * Number of transmit priority buffers that are preallocated per device.
212  * This number is chosen to be a small value to throttle transmission
213  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
214  */
215 uint32_t vsw_pri_tx_nmblks = 64;
216 
217 /*
218  * Number of RARP packets sent to announce macaddr to the physical switch,
219  * after vsw's physical device is changed dynamically or after a guest (client
220  * vnet) is live migrated in.
221  */
222 uint32_t vsw_publish_macaddr_count = 3;
223 
boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanup */
int vsw_hio_cleanup_delay = 10000;	/* 10ms (value presumably in usec) */
227 
228 /*
229  * External tunables.
230  */
/*
 * Enable/disable thread per ring. This is a mode selection
 * that is done at vsw driver attach time.
 */
235 boolean_t vsw_multi_ring_enable = B_FALSE;
236 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;
237 
238 /* Number of transmit descriptors -  must be power of 2 */
239 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
240 
241 /*
242  * Max number of mblks received in one receive operation.
243  */
244 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
245 
246 /*
247  * Internal tunables for receive buffer pools, that is,  the size and number of
248  * mblks for each pool. At least 3 sizes must be specified if these are used.
249  * The sizes must be specified in increasing order. Non-zero value of the first
250  * size will be used as a hint to use these values instead of the algorithm
251  * that determines the sizes based on MTU.
252  */
253 uint32_t vsw_mblk_size1 = 0;
254 uint32_t vsw_mblk_size2 = 0;
255 uint32_t vsw_mblk_size3 = 0;
256 uint32_t vsw_mblk_size4 = 0;
257 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
258 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
259 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
260 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */
261 
262 /*
263  * Set this to non-zero to enable additional internal receive buffer pools
264  * based on the MTU of the device for better performance at the cost of more
265  * memory consumption. This is turned off by default, to use allocb(9F) for
266  * receive buffer allocations of sizes > 2K.
267  */
268 boolean_t vsw_jumbo_rxpools = B_FALSE;
269 
270 /*
271  * vsw_max_tx_qcount is the maximum # of packets that can be queued
272  * before the tx worker thread begins processing the queue. Its value
273  * is chosen to be 4x the default length of tx descriptor ring.
274  */
275 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
276 
277 /*
278  * MAC callbacks
279  */
/*
 * Positional initializer for the callback vector handed to the MAC layer.
 * NOTE(review): the slot descriptions below are inferred from the names of
 * the functions placed in them; confirm against the mac_callbacks_t
 * definition in <sys/mac.h> for this release.
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,		/* presumably the optional-callback flag word: none */
	vsw_m_stat,	/* statistics */
	vsw_m_start,	/* start */
	vsw_m_stop,	/* stop */
	vsw_m_promisc,	/* promiscuous mode on/off */
	vsw_m_multicst,	/* multicast address add/remove */
	vsw_m_unicst,	/* unicast address set */
	vsw_m_tx,	/* transmit */
	NULL,		/* remaining optional entry points are not provided */
	NULL,
	NULL
};
293 
/*
 * Character/block entry points for the driver. Only open and close are
 * wired to nulldev; every data-path entry point is nodev, and property
 * operations are delegated to the generic ddi_prop_op.
 */
static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};
314 
/*
 * Device operations vector. Routes getinfo/attach/detach to the vsw
 * implementations in this file and points at vsw_cb_ops for the cb_ops
 * table. This structure is also passed to mac_init_ops() in _init() and
 * mac_fini_ops() in _fini().
 */
static	struct	dev_ops	vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	vsw_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};
328 
/*
 * Loadable-driver linkage element: ties the generic driver module ops to
 * this driver's description string and its dev_ops vector. Referenced from
 * the modlinkage structure that _init()/_fini()/_info() operate on.
 */
extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch",
	&vsw_ops,
};
335 
/*
 * Acquire (LDC_ENTER_LOCK) or release (LDC_EXIT_LOCK) all three locks of an
 * LDC channel structure. Locks are taken in the order cblock -> rxlock ->
 * txlock and dropped in the reverse order.
 *
 * Each macro expands to a single statement (do { } while (0)) so that it is
 * safe inside an unbraced if/else; the caller supplies the terminating
 * semicolon.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
		_NOTE(CONSTCOND)			\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
		_NOTE(CONSTCOND)			\
	} while (0)
344 
/*
 * Driver soft state ptr. Initialised by ddi_soft_state_init() in _init()
 * with one vsw_t per instance, and torn down in _fini().
 */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 * vsw_attach() pushes new instances on the head and vsw_detach() unlinks
 * them; both do so while holding vsw_rw as writer.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;
353 
354 /*
355  * Property names
356  */
static char vdev_propname[] = "virtual-device";
/* also used as the "name" value in vsw_prop_template */
static char vsw_propname[] = "virtual-network-switch";
/* read by vsw_get_md_physname() */
static char physdev_propname[] = "vsw-phys-dev";
/* read by vsw_get_md_smodes() */
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";
static char pri_types_propname[] = "priority-ether-types";
static char vsw_pvid_propname[] = "port-vlan-id";
static char vsw_vid_propname[] = "vlan-id";
static char vsw_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
static char hybrid_propname[] = "hybrid";
static char vsw_mtu_propname[] = "mtu";
375 
376 /*
377  * Matching criteria passed to the MDEG to register interest
378  * in changes to 'virtual-device-port' nodes identified by their
379  * 'id' property.
380  */
/* Properties that identify a port node: just its numeric 'id'. */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,    "id"   },
	{ MDET_LIST_END,    NULL    }
};

/* Node-match spec: node name plus the property list defined just above. */
static mdeg_node_match_t vport_match = { "virtual-device-port",
						vport_prop_match };
388 
389 /*
390  * Matching criteria passed to the MDEG to register interest
391  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
392  * by their 'name' and 'cfg-handle' properties.
393  */
/* Properties that identify a vsw node: string 'name' and 'cfg-handle'. */
static md_prop_match_t vdev_prop_match[] = {
	{ MDET_PROP_STR,    "name"   },
	{ MDET_PROP_VAL,    "cfg-handle" },
	{ MDET_LIST_END,    NULL    }
};

/* Node-match spec: node name plus the property list defined just above. */
static mdeg_node_match_t vdev_match = { "virtual-device",
						vdev_prop_match };
402 
403 
404 /*
405  * Specification of an MD node passed to the MDEG to filter any
406  * 'vport' nodes that do not belong to the specified node. This
407  * template is copied for each vsw instance and filled in with
408  * the appropriate 'cfg-handle' value before being passed to the MDEG.
409  */
static mdeg_prop_spec_t vsw_prop_template[] = {
	/* [0] fixed: the node must be named "virtual-network-switch" */
	{ MDET_PROP_STR,    "name",		vsw_propname },
	/* [1] per-instance value, filled in via VSW_SET_MDEG_PROP_INST() */
	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
	/* [2] list terminator */
	{ MDET_LIST_END,    NULL,		NULL	}
};
415 
/*
 * Store 'val' in the ps_val member of entry [1] of an mdeg property spec
 * array (the "cfg-handle" slot in a copy of vsw_prop_template).
 *
 * The expansion is a single parenthesized expression with no trailing
 * semicolon, so it can be used in an unbraced if/else or as a
 * sub-expression; the caller supplies the terminating semicolon.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
417 
418 #ifdef	DEBUG
419 /*
420  * Print debug messages - set to 0x1f to enable all msgs
421  * or 0x0 to turn all off.
422  */
423 int vswdbg = 0x0;
424 
425 /*
426  * debug levels:
427  * 0x01:	Function entry/exit tracing
428  * 0x02:	Internal function messages
429  * 0x04:	Verbose internal messages
430  * 0x08:	Warning messages
431  * 0x10:	Error messages
432  */
433 
434 void
435 vswdebug(vsw_t *vswp, const char *fmt, ...)
436 {
437 	char buf[512];
438 	va_list ap;
439 
440 	va_start(ap, fmt);
441 	(void) vsprintf(buf, fmt, ap);
442 	va_end(ap);
443 
444 	if (vswp == NULL)
445 		cmn_err(CE_CONT, "%s\n", buf);
446 	else
447 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
448 }
449 
450 #endif	/* DEBUG */
451 
/*
 * Module linkage: a single driver linkage element (vswmodldrv) followed by
 * the NULL terminator. Passed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info() respectively.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};
457 
458 int
459 _init(void)
460 {
461 	int status;
462 
463 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
464 
465 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
466 	if (status != 0) {
467 		return (status);
468 	}
469 
470 	mac_init_ops(&vsw_ops, DRV_NAME);
471 	status = mod_install(&modlinkage);
472 	if (status != 0) {
473 		ddi_soft_state_fini(&vsw_state);
474 	}
475 	return (status);
476 }
477 
478 int
479 _fini(void)
480 {
481 	int status;
482 
483 	status = mod_remove(&modlinkage);
484 	if (status != 0)
485 		return (status);
486 	mac_fini_ops(&vsw_ops);
487 	ddi_soft_state_fini(&vsw_state);
488 
489 	rw_destroy(&vsw_rw);
490 
491 	return (status);
492 }
493 
494 int
495 _info(struct modinfo *modinfop)
496 {
497 	return (mod_info(&modlinkage, modinfop));
498 }
499 
500 static int
501 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
502 {
503 	vsw_t		*vswp;
504 	int		instance;
505 	char		hashname[MAXNAMELEN];
506 	char		qname[TASKQ_NAMELEN];
507 	enum		{ PROG_init = 0x00,
508 				PROG_locks = 0x01,
509 				PROG_readmd = 0x02,
510 				PROG_fdb = 0x04,
511 				PROG_mfdb = 0x08,
512 				PROG_taskq = 0x10,
513 				PROG_swmode = 0x20,
514 				PROG_macreg = 0x40,
515 				PROG_mdreg = 0x80}
516 			progress;
517 
518 	progress = PROG_init;
519 	int		rv;
520 
521 	switch (cmd) {
522 	case DDI_ATTACH:
523 		break;
524 	case DDI_RESUME:
525 		/* nothing to do for this non-device */
526 		return (DDI_SUCCESS);
527 	case DDI_PM_RESUME:
528 	default:
529 		return (DDI_FAILURE);
530 	}
531 
532 	instance = ddi_get_instance(dip);
533 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
534 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
535 		return (DDI_FAILURE);
536 	}
537 	vswp = ddi_get_soft_state(vsw_state, instance);
538 
539 	if (vswp == NULL) {
540 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
541 		goto vsw_attach_fail;
542 	}
543 
544 	vswp->dip = dip;
545 	vswp->instance = instance;
546 	ddi_set_driver_private(dip, (caddr_t)vswp);
547 
548 	mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL);
549 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
550 	mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL);
551 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
552 	rw_init(&vswp->mac_rwlock, NULL, RW_DRIVER, NULL);
553 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
554 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
555 
556 	progress |= PROG_locks;
557 
558 	rv = vsw_read_mdprops(vswp);
559 	if (rv != 0)
560 		goto vsw_attach_fail;
561 
562 	progress |= PROG_readmd;
563 
564 	/* setup the unicast forwarding database  */
565 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
566 	    vswp->instance);
567 	D2(vswp, "creating unicast hash table (%s)...", hashname);
568 	vswp->fdb_nchains = vsw_fdb_nchains;
569 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
570 	    mod_hash_null_valdtor, sizeof (void *));
571 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
572 	progress |= PROG_fdb;
573 
574 	/* setup the multicast fowarding database */
575 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
576 	    vswp->instance);
577 	D2(vswp, "creating multicast hash table %s)...", hashname);
578 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
579 	    mod_hash_null_valdtor, sizeof (void *));
580 
581 	progress |= PROG_mfdb;
582 
583 	/*
584 	 * Create the taskq which will process all the VIO
585 	 * control messages.
586 	 */
587 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
588 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
589 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
590 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
591 		    vswp->instance);
592 		goto vsw_attach_fail;
593 	}
594 
595 	progress |= PROG_taskq;
596 
597 	/* prevent auto-detaching */
598 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
599 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
600 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
601 		    "instance %u", DDI_NO_AUTODETACH, instance);
602 	}
603 
604 	/*
605 	 * The null switching function is set to avoid panic until
606 	 * switch mode is setup.
607 	 */
608 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
609 
610 	/*
611 	 * Setup the required switching mode,
612 	 * based on the mdprops that we read earlier.
613 	 * schedule a short timeout (0.1 sec) for the first time
614 	 * setup and avoid calling mac_open() directly here,
615 	 * others are regular timeout 3 secs.
616 	 */
617 	mutex_enter(&vswp->swtmout_lock);
618 
619 	vswp->swtmout_enabled = B_TRUE;
620 	vswp->swtmout_id = timeout(vsw_setup_switching_timeout, vswp,
621 	    drv_usectohz(vsw_setup_switching_boot_delay));
622 
623 	mutex_exit(&vswp->swtmout_lock);
624 
625 	progress |= PROG_swmode;
626 
627 	/* Register with mac layer as a provider */
628 	rv = vsw_mac_register(vswp);
629 	if (rv != 0)
630 		goto vsw_attach_fail;
631 
632 	progress |= PROG_macreg;
633 
634 	/*
635 	 * Now we have everything setup, register an interest in
636 	 * specific MD nodes.
637 	 *
638 	 * The callback is invoked in 2 cases, firstly if upon mdeg
639 	 * registration there are existing nodes which match our specified
640 	 * criteria, and secondly if the MD is changed (and again, there
641 	 * are nodes which we are interested in present within it. Note
642 	 * that our callback will be invoked even if our specified nodes
643 	 * have not actually changed).
644 	 *
645 	 */
646 	rv = vsw_mdeg_register(vswp);
647 	if (rv != 0)
648 		goto vsw_attach_fail;
649 
650 	progress |= PROG_mdreg;
651 
652 	WRITE_ENTER(&vsw_rw);
653 	vswp->next = vsw_head;
654 	vsw_head = vswp;
655 	RW_EXIT(&vsw_rw);
656 
657 	ddi_report_dev(vswp->dip);
658 	return (DDI_SUCCESS);
659 
660 vsw_attach_fail:
661 	DERR(NULL, "vsw_attach: failed");
662 
663 	if (progress & PROG_mdreg) {
664 		vsw_mdeg_unregister(vswp);
665 		(void) vsw_detach_ports(vswp);
666 	}
667 
668 	if (progress & PROG_macreg)
669 		(void) vsw_mac_unregister(vswp);
670 
671 	if (progress & PROG_swmode) {
672 		vsw_stop_switching_timeout(vswp);
673 		vsw_hio_cleanup(vswp);
674 		WRITE_ENTER(&vswp->mac_rwlock);
675 		vsw_mac_detach(vswp);
676 		vsw_mac_close(vswp);
677 		RW_EXIT(&vswp->mac_rwlock);
678 	}
679 
680 	if (progress & PROG_taskq)
681 		ddi_taskq_destroy(vswp->taskq_p);
682 
683 	if (progress & PROG_mfdb)
684 		mod_hash_destroy_hash(vswp->mfdb);
685 
686 	if (progress & PROG_fdb) {
687 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
688 		mod_hash_destroy_hash(vswp->fdb_hashp);
689 	}
690 
691 	if (progress & PROG_readmd) {
692 		if (VSW_PRI_ETH_DEFINED(vswp)) {
693 			kmem_free(vswp->pri_types,
694 			    sizeof (uint16_t) * vswp->pri_num_types);
695 		}
696 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
697 	}
698 
699 	if (progress & PROG_locks) {
700 		rw_destroy(&vswp->plist.lockrw);
701 		rw_destroy(&vswp->mfdbrw);
702 		rw_destroy(&vswp->mac_rwlock);
703 		rw_destroy(&vswp->if_lockrw);
704 		mutex_destroy(&vswp->swtmout_lock);
705 		mutex_destroy(&vswp->mca_lock);
706 		mutex_destroy(&vswp->hw_lock);
707 	}
708 
709 	ddi_soft_state_free(vsw_state, instance);
710 	return (DDI_FAILURE);
711 }
712 
713 static int
714 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
715 {
716 	vio_mblk_pool_t		*poolp, *npoolp;
717 	vsw_t			**vswpp, *vswp;
718 	int 			instance;
719 
720 	instance = ddi_get_instance(dip);
721 	vswp = ddi_get_soft_state(vsw_state, instance);
722 
723 	if (vswp == NULL) {
724 		return (DDI_FAILURE);
725 	}
726 
727 	switch (cmd) {
728 	case DDI_DETACH:
729 		break;
730 	case DDI_SUSPEND:
731 	case DDI_PM_SUSPEND:
732 	default:
733 		return (DDI_FAILURE);
734 	}
735 
736 	D2(vswp, "detaching instance %d", instance);
737 
738 	/* Stop any pending timeout to setup switching mode. */
739 	vsw_stop_switching_timeout(vswp);
740 
741 	if (vswp->if_state & VSW_IF_REG) {
742 		if (vsw_mac_unregister(vswp) != 0) {
743 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
744 			    "MAC layer", vswp->instance);
745 			return (DDI_FAILURE);
746 		}
747 	}
748 
749 	vsw_mdeg_unregister(vswp);
750 
751 	/* remove mac layer callback */
752 	WRITE_ENTER(&vswp->mac_rwlock);
753 	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
754 		mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
755 		vswp->mrh = NULL;
756 	}
757 	RW_EXIT(&vswp->mac_rwlock);
758 
759 	if (vsw_detach_ports(vswp) != 0) {
760 		cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports",
761 		    vswp->instance);
762 		return (DDI_FAILURE);
763 	}
764 
765 	rw_destroy(&vswp->if_lockrw);
766 
767 	/* cleanup HybridIO */
768 	vsw_hio_cleanup(vswp);
769 
770 	mutex_destroy(&vswp->hw_lock);
771 
772 	/*
773 	 * Now that the ports have been deleted, stop and close
774 	 * the physical device.
775 	 */
776 	WRITE_ENTER(&vswp->mac_rwlock);
777 
778 	vsw_mac_detach(vswp);
779 	vsw_mac_close(vswp);
780 
781 	RW_EXIT(&vswp->mac_rwlock);
782 
783 	rw_destroy(&vswp->mac_rwlock);
784 	mutex_destroy(&vswp->swtmout_lock);
785 
786 	/*
787 	 * Destroy any free pools that may still exist.
788 	 */
789 	poolp = vswp->rxh;
790 	while (poolp != NULL) {
791 		npoolp = vswp->rxh = poolp->nextp;
792 		if (vio_destroy_mblks(poolp) != 0) {
793 			vswp->rxh = poolp;
794 			return (DDI_FAILURE);
795 		}
796 		poolp = npoolp;
797 	}
798 
799 	/*
800 	 * Remove this instance from any entries it may be on in
801 	 * the hash table by using the list of addresses maintained
802 	 * in the vsw_t structure.
803 	 */
804 	vsw_del_mcst_vsw(vswp);
805 
806 	vswp->mcap = NULL;
807 	mutex_destroy(&vswp->mca_lock);
808 
809 	/*
810 	 * By now any pending tasks have finished and the underlying
811 	 * ldc's have been destroyed, so its safe to delete the control
812 	 * message taskq.
813 	 */
814 	if (vswp->taskq_p != NULL)
815 		ddi_taskq_destroy(vswp->taskq_p);
816 
817 	/*
818 	 * At this stage all the data pointers in the hash table
819 	 * should be NULL, as all the ports have been removed and will
820 	 * have deleted themselves from the port lists which the data
821 	 * pointers point to. Hence we can destroy the table using the
822 	 * default destructors.
823 	 */
824 	D2(vswp, "vsw_detach: destroying hash tables..");
825 	vsw_destroy_vlans(vswp, VSW_LOCALDEV);
826 	mod_hash_destroy_hash(vswp->fdb_hashp);
827 	vswp->fdb_hashp = NULL;
828 
829 	WRITE_ENTER(&vswp->mfdbrw);
830 	mod_hash_destroy_hash(vswp->mfdb);
831 	vswp->mfdb = NULL;
832 	RW_EXIT(&vswp->mfdbrw);
833 	rw_destroy(&vswp->mfdbrw);
834 
835 	/* free pri_types table */
836 	if (VSW_PRI_ETH_DEFINED(vswp)) {
837 		kmem_free(vswp->pri_types,
838 		    sizeof (uint16_t) * vswp->pri_num_types);
839 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
840 	}
841 
842 	ddi_remove_minor_node(dip, NULL);
843 
844 	rw_destroy(&vswp->plist.lockrw);
845 	WRITE_ENTER(&vsw_rw);
846 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
847 		if (*vswpp == vswp) {
848 			*vswpp = vswp->next;
849 			break;
850 		}
851 	}
852 	RW_EXIT(&vsw_rw);
853 	ddi_soft_state_free(vsw_state, instance);
854 
855 	return (DDI_SUCCESS);
856 }
857 
858 static int
859 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
860 {
861 	_NOTE(ARGUNUSED(dip))
862 
863 	vsw_t	*vswp = NULL;
864 	dev_t	dev = (dev_t)arg;
865 	int	instance;
866 
867 	instance = getminor(dev);
868 
869 	switch (infocmd) {
870 	case DDI_INFO_DEVT2DEVINFO:
871 		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
872 			*result = NULL;
873 			return (DDI_FAILURE);
874 		}
875 		*result = vswp->dip;
876 		return (DDI_SUCCESS);
877 
878 	case DDI_INFO_DEVT2INSTANCE:
879 		*result = (void *)(uintptr_t)instance;
880 		return (DDI_SUCCESS);
881 
882 	default:
883 		*result = NULL;
884 		return (DDI_FAILURE);
885 	}
886 }
887 
888 /*
889  * Get the value of the "vsw-phys-dev" property in the specified
890  * node. This property is the name of the physical device that
891  * the virtual switch will use to talk to the outside world.
892  *
893  * Note it is valid for this property to be NULL (but the property
894  * itself must exist). Callers of this routine should verify that
895  * the value returned is what they expected (i.e. either NULL or non NULL).
896  *
897  * On success returns value of the property in region pointed to by
898  * the 'name' argument, and with return value of 0. Otherwise returns 1.
899  */
900 static int
901 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
902 {
903 	int		len = 0;
904 	int		instance;
905 	char		*physname = NULL;
906 	char		*dev;
907 	const char	*dev_name;
908 	char		myname[MAXNAMELEN];
909 
910 	dev_name = ddi_driver_name(vswp->dip);
911 	instance = ddi_get_instance(vswp->dip);
912 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
913 
914 	if (md_get_prop_data(mdp, node, physdev_propname,
915 	    (uint8_t **)(&physname), &len) != 0) {
916 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
917 		    "device(s) from MD", vswp->instance);
918 		return (1);
919 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
920 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
921 		    vswp->instance, physname);
922 		return (1);
923 	} else if (strcmp(myname, physname) == 0) {
924 		/*
925 		 * Prevent the vswitch from opening itself as the
926 		 * network device.
927 		 */
928 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
929 		    vswp->instance, physname);
930 		return (1);
931 	} else {
932 		(void) strncpy(name, physname, strlen(physname) + 1);
933 		D2(vswp, "%s: using first device specified (%s)",
934 		    __func__, physname);
935 	}
936 
937 #ifdef DEBUG
938 	/*
939 	 * As a temporary measure to aid testing we check to see if there
940 	 * is a vsw.conf file present. If there is we use the value of the
941 	 * vsw_physname property in the file as the name of the physical
942 	 * device, overriding the value from the MD.
943 	 *
944 	 * There may be multiple devices listed, but for the moment
945 	 * we just use the first one.
946 	 */
947 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
948 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
949 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
950 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
951 			    vswp->instance, dev);
952 			ddi_prop_free(dev);
953 			return (1);
954 		} else {
955 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
956 			    "config file", vswp->instance, dev);
957 
958 			(void) strncpy(name, dev, strlen(dev) + 1);
959 		}
960 
961 		ddi_prop_free(dev);
962 	}
963 #endif
964 
965 	return (0);
966 }
967 
968 /*
969  * Read the 'vsw-switch-mode' property from the specified MD node.
970  *
971  * Returns 0 on success and the number of modes found in 'found',
972  * otherwise returns 1.
973  */
974 static int
975 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
976 						uint8_t *modes, int *found)
977 {
978 	int		len = 0;
979 	int		smode_num = 0;
980 	char		*smode = NULL;
981 	char		*curr_mode = NULL;
982 
983 	D1(vswp, "%s: enter", __func__);
984 
985 	/*
986 	 * Get the switch-mode property. The modes are listed in
987 	 * decreasing order of preference, i.e. prefered mode is
988 	 * first item in list.
989 	 */
990 	len = 0;
991 	smode_num = 0;
992 	if (md_get_prop_data(mdp, node, smode_propname,
993 	    (uint8_t **)(&smode), &len) != 0) {
994 		/*
995 		 * Unable to get switch-mode property from MD, nothing
996 		 * more we can do.
997 		 */
998 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
999 		    " from the MD", vswp->instance);
1000 		*found = 0;
1001 		return (1);
1002 	}
1003 
1004 	curr_mode = smode;
1005 	/*
1006 	 * Modes of operation:
1007 	 * 'switched'	 - layer 2 switching, underlying HW in
1008 	 *			programmed mode.
1009 	 * 'promiscuous' - layer 2 switching, underlying HW in
1010 	 *			promiscuous mode.
1011 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
1012 	 *			in non-promiscuous mode.
1013 	 */
1014 	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
1015 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
1016 		if (strcmp(curr_mode, "switched") == 0) {
1017 			modes[smode_num++] = VSW_LAYER2;
1018 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
1019 			modes[smode_num++] = VSW_LAYER2_PROMISC;
1020 		} else if (strcmp(curr_mode, "routed") == 0) {
1021 			modes[smode_num++] = VSW_LAYER3;
1022 		} else {
1023 			DWARN(vswp, "%s: Unknown switch mode %s, "
1024 			    "setting to default 'switched' mode",
1025 			    __func__, curr_mode);
1026 			modes[smode_num++] = VSW_LAYER2;
1027 		}
1028 		curr_mode += strlen(curr_mode) + 1;
1029 	}
1030 	*found = smode_num;
1031 
1032 	D2(vswp, "%s: %d modes found", __func__, smode_num);
1033 
1034 	D1(vswp, "%s: exit", __func__);
1035 
1036 	return (0);
1037 }
1038 
1039 /*
1040  * Register with the MAC layer as a network device, so we
1041  * can be plumbed if necessary.
1042  */
1043 static int
1044 vsw_mac_register(vsw_t *vswp)
1045 {
1046 	mac_register_t	*macp;
1047 	int		rv;
1048 
1049 	D1(vswp, "%s: enter", __func__);
1050 
1051 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1052 		return (EINVAL);
1053 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1054 	macp->m_driver = vswp;
1055 	macp->m_dip = vswp->dip;
1056 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1057 	macp->m_callbacks = &vsw_m_callbacks;
1058 	macp->m_min_sdu = 0;
1059 	macp->m_max_sdu = vswp->mtu;
1060 	macp->m_margin = VLAN_TAGSZ;
1061 	rv = mac_register(macp, &vswp->if_mh);
1062 	mac_free(macp);
1063 	if (rv != 0) {
1064 		/*
1065 		 * Treat this as a non-fatal error as we may be
1066 		 * able to operate in some other mode.
1067 		 */
1068 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1069 		    "a provider with MAC layer", vswp->instance);
1070 		return (rv);
1071 	}
1072 
1073 	vswp->if_state |= VSW_IF_REG;
1074 
1075 	D1(vswp, "%s: exit", __func__);
1076 
1077 	return (rv);
1078 }
1079 
1080 static int
1081 vsw_mac_unregister(vsw_t *vswp)
1082 {
1083 	int		rv = 0;
1084 
1085 	D1(vswp, "%s: enter", __func__);
1086 
1087 	WRITE_ENTER(&vswp->if_lockrw);
1088 
1089 	if (vswp->if_state & VSW_IF_REG) {
1090 		rv = mac_unregister(vswp->if_mh);
1091 		if (rv != 0) {
1092 			DWARN(vswp, "%s: unable to unregister from MAC "
1093 			    "framework", __func__);
1094 
1095 			RW_EXIT(&vswp->if_lockrw);
1096 			D1(vswp, "%s: fail exit", __func__);
1097 			return (rv);
1098 		}
1099 
1100 		/* mark i/f as down and unregistered */
1101 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1102 	}
1103 	RW_EXIT(&vswp->if_lockrw);
1104 
1105 	D1(vswp, "%s: exit", __func__);
1106 
1107 	return (rv);
1108 }
1109 
1110 static int
1111 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1112 {
1113 	vsw_t			*vswp = (vsw_t *)arg;
1114 
1115 	D1(vswp, "%s: enter", __func__);
1116 
1117 	WRITE_ENTER(&vswp->mac_rwlock);
1118 	if (vswp->mh == NULL) {
1119 		RW_EXIT(&vswp->mac_rwlock);
1120 		return (EINVAL);
1121 	}
1122 
1123 	/* return stats from underlying device */
1124 	*val = mac_stat_get(vswp->mh, stat);
1125 
1126 	RW_EXIT(&vswp->mac_rwlock);
1127 
1128 	return (0);
1129 }
1130 
1131 static void
1132 vsw_m_stop(void *arg)
1133 {
1134 	vsw_t	*vswp = (vsw_t *)arg;
1135 
1136 	D1(vswp, "%s: enter", __func__);
1137 
1138 	WRITE_ENTER(&vswp->if_lockrw);
1139 	vswp->if_state &= ~VSW_IF_UP;
1140 	RW_EXIT(&vswp->if_lockrw);
1141 
1142 	mutex_enter(&vswp->hw_lock);
1143 
1144 	(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1145 
1146 	if (vswp->recfg_reqd)
1147 		vsw_reconfig_hw(vswp);
1148 
1149 	mutex_exit(&vswp->hw_lock);
1150 
1151 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1152 }
1153 
1154 static int
1155 vsw_m_start(void *arg)
1156 {
1157 	vsw_t		*vswp = (vsw_t *)arg;
1158 
1159 	D1(vswp, "%s: enter", __func__);
1160 
1161 	WRITE_ENTER(&vswp->if_lockrw);
1162 
1163 	vswp->if_state |= VSW_IF_UP;
1164 
1165 	if (vswp->switching_setup_done == B_FALSE) {
1166 		/*
1167 		 * If the switching mode has not been setup yet, just
1168 		 * return. The unicast address will be programmed
1169 		 * after the physical device is successfully setup by the
1170 		 * timeout handler.
1171 		 */
1172 		RW_EXIT(&vswp->if_lockrw);
1173 		return (0);
1174 	}
1175 
1176 	/* if in layer2 mode, program unicast address. */
1177 	if (vswp->mh != NULL) {
1178 		mutex_enter(&vswp->hw_lock);
1179 		(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1180 		mutex_exit(&vswp->hw_lock);
1181 	}
1182 
1183 	RW_EXIT(&vswp->if_lockrw);
1184 
1185 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1186 	return (0);
1187 }
1188 
1189 /*
1190  * Change the local interface address.
1191  *
1192  * Note: we don't support this entry point. The local
1193  * mac address of the switch can only be changed via its
1194  * MD node properties.
1195  */
1196 static int
1197 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1198 {
1199 	_NOTE(ARGUNUSED(arg, macaddr))
1200 
1201 	return (DDI_FAILURE);
1202 }
1203 
1204 static int
1205 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1206 {
1207 	vsw_t		*vswp = (vsw_t *)arg;
1208 	mcst_addr_t	*mcst_p = NULL;
1209 	uint64_t	addr = 0x0;
1210 	int		i, ret = 0;
1211 
1212 	D1(vswp, "%s: enter", __func__);
1213 
1214 	/*
1215 	 * Convert address into form that can be used
1216 	 * as hash table key.
1217 	 */
1218 	for (i = 0; i < ETHERADDRL; i++) {
1219 		addr = (addr << 8) | mca[i];
1220 	}
1221 
1222 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1223 
1224 	if (add) {
1225 		D2(vswp, "%s: adding multicast", __func__);
1226 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1227 			/*
1228 			 * Update the list of multicast addresses
1229 			 * contained within the vsw_t structure to
1230 			 * include this new one.
1231 			 */
1232 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1233 			if (mcst_p == NULL) {
1234 				DERR(vswp, "%s unable to alloc mem", __func__);
1235 				(void) vsw_del_mcst(vswp,
1236 				    VSW_LOCALDEV, addr, NULL);
1237 				return (1);
1238 			}
1239 			mcst_p->addr = addr;
1240 			ether_copy(mca, &mcst_p->mca);
1241 
1242 			/*
1243 			 * Call into the underlying driver to program the
1244 			 * address into HW.
1245 			 */
1246 			WRITE_ENTER(&vswp->mac_rwlock);
1247 			if (vswp->mh != NULL) {
1248 				ret = mac_multicst_add(vswp->mh, mca);
1249 				if (ret != 0) {
1250 					cmn_err(CE_NOTE, "!vsw%d: unable to "
1251 					    "add multicast address",
1252 					    vswp->instance);
1253 					RW_EXIT(&vswp->mac_rwlock);
1254 					(void) vsw_del_mcst(vswp,
1255 					    VSW_LOCALDEV, addr, NULL);
1256 					kmem_free(mcst_p, sizeof (*mcst_p));
1257 					return (ret);
1258 				}
1259 				mcst_p->mac_added = B_TRUE;
1260 			}
1261 			RW_EXIT(&vswp->mac_rwlock);
1262 
1263 			mutex_enter(&vswp->mca_lock);
1264 			mcst_p->nextp = vswp->mcap;
1265 			vswp->mcap = mcst_p;
1266 			mutex_exit(&vswp->mca_lock);
1267 		} else {
1268 			cmn_err(CE_NOTE, "!vsw%d: unable to add multicast "
1269 			    "address", vswp->instance);
1270 		}
1271 		return (ret);
1272 	}
1273 
1274 	D2(vswp, "%s: removing multicast", __func__);
1275 	/*
1276 	 * Remove the address from the hash table..
1277 	 */
1278 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1279 
1280 		/*
1281 		 * ..and then from the list maintained in the
1282 		 * vsw_t structure.
1283 		 */
1284 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1285 		ASSERT(mcst_p != NULL);
1286 
1287 		WRITE_ENTER(&vswp->mac_rwlock);
1288 		if (vswp->mh != NULL && mcst_p->mac_added) {
1289 			(void) mac_multicst_remove(vswp->mh, mca);
1290 			mcst_p->mac_added = B_FALSE;
1291 		}
1292 		RW_EXIT(&vswp->mac_rwlock);
1293 		kmem_free(mcst_p, sizeof (*mcst_p));
1294 	}
1295 
1296 	D1(vswp, "%s: exit", __func__);
1297 
1298 	return (0);
1299 }
1300 
1301 static int
1302 vsw_m_promisc(void *arg, boolean_t on)
1303 {
1304 	vsw_t		*vswp = (vsw_t *)arg;
1305 
1306 	D1(vswp, "%s: enter", __func__);
1307 
1308 	WRITE_ENTER(&vswp->if_lockrw);
1309 	if (on)
1310 		vswp->if_state |= VSW_IF_PROMISC;
1311 	else
1312 		vswp->if_state &= ~VSW_IF_PROMISC;
1313 	RW_EXIT(&vswp->if_lockrw);
1314 
1315 	D1(vswp, "%s: exit", __func__);
1316 
1317 	return (0);
1318 }
1319 
1320 static mblk_t *
1321 vsw_m_tx(void *arg, mblk_t *mp)
1322 {
1323 	vsw_t		*vswp = (vsw_t *)arg;
1324 
1325 	D1(vswp, "%s: enter", __func__);
1326 
1327 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1328 
1329 	if (mp == NULL) {
1330 		return (NULL);
1331 	}
1332 
1333 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1334 
1335 	D1(vswp, "%s: exit", __func__);
1336 
1337 	return (NULL);
1338 }
1339 
1340 /*
1341  * Register for machine description (MD) updates.
1342  *
1343  * Returns 0 on success, 1 on failure.
1344  */
1345 static int
1346 vsw_mdeg_register(vsw_t *vswp)
1347 {
1348 	mdeg_prop_spec_t	*pspecp;
1349 	mdeg_node_spec_t	*inst_specp;
1350 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1351 	size_t			templatesz;
1352 	int			rv;
1353 
1354 	D1(vswp, "%s: enter", __func__);
1355 
1356 	/*
1357 	 * Allocate and initialize a per-instance copy
1358 	 * of the global property spec array that will
1359 	 * uniquely identify this vsw instance.
1360 	 */
1361 	templatesz = sizeof (vsw_prop_template);
1362 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1363 
1364 	bcopy(vsw_prop_template, pspecp, templatesz);
1365 
1366 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1367 
1368 	/* initialize the complete prop spec structure */
1369 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1370 	inst_specp->namep = "virtual-device";
1371 	inst_specp->specp = pspecp;
1372 
1373 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1374 	    vswp->regprop);
1375 	/*
1376 	 * Register an interest in 'virtual-device' nodes with a
1377 	 * 'name' property of 'virtual-network-switch'
1378 	 */
1379 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1380 	    (void *)vswp, &mdeg_hdl);
1381 	if (rv != MDEG_SUCCESS) {
1382 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1383 		    __func__, rv);
1384 		goto mdeg_reg_fail;
1385 	}
1386 
1387 	/*
1388 	 * Register an interest in 'vsw-port' nodes.
1389 	 */
1390 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1391 	    (void *)vswp, &mdeg_port_hdl);
1392 	if (rv != MDEG_SUCCESS) {
1393 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1394 		(void) mdeg_unregister(mdeg_hdl);
1395 		goto mdeg_reg_fail;
1396 	}
1397 
1398 	/* save off data that will be needed later */
1399 	vswp->inst_spec = inst_specp;
1400 	vswp->mdeg_hdl = mdeg_hdl;
1401 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1402 
1403 	D1(vswp, "%s: exit", __func__);
1404 	return (0);
1405 
1406 mdeg_reg_fail:
1407 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1408 	    vswp->instance);
1409 	kmem_free(pspecp, templatesz);
1410 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1411 
1412 	vswp->mdeg_hdl = NULL;
1413 	vswp->mdeg_port_hdl = NULL;
1414 
1415 	return (1);
1416 }
1417 
1418 static void
1419 vsw_mdeg_unregister(vsw_t *vswp)
1420 {
1421 	D1(vswp, "vsw_mdeg_unregister: enter");
1422 
1423 	if (vswp->mdeg_hdl != NULL)
1424 		(void) mdeg_unregister(vswp->mdeg_hdl);
1425 
1426 	if (vswp->mdeg_port_hdl != NULL)
1427 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1428 
1429 	if (vswp->inst_spec != NULL) {
1430 		if (vswp->inst_spec->specp != NULL) {
1431 			(void) kmem_free(vswp->inst_spec->specp,
1432 			    sizeof (vsw_prop_template));
1433 			vswp->inst_spec->specp = NULL;
1434 		}
1435 
1436 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1437 		vswp->inst_spec = NULL;
1438 	}
1439 
1440 	D1(vswp, "vsw_mdeg_unregister: exit");
1441 }
1442 
1443 /*
1444  * Mdeg callback invoked for the vsw node itself.
1445  */
1446 static int
1447 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1448 {
1449 	vsw_t		*vswp;
1450 	md_t		*mdp;
1451 	mde_cookie_t	node;
1452 	uint64_t	inst;
1453 	char		*node_name = NULL;
1454 
1455 	if (resp == NULL)
1456 		return (MDEG_FAILURE);
1457 
1458 	vswp = (vsw_t *)cb_argp;
1459 
1460 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1461 	    " : prev matched %d", __func__, resp->added.nelem,
1462 	    resp->removed.nelem, resp->match_curr.nelem,
1463 	    resp->match_prev.nelem);
1464 
1465 	/*
1466 	 * We get an initial callback for this node as 'added'
1467 	 * after registering with mdeg. Note that we would have
1468 	 * already gathered information about this vsw node by
1469 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1470 	 * So, there is a window where the properties of this
1471 	 * node might have changed when we get this initial 'added'
1472 	 * callback. We handle this as if an update occured
1473 	 * and invoke the same function which handles updates to
1474 	 * the properties of this vsw-node if any.
1475 	 *
1476 	 * A non-zero 'match' value indicates that the MD has been
1477 	 * updated and that a virtual-network-switch node is
1478 	 * present which may or may not have been updated. It is
1479 	 * up to the clients to examine their own nodes and
1480 	 * determine if they have changed.
1481 	 */
1482 	if (resp->added.nelem != 0) {
1483 
1484 		if (resp->added.nelem != 1) {
1485 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1486 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1487 			return (MDEG_FAILURE);
1488 		}
1489 
1490 		mdp = resp->added.mdp;
1491 		node = resp->added.mdep[0];
1492 
1493 	} else if (resp->match_curr.nelem != 0) {
1494 
1495 		if (resp->match_curr.nelem != 1) {
1496 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1497 			    "invalid: %d\n", vswp->instance,
1498 			    resp->match_curr.nelem);
1499 			return (MDEG_FAILURE);
1500 		}
1501 
1502 		mdp = resp->match_curr.mdp;
1503 		node = resp->match_curr.mdep[0];
1504 
1505 	} else {
1506 		return (MDEG_FAILURE);
1507 	}
1508 
1509 	/* Validate name and instance */
1510 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1511 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1512 		return (MDEG_FAILURE);
1513 	}
1514 
1515 	/* is this a virtual-network-switch? */
1516 	if (strcmp(node_name, vsw_propname) != 0) {
1517 		DERR(vswp, "%s: Invalid node name: %s\n",
1518 		    __func__, node_name);
1519 		return (MDEG_FAILURE);
1520 	}
1521 
1522 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1523 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1524 		    __func__);
1525 		return (MDEG_FAILURE);
1526 	}
1527 
1528 	/* is this the right instance of vsw? */
1529 	if (inst != vswp->regprop) {
1530 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1531 		    __func__, inst);
1532 		return (MDEG_FAILURE);
1533 	}
1534 
1535 	vsw_update_md_prop(vswp, mdp, node);
1536 
1537 	return (MDEG_SUCCESS);
1538 }
1539 
1540 /*
1541  * Mdeg callback invoked for changes to the vsw-port nodes
1542  * under the vsw node.
1543  */
1544 static int
1545 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1546 {
1547 	vsw_t		*vswp;
1548 	int		idx;
1549 	md_t		*mdp;
1550 	mde_cookie_t	node;
1551 	uint64_t	inst;
1552 	int		rv;
1553 
1554 	if ((resp == NULL) || (cb_argp == NULL))
1555 		return (MDEG_FAILURE);
1556 
1557 	vswp = (vsw_t *)cb_argp;
1558 
1559 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1560 	    " : prev matched %d", __func__, resp->added.nelem,
1561 	    resp->removed.nelem, resp->match_curr.nelem,
1562 	    resp->match_prev.nelem);
1563 
1564 	/* process added ports */
1565 	for (idx = 0; idx < resp->added.nelem; idx++) {
1566 		mdp = resp->added.mdp;
1567 		node = resp->added.mdep[idx];
1568 
1569 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1570 
1571 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1572 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1573 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1574 		}
1575 	}
1576 
1577 	/* process removed ports */
1578 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1579 		mdp = resp->removed.mdp;
1580 		node = resp->removed.mdep[idx];
1581 
1582 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1583 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1584 			    __func__, id_propname, idx);
1585 			continue;
1586 		}
1587 
1588 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1589 
1590 		if (vsw_port_detach(vswp, inst) != 0) {
1591 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1592 			    vswp->instance, inst);
1593 		}
1594 	}
1595 
1596 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1597 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1598 		    resp->match_curr.mdep[idx],
1599 		    resp->match_prev.mdp,
1600 		    resp->match_prev.mdep[idx]);
1601 	}
1602 
1603 	D1(vswp, "%s: exit", __func__);
1604 
1605 	return (MDEG_SUCCESS);
1606 }
1607 
1608 /*
1609  * Scan the machine description for this instance of vsw
1610  * and read its properties. Called only from vsw_attach().
1611  * Returns: 0 on success, 1 on failure.
1612  */
1613 static int
1614 vsw_read_mdprops(vsw_t *vswp)
1615 {
1616 	md_t		*mdp = NULL;
1617 	mde_cookie_t	rootnode;
1618 	mde_cookie_t	*listp = NULL;
1619 	uint64_t	inst;
1620 	uint64_t	cfgh;
1621 	char		*name;
1622 	int		rv = 1;
1623 	int		num_nodes = 0;
1624 	int		num_devs = 0;
1625 	int		listsz = 0;
1626 	int		i;
1627 
1628 	/*
1629 	 * In each 'virtual-device' node in the MD there is a
1630 	 * 'cfg-handle' property which is the MD's concept of
1631 	 * an instance number (this may be completely different from
1632 	 * the device drivers instance #). OBP reads that value and
1633 	 * stores it in the 'reg' property of the appropriate node in
1634 	 * the device tree. We first read this reg property and use this
1635 	 * to compare against the 'cfg-handle' property of vsw nodes
1636 	 * in MD to get to this specific vsw instance and then read
1637 	 * other properties that we are interested in.
1638 	 * We also cache the value of 'reg' property and use it later
1639 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1640 	 */
1641 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1642 	    DDI_PROP_DONTPASS, reg_propname, -1);
1643 	if (inst == -1) {
1644 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1645 		    "OBP device tree", vswp->instance, reg_propname);
1646 		return (rv);
1647 	}
1648 
1649 	vswp->regprop = inst;
1650 
1651 	if ((mdp = md_get_handle()) == NULL) {
1652 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1653 		return (rv);
1654 	}
1655 
1656 	num_nodes = md_node_count(mdp);
1657 	ASSERT(num_nodes > 0);
1658 
1659 	listsz = num_nodes * sizeof (mde_cookie_t);
1660 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1661 
1662 	rootnode = md_root_node(mdp);
1663 
1664 	/* search for all "virtual_device" nodes */
1665 	num_devs = md_scan_dag(mdp, rootnode,
1666 	    md_find_name(mdp, vdev_propname),
1667 	    md_find_name(mdp, "fwd"), listp);
1668 	if (num_devs <= 0) {
1669 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1670 		goto vsw_readmd_exit;
1671 	}
1672 
1673 	/*
1674 	 * Now loop through the list of virtual-devices looking for
1675 	 * devices with name "virtual-network-switch" and for each
1676 	 * such device compare its instance with what we have from
1677 	 * the 'reg' property to find the right node in MD and then
1678 	 * read all its properties.
1679 	 */
1680 	for (i = 0; i < num_devs; i++) {
1681 
1682 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1683 			DWARN(vswp, "%s: name property not found\n",
1684 			    __func__);
1685 			goto vsw_readmd_exit;
1686 		}
1687 
1688 		/* is this a virtual-network-switch? */
1689 		if (strcmp(name, vsw_propname) != 0)
1690 			continue;
1691 
1692 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1693 			DWARN(vswp, "%s: cfg-handle property not found\n",
1694 			    __func__);
1695 			goto vsw_readmd_exit;
1696 		}
1697 
1698 		/* is this the required instance of vsw? */
1699 		if (inst != cfgh)
1700 			continue;
1701 
1702 		/* now read all properties of this vsw instance */
1703 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1704 		break;
1705 	}
1706 
1707 vsw_readmd_exit:
1708 
1709 	kmem_free(listp, listsz);
1710 	(void) md_fini_handle(mdp);
1711 	return (rv);
1712 }
1713 
1714 /*
1715  * Read the initial start-of-day values from the specified MD node.
1716  */
1717 static int
1718 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1719 {
1720 	int		i;
1721 	uint64_t 	macaddr = 0;
1722 
1723 	D1(vswp, "%s: enter", __func__);
1724 
1725 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1726 		return (1);
1727 	}
1728 
1729 	/* mac address for vswitch device itself */
1730 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1731 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1732 		    vswp->instance);
1733 		return (1);
1734 	}
1735 
1736 	vsw_save_lmacaddr(vswp, macaddr);
1737 
1738 	if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) {
1739 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1740 		    "defaulting to 'switched' mode",
1741 		    __func__, smode_propname);
1742 
1743 		for (i = 0; i < NUM_SMODES; i++)
1744 			vswp->smode[i] = VSW_LAYER2;
1745 
1746 		vswp->smode_num = NUM_SMODES;
1747 	} else {
1748 		ASSERT(vswp->smode_num != 0);
1749 	}
1750 
1751 	/* read mtu */
1752 	vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
1753 	if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
1754 		vswp->mtu = ETHERMTU;
1755 	}
1756 	vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
1757 	    VLAN_TAGSZ;
1758 
1759 	/* read vlan id properties of this vsw instance */
1760 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1761 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1762 
1763 	/* read priority-ether-types */
1764 	vsw_read_pri_eth_types(vswp, mdp, node);
1765 
1766 	D1(vswp, "%s: exit", __func__);
1767 	return (0);
1768 }
1769 
1770 /*
1771  * Read vlan id properties of the given MD node.
1772  * Arguments:
1773  *   arg:          device argument(vsw device or a port)
1774  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1775  *   mdp:          machine description
1776  *   node:         md node cookie
1777  *
1778  * Returns:
1779  *   pvidp:        port-vlan-id of the node
1780  *   vidspp:       list of vlan-ids of the node
1781  *   nvidsp:       # of vlan-ids in the list
1782  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1783  */
1784 static void
1785 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1786 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1787 	uint16_t *default_idp)
1788 {
1789 	vsw_t		*vswp;
1790 	vsw_port_t	*portp;
1791 	char		*pvid_propname;
1792 	char		*vid_propname;
1793 	uint_t		nvids = 0;
1794 	uint32_t	vids_size;
1795 	int		rv;
1796 	int		i;
1797 	uint64_t	*data;
1798 	uint64_t	val;
1799 	int		size;
1800 	int		inst;
1801 
1802 	if (type == VSW_LOCALDEV) {
1803 
1804 		vswp = (vsw_t *)arg;
1805 		pvid_propname = vsw_pvid_propname;
1806 		vid_propname = vsw_vid_propname;
1807 		inst = vswp->instance;
1808 
1809 	} else if (type == VSW_VNETPORT) {
1810 
1811 		portp = (vsw_port_t *)arg;
1812 		vswp = portp->p_vswp;
1813 		pvid_propname = port_pvid_propname;
1814 		vid_propname = port_vid_propname;
1815 		inst = portp->p_instance;
1816 
1817 	} else {
1818 		return;
1819 	}
1820 
1821 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1822 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1823 		if (rv != 0) {
1824 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1825 			    vsw_dvid_propname);
1826 
1827 			*default_idp = vsw_default_vlan_id;
1828 		} else {
1829 			*default_idp = val & 0xFFF;
1830 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1831 			    vsw_dvid_propname, inst, *default_idp);
1832 		}
1833 	}
1834 
1835 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1836 	if (rv != 0) {
1837 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1838 		*pvidp = vsw_default_vlan_id;
1839 	} else {
1840 
1841 		*pvidp = val & 0xFFF;
1842 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1843 		    pvid_propname, inst, *pvidp);
1844 	}
1845 
1846 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1847 	    &size);
1848 	if (rv != 0) {
1849 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1850 		size = 0;
1851 	} else {
1852 		size /= sizeof (uint64_t);
1853 	}
1854 	nvids = size;
1855 
1856 	if (nvids != 0) {
1857 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1858 		vids_size = sizeof (uint16_t) * nvids;
1859 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1860 		for (i = 0; i < nvids; i++) {
1861 			(*vidspp)[i] = data[i] & 0xFFFF;
1862 			D2(vswp, " %d ", (*vidspp)[i]);
1863 		}
1864 		D2(vswp, "\n");
1865 	}
1866 
1867 	*nvidsp = nvids;
1868 }
1869 
1870 /*
1871  * This function reads "priority-ether-types" property from md. This property
1872  * is used to enable support for priority frames. Applications which need
1873  * guaranteed and timely delivery of certain high priority frames to/from
1874  * a vnet or vsw within ldoms, should configure this property by providing
1875  * the ether type(s) for which the priority facility is needed.
1876  * Normal data frames are delivered over a ldc channel using the descriptor
1877  * ring mechanism which is constrained by factors such as descriptor ring size,
1878  * the rate at which the ring is processed at the peer ldc end point, etc.
1879  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1880  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1881  * descriptor ring path and enables a more reliable and timely delivery of
1882  * frames to the peer.
1883  */
1884 static void
1885 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1886 {
1887 	int		rv;
1888 	uint16_t	*types;
1889 	uint64_t	*data;
1890 	int		size;
1891 	int		i;
1892 	size_t		mblk_sz;
1893 
1894 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1895 	    (uint8_t **)&data, &size);
1896 	if (rv != 0) {
1897 		/*
1898 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1899 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1900 		 */
1901 		if (vsw_pri_eth_type != 0) {
1902 			size = sizeof (vsw_pri_eth_type);
1903 			data = &vsw_pri_eth_type;
1904 		} else {
1905 			D3(vswp, "%s: prop(%s) not found", __func__,
1906 			    pri_types_propname);
1907 			size = 0;
1908 		}
1909 	}
1910 
1911 	if (size == 0) {
1912 		vswp->pri_num_types = 0;
1913 		return;
1914 	}
1915 
1916 	/*
1917 	 * we have some priority-ether-types defined;
1918 	 * allocate a table of these types and also
1919 	 * allocate a pool of mblks to transmit these
1920 	 * priority packets.
1921 	 */
1922 	size /= sizeof (uint64_t);
1923 	vswp->pri_num_types = size;
1924 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1925 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1926 		types[i] = data[i] & 0xFFFF;
1927 	}
1928 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1929 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1930 }
1931 
1932 static void
1933 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1934 {
1935 	int		rv;
1936 	int		inst;
1937 	uint64_t	val;
1938 	char		*mtu_propname;
1939 
1940 	mtu_propname = vsw_mtu_propname;
1941 	inst = vswp->instance;
1942 
1943 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1944 	if (rv != 0) {
1945 		D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
1946 		*mtu = vsw_ethermtu;
1947 	} else {
1948 
1949 		*mtu = val & 0xFFFF;
1950 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1951 		    mtu_propname, inst, *mtu);
1952 	}
1953 }
1954 
1955 /*
1956  * Update the mtu of the vsw device. We first check if the device has been
1957  * plumbed and if so fail the mtu update. Otherwise, we continue to update the
1958  * new mtu and reset all ports to initiate handshake re-negotiation with peers
1959  * using the new mtu.
1960  */
1961 static int
1962 vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
1963 {
1964 	int	rv;
1965 
1966 	WRITE_ENTER(&vswp->if_lockrw);
1967 
1968 	if (vswp->if_state & VSW_IF_UP) {
1969 
1970 		RW_EXIT(&vswp->if_lockrw);
1971 
1972 		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
1973 		    " as the device is plumbed\n", vswp->instance);
1974 		return (EBUSY);
1975 
1976 	} else {
1977 
1978 		D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
1979 		    __func__, vswp->mtu, mtu);
1980 
1981 		vswp->mtu = mtu;
1982 		vswp->max_frame_size = vswp->mtu +
1983 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1984 
1985 		rv = mac_maxsdu_update(vswp->if_mh, mtu);
1986 		if (rv != 0) {
1987 			cmn_err(CE_NOTE,
1988 			    "!vsw%d: Unable to update mtu with mac"
1989 			    " layer\n", vswp->instance);
1990 		}
1991 
1992 		RW_EXIT(&vswp->if_lockrw);
1993 
1994 		WRITE_ENTER(&vswp->mac_rwlock);
1995 
1996 		if (vswp->mh == 0) {
1997 			/*
1998 			 * Physical device is not available yet; mtu will be
1999 			 * updated after we open it successfully, as we have
2000 			 * saved the new mtu.
2001 			 */
2002 			D2(vswp, "%s: Physical device:%s is not "
2003 			    "available yet; can't update its mtu\n",
2004 			    __func__, vswp->physname);
2005 
2006 		} else {
2007 
2008 			/*
2009 			 * Stop and restart to enable the
2010 			 * new mtu in the physical device.
2011 			 */
2012 			vsw_mac_detach(vswp);
2013 			rv = vsw_mac_attach(vswp);
2014 			if (rv != 0) {
2015 				RW_EXIT(&vswp->mac_rwlock);
2016 				return (EIO);
2017 			}
2018 
2019 		}
2020 
2021 		RW_EXIT(&vswp->mac_rwlock);
2022 
2023 		/* Reset ports to renegotiate with the new mtu */
2024 		vsw_reset_ports(vswp);
2025 
2026 	}
2027 
2028 	return (0);
2029 }
2030 
2031 /*
2032  * Check to see if the relevant properties in the specified node have
2033  * changed, and if so take the appropriate action.
2034  *
2035  * If any of the properties are missing or invalid we don't take
2036  * any action, as this function should only be invoked when modifications
2037  * have been made to what we assume is a working configuration, which
2038  * we leave active.
2039  *
2040  * Note it is legal for this routine to be invoked even if none of the
2041  * properties in the port node within the MD have actually changed.
2042  */
2043 static void
2044 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
2045 {
2046 	char		physname[LIFNAMSIZ];
2047 	char		drv[LIFNAMSIZ];
2048 	uint_t		ddi_instance;
2049 	uint8_t		new_smode[NUM_SMODES];
2050 	int		i, smode_num = 0;
2051 	uint64_t 	macaddr = 0;
2052 	enum		{MD_init = 0x1,
2053 				MD_physname = 0x2,
2054 				MD_macaddr = 0x4,
2055 				MD_smode = 0x8,
2056 				MD_vlans = 0x10,
2057 				MD_mtu = 0x20} updated;
2058 	int		rv;
2059 	uint16_t	pvid;
2060 	uint16_t	*vids;
2061 	uint16_t	nvids;
2062 	uint32_t	mtu;
2063 
2064 	updated = MD_init;
2065 
2066 	D1(vswp, "%s: enter", __func__);
2067 
2068 	/*
2069 	 * Check if name of physical device in MD has changed.
2070 	 */
2071 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
2072 		/*
2073 		 * Do basic sanity check on new device name/instance,
2074 		 * if its non NULL. It is valid for the device name to
2075 		 * have changed from a non NULL to a NULL value, i.e.
2076 		 * the vsw is being changed to 'routed' mode.
2077 		 */
2078 		if ((strlen(physname) != 0) &&
2079 		    (ddi_parse(physname, drv,
2080 		    &ddi_instance) != DDI_SUCCESS)) {
2081 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
2082 			    " a valid device name/instance",
2083 			    vswp->instance, physname);
2084 			goto fail_reconf;
2085 		}
2086 
2087 		if (strcmp(physname, vswp->physname)) {
2088 			D2(vswp, "%s: device name changed from %s to %s",
2089 			    __func__, vswp->physname, physname);
2090 
2091 			updated |= MD_physname;
2092 		} else {
2093 			D2(vswp, "%s: device name unchanged at %s",
2094 			    __func__, vswp->physname);
2095 		}
2096 	} else {
2097 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
2098 		    "device from updated MD.", vswp->instance);
2099 		goto fail_reconf;
2100 	}
2101 
2102 	/*
2103 	 * Check if MAC address has changed.
2104 	 */
2105 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
2106 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
2107 		    vswp->instance);
2108 		goto fail_reconf;
2109 	} else {
2110 		uint64_t maddr = macaddr;
2111 		READ_ENTER(&vswp->if_lockrw);
2112 		for (i = ETHERADDRL - 1; i >= 0; i--) {
2113 			if (vswp->if_addr.ether_addr_octet[i]
2114 			    != (macaddr & 0xFF)) {
2115 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
2116 				    __func__, i,
2117 				    vswp->if_addr.ether_addr_octet[i],
2118 				    (macaddr & 0xFF));
2119 				updated |= MD_macaddr;
2120 				macaddr = maddr;
2121 				break;
2122 			}
2123 			macaddr >>= 8;
2124 		}
2125 		RW_EXIT(&vswp->if_lockrw);
2126 		if (updated & MD_macaddr) {
2127 			vsw_save_lmacaddr(vswp, macaddr);
2128 		}
2129 	}
2130 
2131 	/*
2132 	 * Check if switching modes have changed.
2133 	 */
2134 	if (vsw_get_md_smodes(vswp, mdp, node,
2135 	    new_smode, &smode_num)) {
2136 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2137 		    vswp->instance, smode_propname);
2138 		goto fail_reconf;
2139 	} else {
2140 		ASSERT(smode_num != 0);
2141 		if (smode_num != vswp->smode_num) {
2142 			D2(vswp, "%s: number of modes changed from %d to %d",
2143 			    __func__, vswp->smode_num, smode_num);
2144 		}
2145 
2146 		for (i = 0; i < smode_num; i++) {
2147 			if (new_smode[i] != vswp->smode[i]) {
2148 				D2(vswp, "%s: mode changed from %d to %d",
2149 				    __func__, vswp->smode[i], new_smode[i]);
2150 				updated |= MD_smode;
2151 				break;
2152 			}
2153 		}
2154 	}
2155 
2156 	/* Read the vlan ids */
2157 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2158 	    &nvids, NULL);
2159 
2160 	/* Determine if there are any vlan id updates */
2161 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2162 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2163 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2164 	    bcmp(vids, vswp->vids, sizeof (uint16_t) * nvids))) {
2165 		updated |= MD_vlans;
2166 	}
2167 
2168 	/* Read mtu */
2169 	vsw_mtu_read(vswp, mdp, node, &mtu);
2170 	if (mtu != vswp->mtu) {
2171 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2172 			updated |= MD_mtu;
2173 		} else {
2174 			cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2175 			    " as the specified value:%d is invalid\n",
2176 			    vswp->instance, mtu);
2177 		}
2178 	}
2179 
2180 	/*
2181 	 * Now make any changes which are needed...
2182 	 */
2183 
2184 	if (updated & (MD_physname | MD_smode)) {
2185 
2186 		/*
2187 		 * Stop any pending timeout to setup switching mode.
2188 		 */
2189 		vsw_stop_switching_timeout(vswp);
2190 
2191 		/* Cleanup HybridIO */
2192 		vsw_hio_cleanup(vswp);
2193 
2194 		/*
2195 		 * Remove unicst, mcst addrs of vsw interface
2196 		 * and ports from the physdev.
2197 		 */
2198 		vsw_unset_addrs(vswp);
2199 
2200 		/*
2201 		 * Stop, detach and close the old device..
2202 		 */
2203 		WRITE_ENTER(&vswp->mac_rwlock);
2204 
2205 		vsw_mac_detach(vswp);
2206 		vsw_mac_close(vswp);
2207 
2208 		RW_EXIT(&vswp->mac_rwlock);
2209 
2210 		/*
2211 		 * Update phys name.
2212 		 */
2213 		if (updated & MD_physname) {
2214 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2215 			    vswp->instance, vswp->physname, physname);
2216 			(void) strncpy(vswp->physname,
2217 			    physname, strlen(physname) + 1);
2218 		}
2219 
2220 		/*
2221 		 * Update array with the new switch mode values.
2222 		 */
2223 		if (updated & MD_smode) {
2224 			for (i = 0; i < smode_num; i++)
2225 				vswp->smode[i] = new_smode[i];
2226 
2227 			vswp->smode_num = smode_num;
2228 			vswp->smode_idx = 0;
2229 		}
2230 
2231 		/*
2232 		 * ..and attach, start the new device.
2233 		 */
2234 		rv = vsw_setup_switching(vswp);
2235 		if (rv == EAGAIN) {
2236 			/*
2237 			 * Unable to setup switching mode.
2238 			 * As the error is EAGAIN, schedule a timeout to retry
2239 			 * and return. Programming addresses of ports and
2240 			 * vsw interface will be done when the timeout handler
2241 			 * completes successfully.
2242 			 */
2243 			mutex_enter(&vswp->swtmout_lock);
2244 
2245 			vswp->swtmout_enabled = B_TRUE;
2246 			vswp->swtmout_id =
2247 			    timeout(vsw_setup_switching_timeout, vswp,
2248 			    (vsw_setup_switching_delay *
2249 			    drv_usectohz(MICROSEC)));
2250 
2251 			mutex_exit(&vswp->swtmout_lock);
2252 
2253 			return;
2254 
2255 		} else if (rv) {
2256 			goto fail_update;
2257 		}
2258 
2259 		/*
2260 		 * program unicst, mcst addrs of vsw interface
2261 		 * and ports in the physdev.
2262 		 */
2263 		vsw_set_addrs(vswp);
2264 
2265 		/* Start HIO for ports that have already connected */
2266 		vsw_hio_start_ports(vswp);
2267 
2268 	} else if (updated & MD_macaddr) {
2269 		/*
2270 		 * We enter here if only MD_macaddr is exclusively updated.
2271 		 * If MD_physname and/or MD_smode are also updated, then
2272 		 * as part of that, we would have implicitly processed
2273 		 * MD_macaddr update (above).
2274 		 */
2275 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2276 		    vswp->instance, macaddr);
2277 
2278 		READ_ENTER(&vswp->if_lockrw);
2279 		if (vswp->if_state & VSW_IF_UP) {
2280 
2281 			mutex_enter(&vswp->hw_lock);
2282 			/*
2283 			 * Remove old mac address of vsw interface
2284 			 * from the physdev
2285 			 */
2286 			(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
2287 			/*
2288 			 * Program new mac address of vsw interface
2289 			 * in the physdev
2290 			 */
2291 			rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
2292 			mutex_exit(&vswp->hw_lock);
2293 			if (rv != 0) {
2294 				cmn_err(CE_NOTE,
2295 				    "!vsw%d: failed to program interface "
2296 				    "unicast address\n", vswp->instance);
2297 			}
2298 			/*
2299 			 * Notify the MAC layer of the changed address.
2300 			 */
2301 			mac_unicst_update(vswp->if_mh,
2302 			    (uint8_t *)&vswp->if_addr);
2303 
2304 		}
2305 		RW_EXIT(&vswp->if_lockrw);
2306 
2307 	}
2308 
2309 	if (updated & MD_vlans) {
2310 		/* Remove existing vlan ids from the hash table. */
2311 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2312 
2313 		/* save the new vlan ids */
2314 		vswp->pvid = pvid;
2315 		if (vswp->nvids != 0) {
2316 			kmem_free(vswp->vids, sizeof (uint16_t) * vswp->nvids);
2317 			vswp->nvids = 0;
2318 		}
2319 		if (nvids != 0) {
2320 			vswp->nvids = nvids;
2321 			vswp->vids = vids;
2322 		}
2323 
2324 		/* add these new vlan ids into hash table */
2325 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2326 	} else {
2327 		if (nvids != 0) {
2328 			kmem_free(vids, sizeof (uint16_t) * nvids);
2329 		}
2330 	}
2331 
2332 	if (updated & MD_mtu) {
2333 
2334 		rv = vsw_mtu_update(vswp, mtu);
2335 		if (rv != 0) {
2336 			goto fail_update;
2337 		}
2338 
2339 	}
2340 
2341 	return;
2342 
2343 fail_reconf:
2344 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2345 	return;
2346 
2347 fail_update:
2348 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2349 	    vswp->instance);
2350 }
2351 
2352 /*
2353  * Read the port's md properties.
2354  */
2355 static int
2356 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2357 	md_t *mdp, mde_cookie_t *node)
2358 {
2359 	uint64_t		ldc_id;
2360 	uint8_t			*addrp;
2361 	int			i, addrsz;
2362 	int			num_nodes = 0, nchan = 0;
2363 	int			listsz = 0;
2364 	mde_cookie_t		*listp = NULL;
2365 	struct ether_addr	ea;
2366 	uint64_t		macaddr;
2367 	uint64_t		inst = 0;
2368 	uint64_t		val;
2369 
2370 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2371 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2372 		    id_propname);
2373 		return (1);
2374 	}
2375 
2376 	/*
2377 	 * Find the channel endpoint node(s) (which should be under this
2378 	 * port node) which contain the channel id(s).
2379 	 */
2380 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2381 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2382 		    __func__, num_nodes);
2383 		return (1);
2384 	}
2385 
2386 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2387 
2388 	/* allocate enough space for node list */
2389 	listsz = num_nodes * sizeof (mde_cookie_t);
2390 	listp = kmem_zalloc(listsz, KM_SLEEP);
2391 
2392 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2393 	    md_find_name(mdp, "fwd"), listp);
2394 
2395 	if (nchan <= 0) {
2396 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2397 		kmem_free(listp, listsz);
2398 		return (1);
2399 	}
2400 
2401 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2402 
2403 	/* use property from first node found */
2404 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2405 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2406 		    id_propname);
2407 		kmem_free(listp, listsz);
2408 		return (1);
2409 	}
2410 
2411 	/* don't need list any more */
2412 	kmem_free(listp, listsz);
2413 
2414 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2415 
2416 	/* read mac-address property */
2417 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2418 	    &addrp, &addrsz)) {
2419 		DWARN(vswp, "%s: prop(%s) not found",
2420 		    __func__, remaddr_propname);
2421 		return (1);
2422 	}
2423 
2424 	if (addrsz < ETHERADDRL) {
2425 		DWARN(vswp, "%s: invalid address size", __func__);
2426 		return (1);
2427 	}
2428 
2429 	macaddr = *((uint64_t *)addrp);
2430 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2431 
2432 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2433 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2434 		macaddr >>= 8;
2435 	}
2436 
2437 	/* now update all properties into the port */
2438 	portp->p_vswp = vswp;
2439 	portp->p_instance = inst;
2440 	portp->addr_set = VSW_ADDR_UNSET;
2441 	ether_copy(&ea, &portp->p_macaddr);
2442 	if (nchan > VSW_PORT_MAX_LDCS) {
2443 		D2(vswp, "%s: using first of %d ldc ids",
2444 		    __func__, nchan);
2445 		nchan = VSW_PORT_MAX_LDCS;
2446 	}
2447 	portp->num_ldcs = nchan;
2448 	portp->ldc_ids =
2449 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2450 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2451 
2452 	/* read vlan id properties of this port node */
2453 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2454 	    &portp->vids, &portp->nvids, NULL);
2455 
2456 	/* Check if hybrid property is present */
2457 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2458 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2459 		portp->p_hio_enabled = B_TRUE;
2460 	} else {
2461 		portp->p_hio_enabled = B_FALSE;
2462 	}
2463 	/*
2464 	 * Port hio capability determined after version
2465 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2466 	 */
2467 	portp->p_hio_capable = B_FALSE;
2468 	return (0);
2469 }
2470 
2471 /*
2472  * Add a new port to the system.
2473  *
2474  * Returns 0 on success, 1 on failure.
2475  */
2476 int
2477 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2478 {
2479 	vsw_port_t	*portp;
2480 	int		rv;
2481 
2482 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2483 
2484 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2485 	if (rv != 0) {
2486 		kmem_free(portp, sizeof (*portp));
2487 		return (1);
2488 	}
2489 
2490 	rv = vsw_port_attach(portp);
2491 	if (rv != 0) {
2492 		DERR(vswp, "%s: failed to attach port", __func__);
2493 		return (1);
2494 	}
2495 
2496 	return (0);
2497 }
2498 
2499 static int
2500 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2501 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2502 {
2503 	uint64_t	cport_num;
2504 	uint64_t	pport_num;
2505 	vsw_port_list_t	*plistp;
2506 	vsw_port_t	*portp;
2507 	boolean_t	updated_vlans = B_FALSE;
2508 	uint16_t	pvid;
2509 	uint16_t	*vids;
2510 	uint16_t	nvids;
2511 	uint64_t	val;
2512 	boolean_t	hio_enabled = B_FALSE;
2513 
2514 	/*
2515 	 * For now, we get port updates only if vlan ids changed.
2516 	 * We read the port num and do some sanity check.
2517 	 */
2518 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2519 		return (1);
2520 	}
2521 
2522 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2523 		return (1);
2524 	}
2525 	if (cport_num != pport_num)
2526 		return (1);
2527 
2528 	plistp = &(vswp->plist);
2529 
2530 	READ_ENTER(&plistp->lockrw);
2531 
2532 	portp = vsw_lookup_port(vswp, cport_num);
2533 	if (portp == NULL) {
2534 		RW_EXIT(&plistp->lockrw);
2535 		return (1);
2536 	}
2537 
2538 	/* Read the vlan ids */
2539 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2540 	    &vids, &nvids, NULL);
2541 
2542 	/* Determine if there are any vlan id updates */
2543 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2544 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2545 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2546 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2547 		updated_vlans = B_TRUE;
2548 	}
2549 
2550 	if (updated_vlans == B_TRUE) {
2551 
2552 		/* Remove existing vlan ids from the hash table. */
2553 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2554 
2555 		/* save the new vlan ids */
2556 		portp->pvid = pvid;
2557 		if (portp->nvids != 0) {
2558 			kmem_free(portp->vids,
2559 			    sizeof (uint16_t) * portp->nvids);
2560 			portp->nvids = 0;
2561 		}
2562 		if (nvids != 0) {
2563 			portp->vids = kmem_zalloc(sizeof (uint16_t) *
2564 			    nvids, KM_SLEEP);
2565 			bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2566 			portp->nvids = nvids;
2567 			kmem_free(vids, sizeof (uint16_t) * nvids);
2568 		}
2569 
2570 		/* add these new vlan ids into hash table */
2571 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2572 
2573 		/* reset the port if it is vlan unaware (ver < 1.3) */
2574 		vsw_vlan_unaware_port_reset(portp);
2575 	}
2576 
2577 	/* Check if hybrid property is present */
2578 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2579 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2580 		hio_enabled = B_TRUE;
2581 	}
2582 
2583 	if (portp->p_hio_enabled != hio_enabled) {
2584 		vsw_hio_port_update(portp, hio_enabled);
2585 	}
2586 
2587 	RW_EXIT(&plistp->lockrw);
2588 
2589 	return (0);
2590 }
2591 
2592 /*
2593  * vsw_mac_rx -- A common function to send packets to the interface.
2594  * By default this function check if the interface is UP or not, the
2595  * rest of the behaviour depends on the flags as below:
2596  *
2597  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2598  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2599  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2600  */
2601 void
2602 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2603     mblk_t *mp, vsw_macrx_flags_t flags)
2604 {
2605 	mblk_t		*mpt;
2606 
2607 	D1(vswp, "%s:enter\n", __func__);
2608 	READ_ENTER(&vswp->if_lockrw);
2609 	/* Check if the interface is up */
2610 	if (!(vswp->if_state & VSW_IF_UP)) {
2611 		RW_EXIT(&vswp->if_lockrw);
2612 		/* Free messages only if FREEMSG flag specified */
2613 		if (flags & VSW_MACRX_FREEMSG) {
2614 			freemsgchain(mp);
2615 		}
2616 		D1(vswp, "%s:exit\n", __func__);
2617 		return;
2618 	}
2619 	/*
2620 	 * If PROMISC flag is passed, then check if
2621 	 * the interface is in the PROMISC mode.
2622 	 * If not, drop the messages.
2623 	 */
2624 	if (flags & VSW_MACRX_PROMISC) {
2625 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2626 			RW_EXIT(&vswp->if_lockrw);
2627 			/* Free messages only if FREEMSG flag specified */
2628 			if (flags & VSW_MACRX_FREEMSG) {
2629 				freemsgchain(mp);
2630 			}
2631 			D1(vswp, "%s:exit\n", __func__);
2632 			return;
2633 		}
2634 	}
2635 	RW_EXIT(&vswp->if_lockrw);
2636 	/*
2637 	 * If COPYMSG flag is passed, then make a copy
2638 	 * of the message chain and send up the copy.
2639 	 */
2640 	if (flags & VSW_MACRX_COPYMSG) {
2641 		mp = copymsgchain(mp);
2642 		if (mp == NULL) {
2643 			D1(vswp, "%s:exit\n", __func__);
2644 			return;
2645 		}
2646 	}
2647 
2648 	D2(vswp, "%s: sending up stack", __func__);
2649 
2650 	mpt = NULL;
2651 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2652 	if (mp != NULL) {
2653 		mac_rx(vswp->if_mh, mrh, mp);
2654 	}
2655 	D1(vswp, "%s:exit\n", __func__);
2656 }
2657 
2658 /* copy mac address of vsw into soft state structure */
2659 static void
2660 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2661 {
2662 	int	i;
2663 
2664 	WRITE_ENTER(&vswp->if_lockrw);
2665 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2666 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2667 		macaddr >>= 8;
2668 	}
2669 	RW_EXIT(&vswp->if_lockrw);
2670 }
2671