xref: /titanic_51/usr/src/uts/sun4v/io/vsw.c (revision 60c45ed01d4f99571d468c42f609d11a099fab1e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/debug.h>
32 #include <sys/time.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/user.h>
36 #include <sys/stropts.h>
37 #include <sys/stream.h>
38 #include <sys/strlog.h>
39 #include <sys/strsubr.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpu.h>
42 #include <sys/kmem.h>
43 #include <sys/conf.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47 #include <sys/stat.h>
48 #include <sys/kstat.h>
49 #include <sys/vtrace.h>
50 #include <sys/strsun.h>
51 #include <sys/dlpi.h>
52 #include <sys/ethernet.h>
53 #include <net/if.h>
54 #include <sys/varargs.h>
55 #include <sys/machsystm.h>
56 #include <sys/modctl.h>
57 #include <sys/modhash.h>
58 #include <sys/mac.h>
59 #include <sys/mac_ether.h>
60 #include <sys/taskq.h>
61 #include <sys/note.h>
62 #include <sys/mach_descrip.h>
63 #include <sys/mac.h>
64 #include <sys/mdeg.h>
65 #include <sys/ldc.h>
66 #include <sys/vsw_fdb.h>
67 #include <sys/vsw.h>
68 #include <sys/vio_mailbox.h>
69 #include <sys/vnet_mailbox.h>
70 #include <sys/vnet_common.h>
71 #include <sys/vio_util.h>
72 #include <sys/sdt.h>
73 #include <sys/atomic.h>
74 #include <sys/callb.h>
75 #include <sys/vlan.h>
76 
/*
 * Function prototypes.
 *
 * Forward declarations for routines used by this file. Every entry is
 * declared static except vsw_mac_rx(), which is declared with external
 * linkage.
 */
static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);

/* MDEG routines */
static	int vsw_mdeg_register(vsw_t *vswp);
static	void vsw_mdeg_unregister(vsw_t *vswp);
static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
static	int vsw_read_mdprops(vsw_t *vswp);
static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
	uint16_t *nvidsp, uint16_t *default_idp);
static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
	md_t *mdp, mde_cookie_t *node);
static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
	mde_cookie_t node);
static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);

/* Mac driver related routines */
static int vsw_mac_register(vsw_t *);
static int vsw_mac_unregister(vsw_t *);
static int vsw_m_stat(void *, uint_t, uint64_t *);
static void vsw_m_stop(void *arg);
static int vsw_m_start(void *arg);
static int vsw_m_unicst(void *arg, const uint8_t *);
static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
static int vsw_m_promisc(void *arg, boolean_t);
static mblk_t *vsw_m_tx(void *arg, mblk_t *);
/* Not static: visible to other compilation units. */
void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
    mblk_t *mp, vsw_macrx_flags_t flags);
115 
/*
 * Functions imported from other files.
 *
 * NOTE(review): vsw_port_update() is declared static below, so it has
 * internal linkage and is not actually imported, although it is listed
 * among the extern declarations.
 */
extern void vsw_setup_switching_timeout(void *arg);
extern void vsw_stop_switching_timeout(vsw_t *vswp);
extern int vsw_setup_switching(vsw_t *);
extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
extern void vsw_del_mcst_vsw(vsw_t *);
extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
extern int vsw_detach_ports(vsw_t *vswp);
extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
extern int vsw_port_detach(vsw_t *vswp, int p_instance);
static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
	md_t *prev_mdp, mde_cookie_t prev_mdex);
extern	int vsw_port_attach(vsw_port_t *port);
extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
extern int vsw_mac_attach(vsw_t *vswp);
extern void vsw_mac_detach(vsw_t *vswp);
extern int vsw_mac_open(vsw_t *vswp);
extern void vsw_mac_close(vsw_t *vswp);
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_reconfig_hw(vsw_t *);
extern void vsw_unset_addrs(vsw_t *vswp);
extern void vsw_set_addrs(vsw_t *vswp);
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
	mblk_t **npt);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
150 
/*
 * Internal tunables.
 *
 * These are plain (non-static) globals with compiled-in defaults.
 */
int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
int	vsw_wretries = 100;		/* # of write attempts */
int	vsw_desc_delay = 0;		/* delay in us */
int	vsw_read_attempts = 5;		/* # of reads of descriptor */
int	vsw_mac_open_retries = 20;	/* max # of mac_open() retries */
int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */

/* vsw_fdb_nchains sizes both the unicast and multicast tables in attach */
uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
/* vsw_ethermtu is reported to the MAC layer as the maximum SDU */
uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vsw_fdbe_refcnt_delay = 10;

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in all vnets connected to this vsw.
 */
uint16_t	vsw_default_vlan_id = 1;

/*
 * Workaround for a version handshake bug in obp's vnet.
 * If vsw initiates version negotiation starting from the highest version,
 * obp sends a nack and terminates version handshake. To workaround
 * this, we do not initiate version handshake when the channel comes up.
 * Instead, we wait for the peer to send its version info msg and go through
 * the version protocol exchange. If we successfully negotiate a version,
 * before sending the ack, we send our version info msg to the peer
 * using the <major,minor> version that we are about to ack.
 */
boolean_t vsw_obp_ver_proto_workaround = B_TRUE;

/*
 * In the absence of "priority-ether-types" property in MD, the following
 * internal tunable can be set to specify a single priority ethertype.
 * The default of 0 leaves it unset.
 */
uint64_t vsw_pri_eth_type = 0;

/*
 * Number of transmit priority buffers that are preallocated per device.
 * This number is chosen to be a small value to throttle transmission
 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
 */
uint32_t vsw_pri_tx_nmblks = 64;
204 
/*
 * External tunables.
 */
/*
 * Enable/disable thread per ring. This is a mode selection
 * that is done at vsw driver attach time.
 */
boolean_t vsw_multi_ring_enable = B_FALSE;
/* Number of MAC rx rings; defaults to VSW_MAC_RX_RINGS. */
int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;

/* Number of transmit descriptors - must be a power of 2 */
uint32_t vsw_ntxds = VSW_RING_NUM_EL;
217 
218 /*
219  * Max number of mblks received in one receive operation.
220  */
221 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
222 
/*
 * Tunables for three different pools, that is, the size and
 * number of mblks for each pool. Pool N is described by the pair
 * vsw_mblk_sizeN / vsw_num_mblksN.
 */
uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128;	/* size=128 for pool1 */
uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256;	/* size=256 for pool2 */
uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048;	/* size=2048 for pool3 */
uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */

/*
 * vsw_max_tx_qcount is the maximum # of packets that can be queued
 * before the tx worker thread begins processing the queue. Its value
 * is chosen to be 4x the default length of tx descriptor ring
 * (VSW_RING_NUM_EL).
 */
uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
240 
/*
 * MAC callbacks
 *
 * Entry points handed to the MAC layer through m_callbacks in
 * vsw_mac_register(). The initializer is positional, so the order must
 * match the mac_callbacks_t layout in <sys/mac.h>.
 * NOTE(review): the per-slot descriptions below are inferred from the
 * handler names; confirm against the mac_callbacks_t definition.
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,			/* presumably optional-callback flags: none */
	vsw_m_stat,		/* statistics */
	vsw_m_start,		/* start */
	vsw_m_stop,		/* stop */
	vsw_m_promisc,		/* promiscuous mode */
	vsw_m_multicst,		/* multicast address */
	vsw_m_unicst,		/* unicast address */
	vsw_m_tx,		/* transmit */
	NULL,			/* not provided */
	NULL,			/* not provided */
	NULL			/* not provided */
};
257 
/*
 * Character/block entry points. Only open/close (nulldev) and prop_op
 * (ddi_prop_op) are provided; all other entries are nodev/nochpoll.
 * Referenced from vsw_ops.devo_cb_ops.
 */
static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};
278 
/*
 * Device operations for the driver. This structure is passed to
 * mac_init_ops()/mac_fini_ops() in _init()/_fini() and is referenced
 * from vswmodldrv.
 */
static	struct	dev_ops	vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	vsw_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};
292 
/*
 * Loadable-module driver linkage: ties the driver description string and
 * vsw_ops to mod_driverops. Referenced from modlinkage.
 */
extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch",
	&vsw_ops,
};
299 
/*
 * Acquire / release all three per-channel locks of an LDC.
 *
 * LDC_ENTER_LOCK takes ldc_cblock, ldc_rxlock, ldc_txlock in that order;
 * LDC_EXIT_LOCK drops them in the reverse order.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement and is safe inside an unbraced if/else. Invoke it with a
 * trailing semicolon: LDC_ENTER_LOCK(ldcp);
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	} while (0)
308 
/*
 * Driver soft state ptr. Initialised in _init() with one vsw_t per
 * instance and released in _fini().
 */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 * Instances are pushed onto the head in vsw_attach() and unlinked in
 * vsw_detach(); both take vsw_rw as writer while modifying the list.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;
317 
/*
 * Property names
 *
 * Names of the machine description (MD) properties and nodes that this
 * driver looks up.
 */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";
static char pri_types_propname[] = "priority-ether-types";
static char vsw_pvid_propname[] = "port-vlan-id";
static char vsw_vid_propname[] = "vlan-id";
static char vsw_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
337 
/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 *
 * The property list is terminated by an MDET_LIST_END entry.
 */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,    "id"   },
	{ MDET_LIST_END,    NULL    }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
						vport_prop_match };
350 
/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
 * by their 'name' and 'cfg-handle' properties.
 *
 * The property list is terminated by an MDET_LIST_END entry.
 */
static md_prop_match_t vdev_prop_match[] = {
	{ MDET_PROP_STR,    "name"   },
	{ MDET_PROP_VAL,    "cfg-handle" },
	{ MDET_LIST_END,    NULL    }
};

static mdeg_node_match_t vdev_match = { "virtual-device",
						vdev_prop_match };
364 
365 
/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vsw instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 *
 * Entry [1] ("cfg-handle") is the one whose value is left NULL here;
 * VSW_SET_MDEG_PROP_INST() writes to that index.
 */
static mdeg_prop_spec_t vsw_prop_template[] = {
	{ MDET_PROP_STR,    "name",		vsw_propname },
	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
	{ MDET_LIST_END,    NULL,		NULL	}
};
377 
/*
 * Store 'val' as the value of entry [1] of a property spec array (the
 * "cfg-handle" slot in vsw_prop_template). The macro is a single
 * parenthesised expression with no trailing semicolon, so it is safe in
 * an unbraced if/else; invoke it as VSW_SET_MDEG_PROP_INST(specp, val);
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
379 
380 #ifdef	DEBUG
/*
 * Print debug messages - set to 0x1f to enable all msgs
 * or 0x0 to turn all off. The value is a bitmask of the debug
 * levels listed below (0x1f is all five bits set).
 */
int vswdbg = 0x0;

/*
 * debug levels:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */
395 
396 void
397 vswdebug(vsw_t *vswp, const char *fmt, ...)
398 {
399 	char buf[512];
400 	va_list ap;
401 
402 	va_start(ap, fmt);
403 	(void) vsprintf(buf, fmt, ap);
404 	va_end(ap);
405 
406 	if (vswp == NULL)
407 		cmn_err(CE_CONT, "%s\n", buf);
408 	else
409 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
410 }
411 
412 #endif	/* DEBUG */
413 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info().
 * It carries a single linkage element (vswmodldrv) followed by a NULL
 * terminator.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};
419 
420 int
421 _init(void)
422 {
423 	int status;
424 
425 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
426 
427 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
428 	if (status != 0) {
429 		return (status);
430 	}
431 
432 	mac_init_ops(&vsw_ops, DRV_NAME);
433 	status = mod_install(&modlinkage);
434 	if (status != 0) {
435 		ddi_soft_state_fini(&vsw_state);
436 	}
437 	return (status);
438 }
439 
440 int
441 _fini(void)
442 {
443 	int status;
444 
445 	status = mod_remove(&modlinkage);
446 	if (status != 0)
447 		return (status);
448 	mac_fini_ops(&vsw_ops);
449 	ddi_soft_state_fini(&vsw_state);
450 
451 	rw_destroy(&vsw_rw);
452 
453 	return (status);
454 }
455 
456 int
457 _info(struct modinfo *modinfop)
458 {
459 	return (mod_info(&modlinkage, modinfop));
460 }
461 
462 static int
463 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
464 {
465 	vsw_t		*vswp;
466 	int		instance;
467 	char		hashname[MAXNAMELEN];
468 	char		qname[TASKQ_NAMELEN];
469 	enum		{ PROG_init = 0x00,
470 				PROG_locks = 0x01,
471 				PROG_readmd = 0x02,
472 				PROG_fdb = 0x04,
473 				PROG_mfdb = 0x08,
474 				PROG_taskq = 0x10,
475 				PROG_swmode = 0x20,
476 				PROG_macreg = 0x40,
477 				PROG_mdreg = 0x80}
478 			progress;
479 
480 	progress = PROG_init;
481 	int		rv;
482 
483 	switch (cmd) {
484 	case DDI_ATTACH:
485 		break;
486 	case DDI_RESUME:
487 		/* nothing to do for this non-device */
488 		return (DDI_SUCCESS);
489 	case DDI_PM_RESUME:
490 	default:
491 		return (DDI_FAILURE);
492 	}
493 
494 	instance = ddi_get_instance(dip);
495 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
496 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
497 		return (DDI_FAILURE);
498 	}
499 	vswp = ddi_get_soft_state(vsw_state, instance);
500 
501 	if (vswp == NULL) {
502 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
503 		goto vsw_attach_fail;
504 	}
505 
506 	vswp->dip = dip;
507 	vswp->instance = instance;
508 	ddi_set_driver_private(dip, (caddr_t)vswp);
509 
510 	mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL);
511 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
512 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
513 	mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL);
514 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
515 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
516 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
517 
518 	progress |= PROG_locks;
519 
520 	rv = vsw_read_mdprops(vswp);
521 	if (rv != 0)
522 		goto vsw_attach_fail;
523 
524 	progress |= PROG_readmd;
525 
526 	/* setup the unicast forwarding database  */
527 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
528 	    vswp->instance);
529 	D2(vswp, "creating unicast hash table (%s)...", hashname);
530 	vswp->fdb_nchains = vsw_fdb_nchains;
531 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
532 	    mod_hash_null_valdtor, sizeof (void *));
533 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
534 	progress |= PROG_fdb;
535 
536 	/* setup the multicast fowarding database */
537 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
538 	    vswp->instance);
539 	D2(vswp, "creating multicast hash table %s)...", hashname);
540 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
541 	    mod_hash_null_valdtor, sizeof (void *));
542 
543 	progress |= PROG_mfdb;
544 
545 	/*
546 	 * Create the taskq which will process all the VIO
547 	 * control messages.
548 	 */
549 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
550 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
551 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
552 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
553 		    vswp->instance);
554 		goto vsw_attach_fail;
555 	}
556 
557 	progress |= PROG_taskq;
558 
559 	/* prevent auto-detaching */
560 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
561 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
562 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
563 		    "instance %u", DDI_NO_AUTODETACH, instance);
564 	}
565 
566 	/*
567 	 * Setup the required switching mode,
568 	 * based on the mdprops that we read earlier.
569 	 */
570 	rv = vsw_setup_switching(vswp);
571 	if (rv == EAGAIN) {
572 		/*
573 		 * Unable to setup switching mode;
574 		 * as the error is EAGAIN, schedule a timeout to retry.
575 		 */
576 		mutex_enter(&vswp->swtmout_lock);
577 
578 		vswp->swtmout_enabled = B_TRUE;
579 		vswp->swtmout_id =
580 		    timeout(vsw_setup_switching_timeout, vswp,
581 		    (vsw_setup_switching_delay * drv_usectohz(MICROSEC)));
582 
583 		mutex_exit(&vswp->swtmout_lock);
584 	} else if (rv != 0) {
585 		goto vsw_attach_fail;
586 	}
587 
588 	progress |= PROG_swmode;
589 
590 	/* Register with mac layer as a provider */
591 	rv = vsw_mac_register(vswp);
592 	if (rv != 0)
593 		goto vsw_attach_fail;
594 
595 	progress |= PROG_macreg;
596 
597 	/*
598 	 * Now we have everything setup, register an interest in
599 	 * specific MD nodes.
600 	 *
601 	 * The callback is invoked in 2 cases, firstly if upon mdeg
602 	 * registration there are existing nodes which match our specified
603 	 * criteria, and secondly if the MD is changed (and again, there
604 	 * are nodes which we are interested in present within it. Note
605 	 * that our callback will be invoked even if our specified nodes
606 	 * have not actually changed).
607 	 *
608 	 */
609 	rv = vsw_mdeg_register(vswp);
610 	if (rv != 0)
611 		goto vsw_attach_fail;
612 
613 	progress |= PROG_mdreg;
614 
615 	WRITE_ENTER(&vsw_rw);
616 	vswp->next = vsw_head;
617 	vsw_head = vswp;
618 	RW_EXIT(&vsw_rw);
619 
620 	ddi_report_dev(vswp->dip);
621 	return (DDI_SUCCESS);
622 
623 vsw_attach_fail:
624 	DERR(NULL, "vsw_attach: failed");
625 
626 	if (progress & PROG_mdreg) {
627 		vsw_mdeg_unregister(vswp);
628 		(void) vsw_detach_ports(vswp);
629 	}
630 
631 	if (progress & PROG_macreg)
632 		(void) vsw_mac_unregister(vswp);
633 
634 	if (progress & PROG_swmode) {
635 		vsw_stop_switching_timeout(vswp);
636 		mutex_enter(&vswp->mac_lock);
637 		vsw_mac_detach(vswp);
638 		vsw_mac_close(vswp);
639 		mutex_exit(&vswp->mac_lock);
640 	}
641 
642 	if (progress & PROG_taskq)
643 		ddi_taskq_destroy(vswp->taskq_p);
644 
645 	if (progress & PROG_mfdb)
646 		mod_hash_destroy_hash(vswp->mfdb);
647 
648 	if (progress & PROG_fdb) {
649 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
650 		mod_hash_destroy_hash(vswp->fdb_hashp);
651 	}
652 
653 	if (progress & PROG_readmd) {
654 		if (VSW_PRI_ETH_DEFINED(vswp)) {
655 			kmem_free(vswp->pri_types,
656 			    sizeof (uint16_t) * vswp->pri_num_types);
657 		}
658 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
659 	}
660 
661 	if (progress & PROG_locks) {
662 		rw_destroy(&vswp->plist.lockrw);
663 		rw_destroy(&vswp->mfdbrw);
664 		rw_destroy(&vswp->if_lockrw);
665 		mutex_destroy(&vswp->swtmout_lock);
666 		mutex_destroy(&vswp->mca_lock);
667 		mutex_destroy(&vswp->mac_lock);
668 		mutex_destroy(&vswp->hw_lock);
669 	}
670 
671 	ddi_soft_state_free(vsw_state, instance);
672 	return (DDI_FAILURE);
673 }
674 
675 static int
676 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
677 {
678 	vio_mblk_pool_t		*poolp, *npoolp;
679 	vsw_t			**vswpp, *vswp;
680 	int 			instance;
681 
682 	instance = ddi_get_instance(dip);
683 	vswp = ddi_get_soft_state(vsw_state, instance);
684 
685 	if (vswp == NULL) {
686 		return (DDI_FAILURE);
687 	}
688 
689 	switch (cmd) {
690 	case DDI_DETACH:
691 		break;
692 	case DDI_SUSPEND:
693 	case DDI_PM_SUSPEND:
694 	default:
695 		return (DDI_FAILURE);
696 	}
697 
698 	D2(vswp, "detaching instance %d", instance);
699 
700 	/* Stop any pending timeout to setup switching mode. */
701 	vsw_stop_switching_timeout(vswp);
702 
703 	if (vswp->if_state & VSW_IF_REG) {
704 		if (vsw_mac_unregister(vswp) != 0) {
705 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
706 			    "MAC layer", vswp->instance);
707 			return (DDI_FAILURE);
708 		}
709 	}
710 
711 	vsw_mdeg_unregister(vswp);
712 
713 	/* remove mac layer callback */
714 	mutex_enter(&vswp->mac_lock);
715 	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
716 		mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
717 		vswp->mrh = NULL;
718 	}
719 	mutex_exit(&vswp->mac_lock);
720 
721 	if (vsw_detach_ports(vswp) != 0) {
722 		cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports",
723 		    vswp->instance);
724 		return (DDI_FAILURE);
725 	}
726 
727 	rw_destroy(&vswp->if_lockrw);
728 
729 	mutex_destroy(&vswp->hw_lock);
730 
731 	/*
732 	 * Now that the ports have been deleted, stop and close
733 	 * the physical device.
734 	 */
735 	mutex_enter(&vswp->mac_lock);
736 
737 	vsw_mac_detach(vswp);
738 	vsw_mac_close(vswp);
739 
740 	mutex_exit(&vswp->mac_lock);
741 
742 	mutex_destroy(&vswp->mac_lock);
743 	mutex_destroy(&vswp->swtmout_lock);
744 
745 	/*
746 	 * Destroy any free pools that may still exist.
747 	 */
748 	poolp = vswp->rxh;
749 	while (poolp != NULL) {
750 		npoolp = vswp->rxh = poolp->nextp;
751 		if (vio_destroy_mblks(poolp) != 0) {
752 			vswp->rxh = poolp;
753 			return (DDI_FAILURE);
754 		}
755 		poolp = npoolp;
756 	}
757 
758 	/*
759 	 * Remove this instance from any entries it may be on in
760 	 * the hash table by using the list of addresses maintained
761 	 * in the vsw_t structure.
762 	 */
763 	vsw_del_mcst_vsw(vswp);
764 
765 	vswp->mcap = NULL;
766 	mutex_destroy(&vswp->mca_lock);
767 
768 	/*
769 	 * By now any pending tasks have finished and the underlying
770 	 * ldc's have been destroyed, so its safe to delete the control
771 	 * message taskq.
772 	 */
773 	if (vswp->taskq_p != NULL)
774 		ddi_taskq_destroy(vswp->taskq_p);
775 
776 	/*
777 	 * At this stage all the data pointers in the hash table
778 	 * should be NULL, as all the ports have been removed and will
779 	 * have deleted themselves from the port lists which the data
780 	 * pointers point to. Hence we can destroy the table using the
781 	 * default destructors.
782 	 */
783 	D2(vswp, "vsw_detach: destroying hash tables..");
784 	vsw_destroy_vlans(vswp, VSW_LOCALDEV);
785 	mod_hash_destroy_hash(vswp->fdb_hashp);
786 	vswp->fdb_hashp = NULL;
787 
788 	WRITE_ENTER(&vswp->mfdbrw);
789 	mod_hash_destroy_hash(vswp->mfdb);
790 	vswp->mfdb = NULL;
791 	RW_EXIT(&vswp->mfdbrw);
792 	rw_destroy(&vswp->mfdbrw);
793 
794 	/* free pri_types table */
795 	if (VSW_PRI_ETH_DEFINED(vswp)) {
796 		kmem_free(vswp->pri_types,
797 		    sizeof (uint16_t) * vswp->pri_num_types);
798 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
799 	}
800 
801 	ddi_remove_minor_node(dip, NULL);
802 
803 	rw_destroy(&vswp->plist.lockrw);
804 	WRITE_ENTER(&vsw_rw);
805 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
806 		if (*vswpp == vswp) {
807 			*vswpp = vswp->next;
808 			break;
809 		}
810 	}
811 	RW_EXIT(&vsw_rw);
812 	ddi_soft_state_free(vsw_state, instance);
813 
814 	return (DDI_SUCCESS);
815 }
816 
817 static int
818 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
819 {
820 	_NOTE(ARGUNUSED(dip))
821 
822 	vsw_t	*vswp = NULL;
823 	dev_t	dev = (dev_t)arg;
824 	int	instance;
825 
826 	instance = getminor(dev);
827 
828 	switch (infocmd) {
829 	case DDI_INFO_DEVT2DEVINFO:
830 		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
831 			*result = NULL;
832 			return (DDI_FAILURE);
833 		}
834 		*result = vswp->dip;
835 		return (DDI_SUCCESS);
836 
837 	case DDI_INFO_DEVT2INSTANCE:
838 		*result = (void *)(uintptr_t)instance;
839 		return (DDI_SUCCESS);
840 
841 	default:
842 		*result = NULL;
843 		return (DDI_FAILURE);
844 	}
845 }
846 
847 /*
848  * Get the value of the "vsw-phys-dev" property in the specified
849  * node. This property is the name of the physical device that
850  * the virtual switch will use to talk to the outside world.
851  *
852  * Note it is valid for this property to be NULL (but the property
853  * itself must exist). Callers of this routine should verify that
854  * the value returned is what they expected (i.e. either NULL or non NULL).
855  *
856  * On success returns value of the property in region pointed to by
857  * the 'name' argument, and with return value of 0. Otherwise returns 1.
858  */
859 static int
860 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
861 {
862 	int		len = 0;
863 	int		instance;
864 	char		*physname = NULL;
865 	char		*dev;
866 	const char	*dev_name;
867 	char		myname[MAXNAMELEN];
868 
869 	dev_name = ddi_driver_name(vswp->dip);
870 	instance = ddi_get_instance(vswp->dip);
871 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
872 
873 	if (md_get_prop_data(mdp, node, physdev_propname,
874 	    (uint8_t **)(&physname), &len) != 0) {
875 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
876 		    "device(s) from MD", vswp->instance);
877 		return (1);
878 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
879 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
880 		    vswp->instance, physname);
881 		return (1);
882 	} else if (strcmp(myname, physname) == 0) {
883 		/*
884 		 * Prevent the vswitch from opening itself as the
885 		 * network device.
886 		 */
887 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
888 		    vswp->instance, physname);
889 		return (1);
890 	} else {
891 		(void) strncpy(name, physname, strlen(physname) + 1);
892 		D2(vswp, "%s: using first device specified (%s)",
893 		    __func__, physname);
894 	}
895 
896 #ifdef DEBUG
897 	/*
898 	 * As a temporary measure to aid testing we check to see if there
899 	 * is a vsw.conf file present. If there is we use the value of the
900 	 * vsw_physname property in the file as the name of the physical
901 	 * device, overriding the value from the MD.
902 	 *
903 	 * There may be multiple devices listed, but for the moment
904 	 * we just use the first one.
905 	 */
906 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
907 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
908 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
909 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
910 			    vswp->instance, dev);
911 			ddi_prop_free(dev);
912 			return (1);
913 		} else {
914 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
915 			    "config file", vswp->instance, dev);
916 
917 			(void) strncpy(name, dev, strlen(dev) + 1);
918 		}
919 
920 		ddi_prop_free(dev);
921 	}
922 #endif
923 
924 	return (0);
925 }
926 
927 /*
928  * Read the 'vsw-switch-mode' property from the specified MD node.
929  *
930  * Returns 0 on success and the number of modes found in 'found',
931  * otherwise returns 1.
932  */
933 static int
934 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
935 						uint8_t *modes, int *found)
936 {
937 	int		len = 0;
938 	int		smode_num = 0;
939 	char		*smode = NULL;
940 	char		*curr_mode = NULL;
941 
942 	D1(vswp, "%s: enter", __func__);
943 
944 	/*
945 	 * Get the switch-mode property. The modes are listed in
946 	 * decreasing order of preference, i.e. prefered mode is
947 	 * first item in list.
948 	 */
949 	len = 0;
950 	smode_num = 0;
951 	if (md_get_prop_data(mdp, node, smode_propname,
952 	    (uint8_t **)(&smode), &len) != 0) {
953 		/*
954 		 * Unable to get switch-mode property from MD, nothing
955 		 * more we can do.
956 		 */
957 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
958 		    " from the MD", vswp->instance);
959 		*found = 0;
960 		return (1);
961 	}
962 
963 	curr_mode = smode;
964 	/*
965 	 * Modes of operation:
966 	 * 'switched'	 - layer 2 switching, underlying HW in
967 	 *			programmed mode.
968 	 * 'promiscuous' - layer 2 switching, underlying HW in
969 	 *			promiscuous mode.
970 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
971 	 *			in non-promiscuous mode.
972 	 */
973 	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
974 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
975 		if (strcmp(curr_mode, "switched") == 0) {
976 			modes[smode_num++] = VSW_LAYER2;
977 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
978 			modes[smode_num++] = VSW_LAYER2_PROMISC;
979 		} else if (strcmp(curr_mode, "routed") == 0) {
980 			modes[smode_num++] = VSW_LAYER3;
981 		} else {
982 			DWARN(vswp, "%s: Unknown switch mode %s, "
983 			    "setting to default 'switched' mode",
984 			    __func__, curr_mode);
985 			modes[smode_num++] = VSW_LAYER2;
986 		}
987 		curr_mode += strlen(curr_mode) + 1;
988 	}
989 	*found = smode_num;
990 
991 	D2(vswp, "%s: %d modes found", __func__, smode_num);
992 
993 	D1(vswp, "%s: exit", __func__);
994 
995 	return (0);
996 }
997 
998 /*
999  * Register with the MAC layer as a network device, so we
1000  * can be plumbed if necessary.
1001  */
1002 static int
1003 vsw_mac_register(vsw_t *vswp)
1004 {
1005 	mac_register_t	*macp;
1006 	int		rv;
1007 
1008 	D1(vswp, "%s: enter", __func__);
1009 
1010 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1011 		return (EINVAL);
1012 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1013 	macp->m_driver = vswp;
1014 	macp->m_dip = vswp->dip;
1015 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1016 	macp->m_callbacks = &vsw_m_callbacks;
1017 	macp->m_min_sdu = 0;
1018 	macp->m_max_sdu = vsw_ethermtu;
1019 	macp->m_margin = VLAN_TAGSZ;
1020 	rv = mac_register(macp, &vswp->if_mh);
1021 	mac_free(macp);
1022 	if (rv != 0) {
1023 		/*
1024 		 * Treat this as a non-fatal error as we may be
1025 		 * able to operate in some other mode.
1026 		 */
1027 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1028 		    "a provider with MAC layer", vswp->instance);
1029 		return (rv);
1030 	}
1031 
1032 	vswp->if_state |= VSW_IF_REG;
1033 
1034 	vswp->max_frame_size = vsw_ethermtu + sizeof (struct ether_header)
1035 	    + VLAN_TAGSZ;
1036 
1037 	D1(vswp, "%s: exit", __func__);
1038 
1039 	return (rv);
1040 }
1041 
1042 static int
1043 vsw_mac_unregister(vsw_t *vswp)
1044 {
1045 	int		rv = 0;
1046 
1047 	D1(vswp, "%s: enter", __func__);
1048 
1049 	WRITE_ENTER(&vswp->if_lockrw);
1050 
1051 	if (vswp->if_state & VSW_IF_REG) {
1052 		rv = mac_unregister(vswp->if_mh);
1053 		if (rv != 0) {
1054 			DWARN(vswp, "%s: unable to unregister from MAC "
1055 			    "framework", __func__);
1056 
1057 			RW_EXIT(&vswp->if_lockrw);
1058 			D1(vswp, "%s: fail exit", __func__);
1059 			return (rv);
1060 		}
1061 
1062 		/* mark i/f as down and unregistered */
1063 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1064 	}
1065 	RW_EXIT(&vswp->if_lockrw);
1066 
1067 	D1(vswp, "%s: exit", __func__);
1068 
1069 	return (rv);
1070 }
1071 
1072 static int
1073 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1074 {
1075 	vsw_t			*vswp = (vsw_t *)arg;
1076 
1077 	D1(vswp, "%s: enter", __func__);
1078 
1079 	mutex_enter(&vswp->mac_lock);
1080 	if (vswp->mh == NULL) {
1081 		mutex_exit(&vswp->mac_lock);
1082 		return (EINVAL);
1083 	}
1084 
1085 	/* return stats from underlying device */
1086 	*val = mac_stat_get(vswp->mh, stat);
1087 
1088 	mutex_exit(&vswp->mac_lock);
1089 
1090 	return (0);
1091 }
1092 
1093 static void
1094 vsw_m_stop(void *arg)
1095 {
1096 	vsw_t		*vswp = (vsw_t *)arg;
1097 
1098 	D1(vswp, "%s: enter", __func__);
1099 
1100 	WRITE_ENTER(&vswp->if_lockrw);
1101 	vswp->if_state &= ~VSW_IF_UP;
1102 	RW_EXIT(&vswp->if_lockrw);
1103 
1104 	mutex_enter(&vswp->hw_lock);
1105 
1106 	(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1107 
1108 	if (vswp->recfg_reqd)
1109 		vsw_reconfig_hw(vswp);
1110 
1111 	mutex_exit(&vswp->hw_lock);
1112 
1113 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1114 }
1115 
1116 static int
1117 vsw_m_start(void *arg)
1118 {
1119 	vsw_t		*vswp = (vsw_t *)arg;
1120 
1121 	D1(vswp, "%s: enter", __func__);
1122 
1123 	WRITE_ENTER(&vswp->if_lockrw);
1124 
1125 	vswp->if_state |= VSW_IF_UP;
1126 
1127 	if (vswp->switching_setup_done == B_FALSE) {
1128 		/*
1129 		 * If the switching mode has not been setup yet, just
1130 		 * return. The unicast address will be programmed
1131 		 * after the physical device is successfully setup by the
1132 		 * timeout handler.
1133 		 */
1134 		RW_EXIT(&vswp->if_lockrw);
1135 		return (0);
1136 	}
1137 
1138 	/* if in layer2 mode, program unicast address. */
1139 	if (vswp->mh != NULL) {
1140 		mutex_enter(&vswp->hw_lock);
1141 		(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1142 		mutex_exit(&vswp->hw_lock);
1143 	}
1144 
1145 	RW_EXIT(&vswp->if_lockrw);
1146 
1147 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1148 	return (0);
1149 }
1150 
1151 /*
1152  * Change the local interface address.
1153  *
1154  * Note: we don't support this entry point. The local
1155  * mac address of the switch can only be changed via its
1156  * MD node properties.
1157  */
1158 static int
1159 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1160 {
1161 	_NOTE(ARGUNUSED(arg, macaddr))
1162 
1163 	return (DDI_FAILURE);
1164 }
1165 
1166 static int
1167 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1168 {
1169 	vsw_t		*vswp = (vsw_t *)arg;
1170 	mcst_addr_t	*mcst_p = NULL;
1171 	uint64_t	addr = 0x0;
1172 	int		i, ret = 0;
1173 
1174 	D1(vswp, "%s: enter", __func__);
1175 
1176 	/*
1177 	 * Convert address into form that can be used
1178 	 * as hash table key.
1179 	 */
1180 	for (i = 0; i < ETHERADDRL; i++) {
1181 		addr = (addr << 8) | mca[i];
1182 	}
1183 
1184 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1185 
1186 	if (add) {
1187 		D2(vswp, "%s: adding multicast", __func__);
1188 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1189 			/*
1190 			 * Update the list of multicast addresses
1191 			 * contained within the vsw_t structure to
1192 			 * include this new one.
1193 			 */
1194 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1195 			if (mcst_p == NULL) {
1196 				DERR(vswp, "%s unable to alloc mem", __func__);
1197 				(void) vsw_del_mcst(vswp,
1198 				    VSW_LOCALDEV, addr, NULL);
1199 				return (1);
1200 			}
1201 			mcst_p->addr = addr;
1202 			ether_copy(mca, &mcst_p->mca);
1203 
1204 			/*
1205 			 * Call into the underlying driver to program the
1206 			 * address into HW.
1207 			 */
1208 			mutex_enter(&vswp->mac_lock);
1209 			if (vswp->mh != NULL) {
1210 				ret = mac_multicst_add(vswp->mh, mca);
1211 				if (ret != 0) {
1212 					cmn_err(CE_NOTE, "!vsw%d: unable to "
1213 					    "add multicast address",
1214 					    vswp->instance);
1215 					mutex_exit(&vswp->mac_lock);
1216 					(void) vsw_del_mcst(vswp,
1217 					    VSW_LOCALDEV, addr, NULL);
1218 					kmem_free(mcst_p, sizeof (*mcst_p));
1219 					return (ret);
1220 				}
1221 				mcst_p->mac_added = B_TRUE;
1222 			}
1223 			mutex_exit(&vswp->mac_lock);
1224 
1225 			mutex_enter(&vswp->mca_lock);
1226 			mcst_p->nextp = vswp->mcap;
1227 			vswp->mcap = mcst_p;
1228 			mutex_exit(&vswp->mca_lock);
1229 		} else {
1230 			cmn_err(CE_NOTE, "!vsw%d: unable to add multicast "
1231 			    "address", vswp->instance);
1232 		}
1233 		return (ret);
1234 	}
1235 
1236 	D2(vswp, "%s: removing multicast", __func__);
1237 	/*
1238 	 * Remove the address from the hash table..
1239 	 */
1240 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1241 
1242 		/*
1243 		 * ..and then from the list maintained in the
1244 		 * vsw_t structure.
1245 		 */
1246 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1247 		ASSERT(mcst_p != NULL);
1248 
1249 		mutex_enter(&vswp->mac_lock);
1250 		if (vswp->mh != NULL && mcst_p->mac_added) {
1251 			(void) mac_multicst_remove(vswp->mh, mca);
1252 			mcst_p->mac_added = B_FALSE;
1253 		}
1254 		mutex_exit(&vswp->mac_lock);
1255 		kmem_free(mcst_p, sizeof (*mcst_p));
1256 	}
1257 
1258 	D1(vswp, "%s: exit", __func__);
1259 
1260 	return (0);
1261 }
1262 
1263 static int
1264 vsw_m_promisc(void *arg, boolean_t on)
1265 {
1266 	vsw_t		*vswp = (vsw_t *)arg;
1267 
1268 	D1(vswp, "%s: enter", __func__);
1269 
1270 	WRITE_ENTER(&vswp->if_lockrw);
1271 	if (on)
1272 		vswp->if_state |= VSW_IF_PROMISC;
1273 	else
1274 		vswp->if_state &= ~VSW_IF_PROMISC;
1275 	RW_EXIT(&vswp->if_lockrw);
1276 
1277 	D1(vswp, "%s: exit", __func__);
1278 
1279 	return (0);
1280 }
1281 
1282 static mblk_t *
1283 vsw_m_tx(void *arg, mblk_t *mp)
1284 {
1285 	vsw_t		*vswp = (vsw_t *)arg;
1286 
1287 	D1(vswp, "%s: enter", __func__);
1288 
1289 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1290 
1291 	if (mp == NULL) {
1292 		return (NULL);
1293 	}
1294 
1295 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1296 
1297 	D1(vswp, "%s: exit", __func__);
1298 
1299 	return (NULL);
1300 }
1301 
1302 /*
1303  * Register for machine description (MD) updates.
1304  *
1305  * Returns 0 on success, 1 on failure.
1306  */
1307 static int
1308 vsw_mdeg_register(vsw_t *vswp)
1309 {
1310 	mdeg_prop_spec_t	*pspecp;
1311 	mdeg_node_spec_t	*inst_specp;
1312 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1313 	size_t			templatesz;
1314 	int			rv;
1315 
1316 	D1(vswp, "%s: enter", __func__);
1317 
1318 	/*
1319 	 * Allocate and initialize a per-instance copy
1320 	 * of the global property spec array that will
1321 	 * uniquely identify this vsw instance.
1322 	 */
1323 	templatesz = sizeof (vsw_prop_template);
1324 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1325 
1326 	bcopy(vsw_prop_template, pspecp, templatesz);
1327 
1328 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1329 
1330 	/* initialize the complete prop spec structure */
1331 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1332 	inst_specp->namep = "virtual-device";
1333 	inst_specp->specp = pspecp;
1334 
1335 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1336 	    vswp->regprop);
1337 	/*
1338 	 * Register an interest in 'virtual-device' nodes with a
1339 	 * 'name' property of 'virtual-network-switch'
1340 	 */
1341 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1342 	    (void *)vswp, &mdeg_hdl);
1343 	if (rv != MDEG_SUCCESS) {
1344 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1345 		    __func__, rv);
1346 		goto mdeg_reg_fail;
1347 	}
1348 
1349 	/*
1350 	 * Register an interest in 'vsw-port' nodes.
1351 	 */
1352 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1353 	    (void *)vswp, &mdeg_port_hdl);
1354 	if (rv != MDEG_SUCCESS) {
1355 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1356 		(void) mdeg_unregister(mdeg_hdl);
1357 		goto mdeg_reg_fail;
1358 	}
1359 
1360 	/* save off data that will be needed later */
1361 	vswp->inst_spec = inst_specp;
1362 	vswp->mdeg_hdl = mdeg_hdl;
1363 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1364 
1365 	D1(vswp, "%s: exit", __func__);
1366 	return (0);
1367 
1368 mdeg_reg_fail:
1369 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1370 	    vswp->instance);
1371 	kmem_free(pspecp, templatesz);
1372 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1373 
1374 	vswp->mdeg_hdl = NULL;
1375 	vswp->mdeg_port_hdl = NULL;
1376 
1377 	return (1);
1378 }
1379 
1380 static void
1381 vsw_mdeg_unregister(vsw_t *vswp)
1382 {
1383 	D1(vswp, "vsw_mdeg_unregister: enter");
1384 
1385 	if (vswp->mdeg_hdl != NULL)
1386 		(void) mdeg_unregister(vswp->mdeg_hdl);
1387 
1388 	if (vswp->mdeg_port_hdl != NULL)
1389 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1390 
1391 	if (vswp->inst_spec != NULL) {
1392 		if (vswp->inst_spec->specp != NULL) {
1393 			(void) kmem_free(vswp->inst_spec->specp,
1394 			    sizeof (vsw_prop_template));
1395 			vswp->inst_spec->specp = NULL;
1396 		}
1397 
1398 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1399 		vswp->inst_spec = NULL;
1400 	}
1401 
1402 	D1(vswp, "vsw_mdeg_unregister: exit");
1403 }
1404 
1405 /*
1406  * Mdeg callback invoked for the vsw node itself.
1407  */
1408 static int
1409 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1410 {
1411 	vsw_t		*vswp;
1412 	md_t		*mdp;
1413 	mde_cookie_t	node;
1414 	uint64_t	inst;
1415 	char		*node_name = NULL;
1416 
1417 	if (resp == NULL)
1418 		return (MDEG_FAILURE);
1419 
1420 	vswp = (vsw_t *)cb_argp;
1421 
1422 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1423 	    " : prev matched %d", __func__, resp->added.nelem,
1424 	    resp->removed.nelem, resp->match_curr.nelem,
1425 	    resp->match_prev.nelem);
1426 
1427 	/*
1428 	 * We get an initial callback for this node as 'added'
1429 	 * after registering with mdeg. Note that we would have
1430 	 * already gathered information about this vsw node by
1431 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1432 	 * So, there is a window where the properties of this
1433 	 * node might have changed when we get this initial 'added'
1434 	 * callback. We handle this as if an update occured
1435 	 * and invoke the same function which handles updates to
1436 	 * the properties of this vsw-node if any.
1437 	 *
1438 	 * A non-zero 'match' value indicates that the MD has been
1439 	 * updated and that a virtual-network-switch node is
1440 	 * present which may or may not have been updated. It is
1441 	 * up to the clients to examine their own nodes and
1442 	 * determine if they have changed.
1443 	 */
1444 	if (resp->added.nelem != 0) {
1445 
1446 		if (resp->added.nelem != 1) {
1447 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1448 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1449 			return (MDEG_FAILURE);
1450 		}
1451 
1452 		mdp = resp->added.mdp;
1453 		node = resp->added.mdep[0];
1454 
1455 	} else if (resp->match_curr.nelem != 0) {
1456 
1457 		if (resp->match_curr.nelem != 1) {
1458 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1459 			    "invalid: %d\n", vswp->instance,
1460 			    resp->match_curr.nelem);
1461 			return (MDEG_FAILURE);
1462 		}
1463 
1464 		mdp = resp->match_curr.mdp;
1465 		node = resp->match_curr.mdep[0];
1466 
1467 	} else {
1468 		return (MDEG_FAILURE);
1469 	}
1470 
1471 	/* Validate name and instance */
1472 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1473 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1474 		return (MDEG_FAILURE);
1475 	}
1476 
1477 	/* is this a virtual-network-switch? */
1478 	if (strcmp(node_name, vsw_propname) != 0) {
1479 		DERR(vswp, "%s: Invalid node name: %s\n",
1480 		    __func__, node_name);
1481 		return (MDEG_FAILURE);
1482 	}
1483 
1484 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1485 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1486 		    __func__);
1487 		return (MDEG_FAILURE);
1488 	}
1489 
1490 	/* is this the right instance of vsw? */
1491 	if (inst != vswp->regprop) {
1492 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1493 		    __func__, inst);
1494 		return (MDEG_FAILURE);
1495 	}
1496 
1497 	vsw_update_md_prop(vswp, mdp, node);
1498 
1499 	return (MDEG_SUCCESS);
1500 }
1501 
1502 /*
1503  * Mdeg callback invoked for changes to the vsw-port nodes
1504  * under the vsw node.
1505  */
1506 static int
1507 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1508 {
1509 	vsw_t		*vswp;
1510 	int		idx;
1511 	md_t		*mdp;
1512 	mde_cookie_t	node;
1513 	uint64_t	inst;
1514 	int		rv;
1515 
1516 	if ((resp == NULL) || (cb_argp == NULL))
1517 		return (MDEG_FAILURE);
1518 
1519 	vswp = (vsw_t *)cb_argp;
1520 
1521 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1522 	    " : prev matched %d", __func__, resp->added.nelem,
1523 	    resp->removed.nelem, resp->match_curr.nelem,
1524 	    resp->match_prev.nelem);
1525 
1526 	/* process added ports */
1527 	for (idx = 0; idx < resp->added.nelem; idx++) {
1528 		mdp = resp->added.mdp;
1529 		node = resp->added.mdep[idx];
1530 
1531 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1532 
1533 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1534 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1535 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1536 		}
1537 	}
1538 
1539 	/* process removed ports */
1540 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1541 		mdp = resp->removed.mdp;
1542 		node = resp->removed.mdep[idx];
1543 
1544 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1545 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1546 			    __func__, id_propname, idx);
1547 			continue;
1548 		}
1549 
1550 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1551 
1552 		if (vsw_port_detach(vswp, inst) != 0) {
1553 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1554 			    vswp->instance, inst);
1555 		}
1556 	}
1557 
1558 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1559 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1560 		    resp->match_curr.mdep[idx],
1561 		    resp->match_prev.mdp,
1562 		    resp->match_prev.mdep[idx]);
1563 	}
1564 
1565 	D1(vswp, "%s: exit", __func__);
1566 
1567 	return (MDEG_SUCCESS);
1568 }
1569 
1570 /*
1571  * Scan the machine description for this instance of vsw
1572  * and read its properties. Called only from vsw_attach().
1573  * Returns: 0 on success, 1 on failure.
1574  */
1575 static int
1576 vsw_read_mdprops(vsw_t *vswp)
1577 {
1578 	md_t		*mdp = NULL;
1579 	mde_cookie_t	rootnode;
1580 	mde_cookie_t	*listp = NULL;
1581 	uint64_t	inst;
1582 	uint64_t	cfgh;
1583 	char		*name;
1584 	int		rv = 1;
1585 	int		num_nodes = 0;
1586 	int		num_devs = 0;
1587 	int		listsz = 0;
1588 	int		i;
1589 
1590 	/*
1591 	 * In each 'virtual-device' node in the MD there is a
1592 	 * 'cfg-handle' property which is the MD's concept of
1593 	 * an instance number (this may be completely different from
1594 	 * the device drivers instance #). OBP reads that value and
1595 	 * stores it in the 'reg' property of the appropriate node in
1596 	 * the device tree. We first read this reg property and use this
1597 	 * to compare against the 'cfg-handle' property of vsw nodes
1598 	 * in MD to get to this specific vsw instance and then read
1599 	 * other properties that we are interested in.
1600 	 * We also cache the value of 'reg' property and use it later
1601 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1602 	 */
1603 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1604 	    DDI_PROP_DONTPASS, reg_propname, -1);
1605 	if (inst == -1) {
1606 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1607 		    "OBP device tree", vswp->instance, reg_propname);
1608 		return (rv);
1609 	}
1610 
1611 	vswp->regprop = inst;
1612 
1613 	if ((mdp = md_get_handle()) == NULL) {
1614 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1615 		return (rv);
1616 	}
1617 
1618 	num_nodes = md_node_count(mdp);
1619 	ASSERT(num_nodes > 0);
1620 
1621 	listsz = num_nodes * sizeof (mde_cookie_t);
1622 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1623 
1624 	rootnode = md_root_node(mdp);
1625 
1626 	/* search for all "virtual_device" nodes */
1627 	num_devs = md_scan_dag(mdp, rootnode,
1628 	    md_find_name(mdp, vdev_propname),
1629 	    md_find_name(mdp, "fwd"), listp);
1630 	if (num_devs <= 0) {
1631 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1632 		goto vsw_readmd_exit;
1633 	}
1634 
1635 	/*
1636 	 * Now loop through the list of virtual-devices looking for
1637 	 * devices with name "virtual-network-switch" and for each
1638 	 * such device compare its instance with what we have from
1639 	 * the 'reg' property to find the right node in MD and then
1640 	 * read all its properties.
1641 	 */
1642 	for (i = 0; i < num_devs; i++) {
1643 
1644 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1645 			DWARN(vswp, "%s: name property not found\n",
1646 			    __func__);
1647 			goto vsw_readmd_exit;
1648 		}
1649 
1650 		/* is this a virtual-network-switch? */
1651 		if (strcmp(name, vsw_propname) != 0)
1652 			continue;
1653 
1654 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1655 			DWARN(vswp, "%s: cfg-handle property not found\n",
1656 			    __func__);
1657 			goto vsw_readmd_exit;
1658 		}
1659 
1660 		/* is this the required instance of vsw? */
1661 		if (inst != cfgh)
1662 			continue;
1663 
1664 		/* now read all properties of this vsw instance */
1665 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1666 		break;
1667 	}
1668 
1669 vsw_readmd_exit:
1670 
1671 	kmem_free(listp, listsz);
1672 	(void) md_fini_handle(mdp);
1673 	return (rv);
1674 }
1675 
1676 /*
1677  * Read the initial start-of-day values from the specified MD node.
1678  */
1679 static int
1680 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1681 {
1682 	int		i;
1683 	uint64_t 	macaddr = 0;
1684 
1685 	D1(vswp, "%s: enter", __func__);
1686 
1687 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1688 		return (1);
1689 	}
1690 
1691 	/* mac address for vswitch device itself */
1692 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1693 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1694 		    vswp->instance);
1695 		return (1);
1696 	}
1697 
1698 	vsw_save_lmacaddr(vswp, macaddr);
1699 
1700 	if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) {
1701 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1702 		    "defaulting to 'switched' mode",
1703 		    __func__, smode_propname);
1704 
1705 		for (i = 0; i < NUM_SMODES; i++)
1706 			vswp->smode[i] = VSW_LAYER2;
1707 
1708 		vswp->smode_num = NUM_SMODES;
1709 	} else {
1710 		ASSERT(vswp->smode_num != 0);
1711 	}
1712 
1713 	/* read vlan id properties of this vsw instance */
1714 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1715 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1716 
1717 	/* read priority-ether-types */
1718 	vsw_read_pri_eth_types(vswp, mdp, node);
1719 
1720 	D1(vswp, "%s: exit", __func__);
1721 	return (0);
1722 }
1723 
1724 /*
1725  * Read vlan id properties of the given MD node.
1726  * Arguments:
1727  *   arg:          device argument(vsw device or a port)
1728  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1729  *   mdp:          machine description
1730  *   node:         md node cookie
1731  *
1732  * Returns:
1733  *   pvidp:        port-vlan-id of the node
1734  *   vidspp:       list of vlan-ids of the node
1735  *   nvidsp:       # of vlan-ids in the list
1736  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1737  */
1738 static void
1739 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1740 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1741 	uint16_t *default_idp)
1742 {
1743 	vsw_t		*vswp;
1744 	vsw_port_t	*portp;
1745 	char		*pvid_propname;
1746 	char		*vid_propname;
1747 	uint_t		nvids = 0;
1748 	uint32_t	vids_size;
1749 	int		rv;
1750 	int		i;
1751 	uint64_t	*data;
1752 	uint64_t	val;
1753 	int		size;
1754 	int		inst;
1755 
1756 	if (type == VSW_LOCALDEV) {
1757 
1758 		vswp = (vsw_t *)arg;
1759 		pvid_propname = vsw_pvid_propname;
1760 		vid_propname = vsw_vid_propname;
1761 		inst = vswp->instance;
1762 
1763 	} else if (type == VSW_VNETPORT) {
1764 
1765 		portp = (vsw_port_t *)arg;
1766 		vswp = portp->p_vswp;
1767 		pvid_propname = port_pvid_propname;
1768 		vid_propname = port_vid_propname;
1769 		inst = portp->p_instance;
1770 
1771 	} else {
1772 		return;
1773 	}
1774 
1775 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1776 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1777 		if (rv != 0) {
1778 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1779 			    vsw_dvid_propname);
1780 
1781 			*default_idp = vsw_default_vlan_id;
1782 		} else {
1783 			*default_idp = val & 0xFFF;
1784 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1785 			    vsw_dvid_propname, inst, *default_idp);
1786 		}
1787 	}
1788 
1789 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1790 	if (rv != 0) {
1791 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1792 		*pvidp = vsw_default_vlan_id;
1793 	} else {
1794 
1795 		*pvidp = val & 0xFFF;
1796 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1797 		    pvid_propname, inst, *pvidp);
1798 	}
1799 
1800 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1801 	    &size);
1802 	if (rv != 0) {
1803 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1804 		size = 0;
1805 	} else {
1806 		size /= sizeof (uint64_t);
1807 	}
1808 	nvids = size;
1809 
1810 	if (nvids != 0) {
1811 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1812 		vids_size = sizeof (uint16_t) * nvids;
1813 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1814 		for (i = 0; i < nvids; i++) {
1815 			(*vidspp)[i] = data[i] & 0xFFFF;
1816 			D2(vswp, " %d ", (*vidspp)[i]);
1817 		}
1818 		D2(vswp, "\n");
1819 	}
1820 
1821 	*nvidsp = nvids;
1822 }
1823 
1824 /*
1825  * This function reads "priority-ether-types" property from md. This property
1826  * is used to enable support for priority frames. Applications which need
1827  * guaranteed and timely delivery of certain high priority frames to/from
1828  * a vnet or vsw within ldoms, should configure this property by providing
1829  * the ether type(s) for which the priority facility is needed.
1830  * Normal data frames are delivered over a ldc channel using the descriptor
1831  * ring mechanism which is constrained by factors such as descriptor ring size,
1832  * the rate at which the ring is processed at the peer ldc end point, etc.
1833  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1834  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1835  * descriptor ring path and enables a more reliable and timely delivery of
1836  * frames to the peer.
1837  */
1838 static void
1839 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1840 {
1841 	int		rv;
1842 	uint16_t	*types;
1843 	uint64_t	*data;
1844 	int		size;
1845 	int		i;
1846 	size_t		mblk_sz;
1847 
1848 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1849 	    (uint8_t **)&data, &size);
1850 	if (rv != 0) {
1851 		/*
1852 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1853 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1854 		 */
1855 		if (vsw_pri_eth_type != 0) {
1856 			size = sizeof (vsw_pri_eth_type);
1857 			data = &vsw_pri_eth_type;
1858 		} else {
1859 			D3(vswp, "%s: prop(%s) not found", __func__,
1860 			    pri_types_propname);
1861 			size = 0;
1862 		}
1863 	}
1864 
1865 	if (size == 0) {
1866 		vswp->pri_num_types = 0;
1867 		return;
1868 	}
1869 
1870 	/*
1871 	 * we have some priority-ether-types defined;
1872 	 * allocate a table of these types and also
1873 	 * allocate a pool of mblks to transmit these
1874 	 * priority packets.
1875 	 */
1876 	size /= sizeof (uint64_t);
1877 	vswp->pri_num_types = size;
1878 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1879 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1880 		types[i] = data[i] & 0xFFFF;
1881 	}
1882 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1883 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1884 }
1885 
1886 /*
1887  * Check to see if the relevant properties in the specified node have
1888  * changed, and if so take the appropriate action.
1889  *
1890  * If any of the properties are missing or invalid we don't take
1891  * any action, as this function should only be invoked when modifications
1892  * have been made to what we assume is a working configuration, which
1893  * we leave active.
1894  *
1895  * Note it is legal for this routine to be invoked even if none of the
1896  * properties in the port node within the MD have actually changed.
1897  */
1898 static void
1899 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1900 {
1901 	char		physname[LIFNAMSIZ];
1902 	char		drv[LIFNAMSIZ];
1903 	uint_t		ddi_instance;
1904 	uint8_t		new_smode[NUM_SMODES];
1905 	int		i, smode_num = 0;
1906 	uint64_t 	macaddr = 0;
1907 	enum		{MD_init = 0x1,
1908 				MD_physname = 0x2,
1909 				MD_macaddr = 0x4,
1910 				MD_smode = 0x8,
1911 				MD_vlans = 0x10} updated;
1912 	int		rv;
1913 	uint16_t	pvid;
1914 	uint16_t	*vids;
1915 	uint16_t	nvids;
1916 
1917 	updated = MD_init;
1918 
1919 	D1(vswp, "%s: enter", __func__);
1920 
1921 	/*
1922 	 * Check if name of physical device in MD has changed.
1923 	 */
1924 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
1925 		/*
1926 		 * Do basic sanity check on new device name/instance,
1927 		 * if its non NULL. It is valid for the device name to
1928 		 * have changed from a non NULL to a NULL value, i.e.
1929 		 * the vsw is being changed to 'routed' mode.
1930 		 */
1931 		if ((strlen(physname) != 0) &&
1932 		    (ddi_parse(physname, drv,
1933 		    &ddi_instance) != DDI_SUCCESS)) {
1934 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
1935 			    " a valid device name/instance",
1936 			    vswp->instance, physname);
1937 			goto fail_reconf;
1938 		}
1939 
1940 		if (strcmp(physname, vswp->physname)) {
1941 			D2(vswp, "%s: device name changed from %s to %s",
1942 			    __func__, vswp->physname, physname);
1943 
1944 			updated |= MD_physname;
1945 		} else {
1946 			D2(vswp, "%s: device name unchanged at %s",
1947 			    __func__, vswp->physname);
1948 		}
1949 	} else {
1950 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
1951 		    "device from updated MD.", vswp->instance);
1952 		goto fail_reconf;
1953 	}
1954 
1955 	/*
1956 	 * Check if MAC address has changed.
1957 	 */
1958 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1959 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1960 		    vswp->instance);
1961 		goto fail_reconf;
1962 	} else {
1963 		uint64_t maddr = macaddr;
1964 		READ_ENTER(&vswp->if_lockrw);
1965 		for (i = ETHERADDRL - 1; i >= 0; i--) {
1966 			if (vswp->if_addr.ether_addr_octet[i]
1967 			    != (macaddr & 0xFF)) {
1968 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
1969 				    __func__, i,
1970 				    vswp->if_addr.ether_addr_octet[i],
1971 				    (macaddr & 0xFF));
1972 				updated |= MD_macaddr;
1973 				macaddr = maddr;
1974 				break;
1975 			}
1976 			macaddr >>= 8;
1977 		}
1978 		RW_EXIT(&vswp->if_lockrw);
1979 		if (updated & MD_macaddr) {
1980 			vsw_save_lmacaddr(vswp, macaddr);
1981 		}
1982 	}
1983 
1984 	/*
1985 	 * Check if switching modes have changed.
1986 	 */
1987 	if (vsw_get_md_smodes(vswp, mdp, node,
1988 	    new_smode, &smode_num)) {
1989 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
1990 		    vswp->instance, smode_propname);
1991 		goto fail_reconf;
1992 	} else {
1993 		ASSERT(smode_num != 0);
1994 		if (smode_num != vswp->smode_num) {
1995 			D2(vswp, "%s: number of modes changed from %d to %d",
1996 			    __func__, vswp->smode_num, smode_num);
1997 		}
1998 
1999 		for (i = 0; i < smode_num; i++) {
2000 			if (new_smode[i] != vswp->smode[i]) {
2001 				D2(vswp, "%s: mode changed from %d to %d",
2002 				    __func__, vswp->smode[i], new_smode[i]);
2003 				updated |= MD_smode;
2004 				break;
2005 			}
2006 		}
2007 	}
2008 
2009 	/* Read the vlan ids */
2010 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2011 	    &nvids, NULL);
2012 
2013 	/* Determine if there are any vlan id updates */
2014 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2015 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2016 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2017 	    bcmp(vids, vswp->vids, sizeof (uint16_t) * nvids))) {
2018 		updated |= MD_vlans;
2019 	}
2020 
2021 	/*
2022 	 * Now make any changes which are needed...
2023 	 */
2024 
2025 	if (updated & (MD_physname | MD_smode)) {
2026 
2027 		/*
2028 		 * Stop any pending timeout to setup switching mode.
2029 		 */
2030 		vsw_stop_switching_timeout(vswp);
2031 
2032 		/*
2033 		 * Remove unicst, mcst addrs of vsw interface
2034 		 * and ports from the physdev.
2035 		 */
2036 		vsw_unset_addrs(vswp);
2037 
2038 		/*
2039 		 * Stop, detach and close the old device..
2040 		 */
2041 		mutex_enter(&vswp->mac_lock);
2042 
2043 		vsw_mac_detach(vswp);
2044 		vsw_mac_close(vswp);
2045 
2046 		mutex_exit(&vswp->mac_lock);
2047 
2048 		/*
2049 		 * Update phys name.
2050 		 */
2051 		if (updated & MD_physname) {
2052 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2053 			    vswp->instance, vswp->physname, physname);
2054 			(void) strncpy(vswp->physname,
2055 			    physname, strlen(physname) + 1);
2056 		}
2057 
2058 		/*
2059 		 * Update array with the new switch mode values.
2060 		 */
2061 		if (updated & MD_smode) {
2062 			for (i = 0; i < smode_num; i++)
2063 				vswp->smode[i] = new_smode[i];
2064 
2065 			vswp->smode_num = smode_num;
2066 			vswp->smode_idx = 0;
2067 		}
2068 
2069 		/*
2070 		 * ..and attach, start the new device.
2071 		 */
2072 		rv = vsw_setup_switching(vswp);
2073 		if (rv == EAGAIN) {
2074 			/*
2075 			 * Unable to setup switching mode.
2076 			 * As the error is EAGAIN, schedule a timeout to retry
2077 			 * and return. Programming addresses of ports and
2078 			 * vsw interface will be done when the timeout handler
2079 			 * completes successfully.
2080 			 */
2081 			mutex_enter(&vswp->swtmout_lock);
2082 
2083 			vswp->swtmout_enabled = B_TRUE;
2084 			vswp->swtmout_id =
2085 			    timeout(vsw_setup_switching_timeout, vswp,
2086 			    (vsw_setup_switching_delay *
2087 			    drv_usectohz(MICROSEC)));
2088 
2089 			mutex_exit(&vswp->swtmout_lock);
2090 
2091 			return;
2092 
2093 		} else if (rv) {
2094 			goto fail_update;
2095 		}
2096 
2097 		/*
2098 		 * program unicst, mcst addrs of vsw interface
2099 		 * and ports in the physdev.
2100 		 */
2101 		vsw_set_addrs(vswp);
2102 
2103 	} else if (updated & MD_macaddr) {
2104 		/*
2105 		 * We enter here if only MD_macaddr is exclusively updated.
2106 		 * If MD_physname and/or MD_smode are also updated, then
2107 		 * as part of that, we would have implicitly processed
2108 		 * MD_macaddr update (above).
2109 		 */
2110 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2111 		    vswp->instance, macaddr);
2112 
2113 		READ_ENTER(&vswp->if_lockrw);
2114 		if (vswp->if_state & VSW_IF_UP) {
2115 
2116 			mutex_enter(&vswp->hw_lock);
2117 			/*
2118 			 * Remove old mac address of vsw interface
2119 			 * from the physdev
2120 			 */
2121 			(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
2122 			/*
2123 			 * Program new mac address of vsw interface
2124 			 * in the physdev
2125 			 */
2126 			rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
2127 			mutex_exit(&vswp->hw_lock);
2128 			if (rv != 0) {
2129 				cmn_err(CE_NOTE,
2130 				    "!vsw%d: failed to program interface "
2131 				    "unicast address\n", vswp->instance);
2132 			}
2133 			/*
2134 			 * Notify the MAC layer of the changed address.
2135 			 */
2136 			mac_unicst_update(vswp->if_mh,
2137 			    (uint8_t *)&vswp->if_addr);
2138 
2139 		}
2140 		RW_EXIT(&vswp->if_lockrw);
2141 
2142 	}
2143 
2144 	if (updated & MD_vlans) {
2145 		/* Remove existing vlan ids from the hash table. */
2146 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2147 
2148 		/* save the new vlan ids */
2149 		vswp->pvid = pvid;
2150 		if (vswp->nvids != 0) {
2151 			kmem_free(vswp->vids, sizeof (uint16_t) * vswp->nvids);
2152 			vswp->nvids = 0;
2153 		}
2154 		if (nvids != 0) {
2155 			vswp->nvids = nvids;
2156 			vswp->vids = vids;
2157 		}
2158 
2159 		/* add these new vlan ids into hash table */
2160 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2161 	} else {
2162 		if (nvids != 0) {
2163 			kmem_free(vids, sizeof (uint16_t) * nvids);
2164 		}
2165 	}
2166 
2167 	return;
2168 
2169 fail_reconf:
2170 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2171 	return;
2172 
2173 fail_update:
2174 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2175 	    vswp->instance);
2176 }
2177 
2178 /*
2179  * Read the port's md properties.
2180  */
2181 static int
2182 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2183 	md_t *mdp, mde_cookie_t *node)
2184 {
2185 	uint64_t		ldc_id;
2186 	uint8_t			*addrp;
2187 	int			i, addrsz;
2188 	int			num_nodes = 0, nchan = 0;
2189 	int			listsz = 0;
2190 	mde_cookie_t		*listp = NULL;
2191 	struct ether_addr	ea;
2192 	uint64_t		macaddr;
2193 	uint64_t		inst = 0;
2194 
2195 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2196 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2197 		    id_propname);
2198 		return (1);
2199 	}
2200 
2201 	/*
2202 	 * Find the channel endpoint node(s) (which should be under this
2203 	 * port node) which contain the channel id(s).
2204 	 */
2205 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2206 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2207 		    __func__, num_nodes);
2208 		return (1);
2209 	}
2210 
2211 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2212 
2213 	/* allocate enough space for node list */
2214 	listsz = num_nodes * sizeof (mde_cookie_t);
2215 	listp = kmem_zalloc(listsz, KM_SLEEP);
2216 
2217 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2218 	    md_find_name(mdp, "fwd"), listp);
2219 
2220 	if (nchan <= 0) {
2221 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2222 		kmem_free(listp, listsz);
2223 		return (1);
2224 	}
2225 
2226 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2227 
2228 	/* use property from first node found */
2229 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2230 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2231 		    id_propname);
2232 		kmem_free(listp, listsz);
2233 		return (1);
2234 	}
2235 
2236 	/* don't need list any more */
2237 	kmem_free(listp, listsz);
2238 
2239 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2240 
2241 	/* read mac-address property */
2242 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2243 	    &addrp, &addrsz)) {
2244 		DWARN(vswp, "%s: prop(%s) not found",
2245 		    __func__, remaddr_propname);
2246 		return (1);
2247 	}
2248 
2249 	if (addrsz < ETHERADDRL) {
2250 		DWARN(vswp, "%s: invalid address size", __func__);
2251 		return (1);
2252 	}
2253 
2254 	macaddr = *((uint64_t *)addrp);
2255 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2256 
2257 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2258 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2259 		macaddr >>= 8;
2260 	}
2261 
2262 	/* now update all properties into the port */
2263 	portp->p_vswp = vswp;
2264 	portp->p_instance = inst;
2265 	portp->addr_set = VSW_ADDR_UNSET;
2266 	ether_copy(&ea, &portp->p_macaddr);
2267 	if (nchan > VSW_PORT_MAX_LDCS) {
2268 		D2(vswp, "%s: using first of %d ldc ids",
2269 		    __func__, nchan);
2270 		nchan = VSW_PORT_MAX_LDCS;
2271 	}
2272 	portp->num_ldcs = nchan;
2273 	portp->ldc_ids =
2274 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2275 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2276 
2277 	/* read vlan id properties of this port node */
2278 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2279 	    &portp->vids, &portp->nvids, NULL);
2280 
2281 	return (0);
2282 }
2283 
2284 /*
2285  * Add a new port to the system.
2286  *
2287  * Returns 0 on success, 1 on failure.
2288  */
2289 int
2290 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2291 {
2292 	vsw_port_t	*portp;
2293 	int		rv;
2294 
2295 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2296 
2297 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2298 	if (rv != 0) {
2299 		kmem_free(portp, sizeof (*portp));
2300 		return (1);
2301 	}
2302 
2303 	rv = vsw_port_attach(portp);
2304 	if (rv != 0) {
2305 		DERR(vswp, "%s: failed to attach port", __func__);
2306 		return (1);
2307 	}
2308 
2309 	return (0);
2310 }
2311 
2312 static int
2313 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2314 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2315 {
2316 	uint64_t	cport_num;
2317 	uint64_t	pport_num;
2318 	vsw_port_list_t	*plistp;
2319 	vsw_port_t	*portp;
2320 	boolean_t	updated_vlans = B_FALSE;
2321 	uint16_t	pvid;
2322 	uint16_t	*vids;
2323 	uint16_t	nvids;
2324 
2325 	/*
2326 	 * For now, we get port updates only if vlan ids changed.
2327 	 * We read the port num and do some sanity check.
2328 	 */
2329 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2330 		return (1);
2331 	}
2332 
2333 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2334 		return (1);
2335 	}
2336 	if (cport_num != pport_num)
2337 		return (1);
2338 
2339 	plistp = &(vswp->plist);
2340 
2341 	READ_ENTER(&plistp->lockrw);
2342 
2343 	portp = vsw_lookup_port(vswp, cport_num);
2344 	if (portp == NULL) {
2345 		RW_EXIT(&plistp->lockrw);
2346 		return (1);
2347 	}
2348 
2349 	/* Read the vlan ids */
2350 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2351 	    &vids, &nvids, NULL);
2352 
2353 	/* Determine if there are any vlan id updates */
2354 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2355 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2356 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2357 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2358 		updated_vlans = B_TRUE;
2359 	}
2360 
2361 	if (updated_vlans == B_FALSE) {
2362 		RW_EXIT(&plistp->lockrw);
2363 		return (1);
2364 	}
2365 
2366 	/* Remove existing vlan ids from the hash table. */
2367 	vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2368 
2369 	/* save the new vlan ids */
2370 	portp->pvid = pvid;
2371 	if (portp->nvids != 0) {
2372 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2373 		portp->nvids = 0;
2374 	}
2375 	if (nvids != 0) {
2376 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2377 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2378 		portp->nvids = nvids;
2379 		kmem_free(vids, sizeof (uint16_t) * nvids);
2380 	}
2381 
2382 	/* add these new vlan ids into hash table */
2383 	vsw_vlan_add_ids(portp, VSW_VNETPORT);
2384 
2385 	/* reset the port if it is vlan unaware (ver < 1.3) */
2386 	vsw_vlan_unaware_port_reset(portp);
2387 
2388 	RW_EXIT(&plistp->lockrw);
2389 
2390 	return (0);
2391 }
2392 
2393 /*
2394  * vsw_mac_rx -- A common function to send packets to the interface.
2395  * By default this function check if the interface is UP or not, the
2396  * rest of the behaviour depends on the flags as below:
2397  *
2398  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2399  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2400  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2401  */
2402 void
2403 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2404     mblk_t *mp, vsw_macrx_flags_t flags)
2405 {
2406 	mblk_t		*mpt;
2407 
2408 	D1(vswp, "%s:enter\n", __func__);
2409 	READ_ENTER(&vswp->if_lockrw);
2410 	/* Check if the interface is up */
2411 	if (!(vswp->if_state & VSW_IF_UP)) {
2412 		RW_EXIT(&vswp->if_lockrw);
2413 		/* Free messages only if FREEMSG flag specified */
2414 		if (flags & VSW_MACRX_FREEMSG) {
2415 			freemsgchain(mp);
2416 		}
2417 		D1(vswp, "%s:exit\n", __func__);
2418 		return;
2419 	}
2420 	/*
2421 	 * If PROMISC flag is passed, then check if
2422 	 * the interface is in the PROMISC mode.
2423 	 * If not, drop the messages.
2424 	 */
2425 	if (flags & VSW_MACRX_PROMISC) {
2426 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2427 			RW_EXIT(&vswp->if_lockrw);
2428 			/* Free messages only if FREEMSG flag specified */
2429 			if (flags & VSW_MACRX_FREEMSG) {
2430 				freemsgchain(mp);
2431 			}
2432 			D1(vswp, "%s:exit\n", __func__);
2433 			return;
2434 		}
2435 	}
2436 	RW_EXIT(&vswp->if_lockrw);
2437 	/*
2438 	 * If COPYMSG flag is passed, then make a copy
2439 	 * of the message chain and send up the copy.
2440 	 */
2441 	if (flags & VSW_MACRX_COPYMSG) {
2442 		mp = copymsgchain(mp);
2443 		if (mp == NULL) {
2444 			D1(vswp, "%s:exit\n", __func__);
2445 			return;
2446 		}
2447 	}
2448 
2449 	D2(vswp, "%s: sending up stack", __func__);
2450 
2451 	mpt = NULL;
2452 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2453 	if (mp != NULL) {
2454 		mac_rx(vswp->if_mh, mrh, mp);
2455 	}
2456 	D1(vswp, "%s:exit\n", __func__);
2457 }
2458 
2459 /* copy mac address of vsw into soft state structure */
2460 static void
2461 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2462 {
2463 	int	i;
2464 
2465 	WRITE_ENTER(&vswp->if_lockrw);
2466 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2467 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2468 		macaddr >>= 8;
2469 	}
2470 	RW_EXIT(&vswp->if_lockrw);
2471 }
2472