xref: /titanic_50/usr/src/uts/sun4v/io/vsw_phys.c (revision b510adae7e8895b2bf58eda3537fd56df35302e4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <netinet/arp.h>
53 #include <inet/arp.h>
54 #include <sys/varargs.h>
55 #include <sys/machsystm.h>
56 #include <sys/modctl.h>
57 #include <sys/modhash.h>
58 #include <sys/mac_client.h>
59 #include <sys/mac_provider.h>
60 #include <sys/mac_ether.h>
61 #include <sys/taskq.h>
62 #include <sys/note.h>
63 #include <sys/mach_descrip.h>
64 #include <sys/mac.h>
65 #include <sys/mdeg.h>
66 #include <sys/vsw.h>
67 #include <sys/vlan.h>
68 
69 /* MAC Ring table functions. */
70 static void vsw_port_rx_cb(void *, mac_resource_handle_t, mblk_t *,
71     boolean_t);
72 static void vsw_if_rx_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t);
73 
74 /* MAC layer routines */
75 static int vsw_set_port_hw_addr(vsw_port_t *port);
76 static int vsw_set_if_hw_addr(vsw_t *vswp);
77 static	void vsw_unset_hw_addr(vsw_t *, vsw_port_t *, int);
78 static int vsw_maccl_open(vsw_t *vswp, vsw_port_t *port, int type);
79 static void vsw_maccl_close(vsw_t *vswp, vsw_port_t *port, int type);
80 static void vsw_mac_multicast_add_all(vsw_t *vswp, vsw_port_t *portp, int type);
81 static void vsw_mac_multicast_remove_all(vsw_t *vswp,
82     vsw_port_t *portp, int type);
83 static void vsw_mac_add_vlans(vsw_t *vswp, mac_client_handle_t mch,
84     uint8_t *macaddr, uint16_t flags, vsw_vlanid_t *vids, int nvids);
85 static void vsw_mac_remove_vlans(mac_client_handle_t mch, vsw_vlanid_t *vids,
86     int nvids);
87 static	void vsw_mac_set_mtu(vsw_t *vswp, uint32_t mtu);
88 
89 /* Support functions */
90 int vsw_set_hw(vsw_t *, vsw_port_t *, int);
91 void vsw_unset_hw(vsw_t *, vsw_port_t *, int);
92 void vsw_reconfig_hw(vsw_t *);
93 int vsw_mac_open(vsw_t *vswp);
94 void vsw_mac_close(vsw_t *vswp);
95 int vsw_mac_multicast_add(vsw_t *vswp, vsw_port_t *port, mcst_addr_t *mcst_p,
96     int type);
97 void vsw_mac_multicast_remove(vsw_t *vswp, vsw_port_t *port,
98     mcst_addr_t *mcst_p, int type);
99 int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
100 void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
101 void vsw_mac_cleanup_ports(vsw_t *vswp);
102 void vsw_unset_addrs(vsw_t *vswp);
103 void vsw_set_addrs(vsw_t *vswp);
104 mblk_t *vsw_tx_msg(vsw_t *, mblk_t *, int, vsw_port_t *);
105 void vsw_publish_macaddr(vsw_t *vswp, vsw_port_t *portp);
106 void vsw_port_mac_reconfig(vsw_port_t *portp, boolean_t update_vlans,
107     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
108 void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
109     vsw_vlanid_t *new_vids, int new_nvids);
110 void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
111     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
112 
113 /*
114  * Functions imported from other files.
115  */
116 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp);
117 extern void vsw_hio_stop_port(vsw_port_t *portp);
118 extern void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate);
119 extern uint32_t vsw_publish_macaddr_count;
120 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
121 	mblk_t **npt);
122 static char mac_mtu_propname[] = "mtu";
123 
124 /*
125  * Tunables used in this file.
126  */
127 extern int vsw_mac_open_retries;
128 
129 
/*
 * Acquire or release the MAC client lock that matches 'type': the
 * switch-wide lock (vswp->maccl_rwlock) when type is VSW_LOCALDEV,
 * otherwise the per-port lock (port->maccl_rwlock).
 *
 * Every argument and each whole expansion is parenthesized so that the
 * macros expand correctly when passed compound expressions. Only the
 * selected branch dereferences its pointer, so 'port' may be NULL when
 * type is VSW_LOCALDEV.
 */
#define	WRITE_MACCL_ENTER(vswp, port, type)	\
	(((type) == VSW_LOCALDEV) ?	\
	rw_enter(&(vswp)->maccl_rwlock, RW_WRITER) :	\
	rw_enter(&(port)->maccl_rwlock, RW_WRITER))

#define	READ_MACCL_ENTER(vswp, port, type)	\
	(((type) == VSW_LOCALDEV) ?	\
	rw_enter(&(vswp)->maccl_rwlock, RW_READER) :	\
	rw_enter(&(port)->maccl_rwlock, RW_READER))

#define	RW_MACCL_EXIT(vswp, port, type)	\
	(((type) == VSW_LOCALDEV) ?	\
	rw_exit(&(vswp)->maccl_rwlock) :	\
	rw_exit(&(port)->maccl_rwlock))
141 
142 
/*
 * The locking strategy in this file is as follows:
 *	- A global lock (vswp->mac_lock) protects the MAC calls that
 *	  deal with the entire device, that is, the operations on the
 *	  mac_handle, which include mac_open()/mac_close() and
 *	  mac_client_open().
 *
 *	- A per port/interface RW lock (maccl_rwlock) protects the
 *	  operations that deal with the MAC client.
 *
 *	When both mac_lock and maccl_rwlock need to be held, mac_lock
 *	must be acquired first and then maccl_rwlock. That is,
 *		mac_lock---->maccl_rwlock
 *
 *	The 'mca_lock' that protects the mcast list is also acquired
 *	within the context of maccl_rwlock. The hierarchy for this
 *	one is as below:
 *		maccl_rwlock---->mca_lock
 */
162 
163 
164 /*
165  * Program unicast and multicast addresses of vsw interface and the ports
166  * into the network device.
167  */
168 void
169 vsw_set_addrs(vsw_t *vswp)
170 {
171 	vsw_port_list_t	*plist = &vswp->plist;
172 	vsw_port_t	*port;
173 	int		rv;
174 
175 	READ_ENTER(&vswp->if_lockrw);
176 
177 	if (vswp->if_state & VSW_IF_UP) {
178 
179 		/* Open a mac client and program addresses */
180 		rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
181 		if (rv != 0) {
182 			cmn_err(CE_NOTE,
183 			    "!vsw%d: failed to program interface "
184 			    "unicast address\n", vswp->instance);
185 		}
186 
187 		/*
188 		 * Notify the MAC layer of the changed address.
189 		 */
190 		if (rv == 0) {
191 			mac_unicst_update(vswp->if_mh,
192 			    (uint8_t *)&vswp->if_addr);
193 		}
194 
195 	}
196 
197 	RW_EXIT(&vswp->if_lockrw);
198 
199 	WRITE_ENTER(&plist->lockrw);
200 
201 	/* program unicast address of ports in the network device */
202 	for (port = plist->head; port != NULL; port = port->p_next) {
203 		if (port->addr_set) /* addr already set */
204 			continue;
205 
206 		/* Open a mac client and program addresses */
207 		rv = vsw_mac_client_init(vswp, port, VSW_VNETPORT);
208 		if (rv != 0) {
209 			cmn_err(CE_NOTE,
210 			    "!vsw%d: failed to program port(%d) "
211 			    "unicast address\n", vswp->instance,
212 			    port->p_instance);
213 		}
214 	}
215 	/* announce macaddr of vnets to the physical switch */
216 	if (vsw_publish_macaddr_count != 0) {	/* enabled */
217 		for (port = plist->head; port != NULL; port = port->p_next) {
218 			vsw_publish_macaddr(vswp, port);
219 		}
220 	}
221 
222 	RW_EXIT(&plist->lockrw);
223 }
224 
225 /*
226  * Remove unicast, multicast addresses and close mac clients
227  * for the vsw interface and all ports.
228  */
229 void
230 vsw_unset_addrs(vsw_t *vswp)
231 {
232 	READ_ENTER(&vswp->if_lockrw);
233 	if (vswp->if_state & VSW_IF_UP) {
234 
235 		/* Cleanup and close the mac client for the interface */
236 		vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
237 	}
238 	RW_EXIT(&vswp->if_lockrw);
239 
240 	/* Cleanup and close the mac clients for all ports */
241 	vsw_mac_cleanup_ports(vswp);
242 }
243 
244 /*
245  * Open the underlying network device for access in layer2 mode.
246  * Returns:
247  *	0 on success
248  *	EAGAIN if mac_open() fails due to the device being not available yet.
249  *	EIO on any other failures.
250  */
251 int
252 vsw_mac_open(vsw_t *vswp)
253 {
254 	int			rv;
255 
256 	ASSERT(MUTEX_HELD(&vswp->mac_lock));
257 
258 	if (vswp->mh != NULL) {
259 		/* already open */
260 		return (0);
261 	}
262 
263 	if (vswp->mac_open_retries++ >= vsw_mac_open_retries) {
264 		/* exceeded max retries */
265 		return (EIO);
266 	}
267 
268 	if ((rv = mac_open_by_linkname(vswp->physname, &vswp->mh)) != 0) {
269 		/*
270 		 * If mac_open() failed and the error indicates that either
271 		 * the dlmgmtd door or the device is not available yet, we
272 		 * return EAGAIN to indicate that mac_open() needs to be
273 		 * retried. For example, this may happen during boot up, if
274 		 * the required link aggregation groups(devices) have not
275 		 * been created yet.
276 		 */
277 		if (rv == ENOENT || rv == EBADF) {
278 			return (EAGAIN);
279 		} else {
280 			cmn_err(CE_WARN, "vsw%d: mac_open %s failed rv:%x",
281 			    vswp->instance, vswp->physname, rv);
282 			return (EIO);
283 		}
284 	}
285 	vswp->mac_open_retries = 0;
286 
287 	vsw_mac_set_mtu(vswp, vswp->mtu);
288 
289 	return (0);
290 }
291 
292 /*
293  * Close the underlying physical device.
294  */
295 void
296 vsw_mac_close(vsw_t *vswp)
297 {
298 	ASSERT(MUTEX_HELD(&vswp->mac_lock));
299 
300 	if (vswp->mh != NULL) {
301 		if (vswp->mtu != vswp->mtu_physdev_orig) {
302 			vsw_mac_set_mtu(vswp, vswp->mtu_physdev_orig);
303 		}
304 		mac_close(vswp->mh);
305 		vswp->mh = NULL;
306 	}
307 }
308 
309 /*
310  * Add multicast addr.
311  */
312 int
313 vsw_mac_multicast_add(vsw_t *vswp, vsw_port_t *port, mcst_addr_t *mcst_p,
314     int type)
315 {
316 	int			ret = 0;
317 	mac_client_handle_t	mch;
318 
319 	WRITE_MACCL_ENTER(vswp, port, type);
320 
321 	mch = (type == VSW_LOCALDEV) ? vswp->mch : port->p_mch;
322 
323 	if (mch != NULL) {
324 		ret = mac_multicast_add(mch, mcst_p->mca.ether_addr_octet);
325 		if (ret != 0) {
326 			cmn_err(CE_WARN, "!vsw%d: unable to "
327 			    "program multicast address(%s) err=%d",
328 			    vswp->instance,
329 			    ether_sprintf((void *)&mcst_p->mca), ret);
330 			RW_MACCL_EXIT(vswp, port, type);
331 			return (ret);
332 		}
333 		mcst_p->mac_added = B_TRUE;
334 	}
335 
336 	RW_MACCL_EXIT(vswp, port, type);
337 	return (ret);
338 }
339 
340 /*
341  * Remove multicast addr.
342  */
343 void
344 vsw_mac_multicast_remove(vsw_t *vswp, vsw_port_t *port, mcst_addr_t *mcst_p,
345     int type)
346 {
347 	mac_client_handle_t	mch;
348 
349 	WRITE_MACCL_ENTER(vswp, port, type);
350 	mch = (type == VSW_LOCALDEV) ? vswp->mch : port->p_mch;
351 
352 	if (mch != NULL && mcst_p->mac_added) {
353 		mac_multicast_remove(mch, mcst_p->mca.ether_addr_octet);
354 		mcst_p->mac_added = B_FALSE;
355 	}
356 	RW_MACCL_EXIT(vswp, port, type);
357 }
358 
359 
360 /*
361  * Add all multicast addresses of the port.
362  */
363 static void
364 vsw_mac_multicast_add_all(vsw_t *vswp, vsw_port_t *portp, int type)
365 {
366 	mcst_addr_t		*mcap;
367 	mac_client_handle_t	mch;
368 	kmutex_t		*mca_lockp;
369 	int			rv;
370 
371 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
372 	if (type == VSW_LOCALDEV) {
373 		ASSERT(RW_WRITE_HELD(&vswp->maccl_rwlock));
374 		mch = vswp->mch;
375 		mcap = vswp->mcap;
376 		mca_lockp = &vswp->mca_lock;
377 	} else {
378 		ASSERT(RW_WRITE_HELD(&portp->maccl_rwlock));
379 		mch = portp->p_mch;
380 		mcap = portp->mcap;
381 		mca_lockp = &portp->mca_lock;
382 	}
383 
384 	if (mch == NULL)
385 		return;
386 
387 	mutex_enter(mca_lockp);
388 	for (mcap = mcap; mcap != NULL; mcap = mcap->nextp) {
389 		if (mcap->mac_added)
390 			continue;
391 		rv = mac_multicast_add(mch, (uchar_t *)&mcap->mca);
392 		if (rv == 0) {
393 			mcap->mac_added = B_TRUE;
394 		} else {
395 			cmn_err(CE_WARN, "!vsw%d: unable to program "
396 			    "multicast address(%s) err=%d", vswp->instance,
397 			    ether_sprintf((void *)&mcap->mca), rv);
398 		}
399 	}
400 	mutex_exit(mca_lockp);
401 }
402 
403 /*
404  * Remove all multicast addresses of the port.
405  */
406 static void
407 vsw_mac_multicast_remove_all(vsw_t *vswp, vsw_port_t *portp, int type)
408 {
409 	mac_client_handle_t	mch;
410 	mcst_addr_t		*mcap;
411 	kmutex_t		*mca_lockp;
412 
413 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
414 	if (type == VSW_LOCALDEV) {
415 		ASSERT(RW_WRITE_HELD(&vswp->maccl_rwlock));
416 		mch = vswp->mch;
417 		mcap = vswp->mcap;
418 		mca_lockp = &vswp->mca_lock;
419 	} else {
420 		ASSERT(RW_WRITE_HELD(&portp->maccl_rwlock));
421 		mch = portp->p_mch;
422 		mcap = portp->mcap;
423 		mca_lockp = &portp->mca_lock;
424 	}
425 
426 	if (mch == NULL)
427 		return;
428 
429 	mutex_enter(mca_lockp);
430 	for (; mcap != NULL; mcap = mcap->nextp) {
431 		if (!mcap->mac_added)
432 			continue;
433 		(void) mac_multicast_remove(mch, (uchar_t *)&mcap->mca);
434 		mcap->mac_added = B_FALSE;
435 	}
436 	mutex_exit(mca_lockp);
437 }
438 
439 /*
440  * Open a mac client and program uncast and multicast addresses
441  * for a port or the interface.
442  * Returns:
443  *	0 on success
444  *	non-zero for failure.
445  */
446 int
447 vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type)
448 {
449 	int rv;
450 
451 	mutex_enter(&vswp->mac_lock);
452 	WRITE_MACCL_ENTER(vswp, port, type);
453 	rv = vsw_maccl_open(vswp, port, type);
454 
455 	/* Release mac_lock now */
456 	mutex_exit(&vswp->mac_lock);
457 
458 	if (rv == 0) {
459 		(void) vsw_set_hw(vswp, port, type);
460 		vsw_mac_multicast_add_all(vswp, port, type);
461 	}
462 	RW_MACCL_EXIT(vswp, port, type);
463 	return (rv);
464 }
465 
466 /*
467  * Open a MAC client for a port or an interface.
468  * The flags and their purpose as below:
469  *
470  *	MAC_OPEN_FLAGS_NO_HWRINGS -- This flag is used by default
471  *	for all ports/interface so that they are associated with
472  *	default group & resources. It will not be used for the
473  *	ports that have HybridIO is enabled so that the h/w resources
474  *	assigned to it.
475  *
476  *	MAC_OPEN_FLAGS_SHARES_DESIRED -- This flag is used to indicate
477  *	that a port desires a Share. This will be the case with the
478  *	the ports that have hybrid mode enabled. This will only cause
479  *	MAC layer to allocate a share and corresponding resources
480  *	ahead of time.
481  *
482  *	MAC_UNICAST_TAG_DISABLE -- This flag is used for VLAN
483  *	support. It will cause MAC to not add any tags, but expect
484  *	vsw to tag the packets.
485  *
486  *	MAC_UNICAST_STRIP_DISABLE -- This flag is used for VLAN
487  *	support. It will case the MAC layer to not strip the tags.
488  *	Vsw may have to strip the tag for pvid case.
489  */
490 static int
491 vsw_maccl_open(vsw_t *vswp, vsw_port_t *port, int type)
492 {
493 	int		rv = 0;
494 	int		instance;
495 	char		mac_cl_name[MAXNAMELEN];
496 	const char	*dev_name;
497 	mac_client_handle_t *mchp;
498 	uint64_t flags = MAC_OPEN_FLAGS_NO_HWRINGS;
499 
500 	ASSERT(MUTEX_HELD(&vswp->mac_lock));
501 	if (vswp->mh == NULL) {
502 		/*
503 		 * In case net-dev is changed (either set to nothing or
504 		 * using aggregation device), return success here as the
505 		 * timeout mechanism will handle it.
506 		 */
507 		return (0);
508 	}
509 
510 	mchp = (type == VSW_LOCALDEV) ? &vswp->mch : &port->p_mch;
511 	if (*mchp != NULL) {
512 		/* already open */
513 		return (0);
514 	}
515 	dev_name = ddi_driver_name(vswp->dip);
516 	instance = ddi_get_instance(vswp->dip);
517 	if (type == VSW_VNETPORT) {
518 		if (port->p_hio_enabled == B_TRUE) {
519 			flags &= ~MAC_OPEN_FLAGS_NO_HWRINGS;
520 			flags |= MAC_OPEN_FLAGS_SHARES_DESIRED;
521 		}
522 		(void) snprintf(mac_cl_name, MAXNAMELEN, "%s%d%s%d", dev_name,
523 		    instance, "_port", port->p_instance);
524 	} else {
525 		(void) snprintf(mac_cl_name, MAXNAMELEN, "%s%s%d",
526 		    dev_name, "_if", instance);
527 	}
528 
529 	rv = mac_client_open(vswp->mh, mchp, mac_cl_name, flags);
530 	if (rv != 0) {
531 		cmn_err(CE_NOTE, "!vsw%d:%s mac_client_open() failed\n",
532 		    vswp->instance, mac_cl_name);
533 	}
534 	return (rv);
535 }
536 
537 /*
538  * Clean up by removing uncast, multicast addresses and
539  * closing the MAC client for a port or the interface.
540  */
541 void
542 vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type)
543 {
544 	WRITE_MACCL_ENTER(vswp, port, type);
545 	vsw_unset_hw(vswp, port, type);
546 	vsw_maccl_close(vswp, port, type);
547 	vsw_mac_multicast_remove_all(vswp, port, type);
548 	RW_MACCL_EXIT(vswp, port, type);
549 }
550 
551 /*
552  * Close a MAC client for a port or an interface.
553  */
554 static void
555 vsw_maccl_close(vsw_t *vswp, vsw_port_t *port, int type)
556 {
557 	mac_client_handle_t *mchp;
558 
559 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
560 
561 	mchp = (type == VSW_LOCALDEV) ? &vswp->mch : &port->p_mch;
562 	if (*mchp != NULL) {
563 		mac_client_close(*mchp, 0);
564 		*mchp = NULL;
565 	}
566 }
567 
568 /*
569  * Cleanup MAC client related stuff for all ports.
570  */
571 void
572 vsw_mac_cleanup_ports(vsw_t *vswp)
573 {
574 	vsw_port_list_t		*plist = &vswp->plist;
575 	vsw_port_t		*port;
576 
577 	READ_ENTER(&plist->lockrw);
578 	for (port = plist->head; port != NULL; port = port->p_next) {
579 		vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT);
580 	}
581 	RW_EXIT(&plist->lockrw);
582 }
583 
584 /*
585  * Depending on the mode specified, the capabilites and capacity
586  * of the underlying device setup the physical device.
587  *
588  * If in layer 3 mode, then do nothing.
589  *
590  * If in layer 2 mode, open a mac client and program the mac-address
591  * and vlan-ids. The MAC layer will take care of programming
592  * the address into h/w or set the h/w into promiscuous mode.
593  *
594  * Returns 0 success, 1 on failure.
595  */
596 int
597 vsw_set_hw(vsw_t *vswp, vsw_port_t *port, int type)
598 {
599 	int			err = 1;
600 
601 	D1(vswp, "%s: enter", __func__);
602 
603 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
604 
605 	if (vswp->smode == VSW_LAYER3)
606 		return (0);
607 
608 	if (type == VSW_VNETPORT) {
609 		ASSERT(port != NULL);
610 		err = vsw_set_port_hw_addr(port);
611 	} else {
612 		err = vsw_set_if_hw_addr(vswp);
613 	}
614 
615 	D1(vswp, "%s: exit", __func__);
616 	return (err);
617 }
618 
619 /*
620  * If in layer 3 mode do nothing.
621  *
622  * If in layer 2 switched mode remove the address from the physical
623  * device.
624  *
625  * If in layer 2 promiscuous mode disable promisc mode.
626  *
627  * Returns 0 on success.
628  */
629 void
630 vsw_unset_hw(vsw_t *vswp, vsw_port_t *port, int type)
631 {
632 	D1(vswp, "%s: enter", __func__);
633 
634 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
635 
636 	if (vswp->smode == VSW_LAYER3)
637 		return;
638 
639 	if (type == VSW_VNETPORT) {
640 		ASSERT(port != NULL);
641 		vsw_unset_hw_addr(vswp, port, type);
642 	} else {
643 		vsw_unset_hw_addr(vswp, NULL, type);
644 	}
645 
646 	D1(vswp, "%s: exit", __func__);
647 }
648 
649 /*
650  * Program the macaddress and vlans of a port.
651  *
652  * Returns 0 on sucess, 1 on failure.
653  */
654 static int
655 vsw_set_port_hw_addr(vsw_port_t *port)
656 {
657 	vsw_t			*vswp = port->p_vswp;
658 	mac_diag_t		diag;
659 	uint8_t			*macaddr;
660 	uint16_t		vid = VLAN_ID_NONE;
661 	int			rv;
662 	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
663 	    MAC_UNICAST_STRIP_DISABLE;
664 
665 	D1(vswp, "%s: enter", __func__);
666 
667 	ASSERT(RW_WRITE_HELD(&port->maccl_rwlock));
668 	if (port->p_mch == NULL)
669 		return (0);
670 
671 	/*
672 	 * If the port has a specific 'pvid', then
673 	 * register with that vlan-id, otherwise register
674 	 * with VLAN_ID_NONE.
675 	 */
676 	if (port->pvid != vswp->default_vlan_id) {
677 		vid = port->pvid;
678 	}
679 	macaddr = (uint8_t *)port->p_macaddr.ether_addr_octet;
680 
681 	if (!(vswp->smode & VSW_LAYER2_PROMISC)) {
682 		mac_flags |= MAC_UNICAST_HW;
683 	}
684 
685 	if (port->addr_set == B_FALSE) {
686 		port->p_muh = NULL;
687 		rv = mac_unicast_add(port->p_mch, macaddr, mac_flags,
688 		    &port->p_muh, vid, &diag);
689 
690 		if (rv != 0) {
691 			cmn_err(CE_WARN, "vsw%d: Failed to program"
692 			    "macaddr,vid(%s, %d) err=%d",
693 			    vswp->instance, ether_sprintf((void *)macaddr),
694 			    vid, rv);
695 			return (rv);
696 		}
697 		port->addr_set = B_TRUE;
698 
699 		D2(vswp, "%s:programmed macaddr(%s) vid(%d) into device %s",
700 		    __func__, ether_sprintf((void *)macaddr), vid,
701 		    vswp->physname);
702 	}
703 
704 	/* Add vlans to the MAC layer */
705 	vsw_mac_add_vlans(vswp, port->p_mch, macaddr,
706 	    mac_flags, port->vids, port->nvids);
707 
708 	mac_rx_set(port->p_mch, vsw_port_rx_cb, (void *)port);
709 
710 	D1(vswp, "%s: exit", __func__);
711 	return (rv);
712 }
713 
714 /*
715  * Program the macaddress and vlans of a port.
716  *
717  * Returns 0 on sucess, 1 on failure.
718  */
719 static int
720 vsw_set_if_hw_addr(vsw_t *vswp)
721 {
722 	mac_diag_t		diag;
723 	uint8_t			*macaddr;
724 	uint8_t			primary_addr[ETHERADDRL];
725 	uint16_t		vid = VLAN_ID_NONE;
726 	int			rv;
727 	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
728 	    MAC_UNICAST_STRIP_DISABLE;
729 
730 	D1(vswp, "%s: enter", __func__);
731 
732 	ASSERT(RW_WRITE_HELD(&vswp->maccl_rwlock));
733 	if (vswp->mch == NULL)
734 		return (0);
735 
736 	macaddr = (uint8_t *)vswp->if_addr.ether_addr_octet;
737 
738 	/* check if it is the primary macaddr of the card. */
739 	mac_unicast_primary_get(vswp->mh, primary_addr);
740 	if (ether_cmp((void *)primary_addr, (void*)macaddr) == 0) {
741 		mac_flags |= MAC_UNICAST_PRIMARY;
742 	}
743 
744 	/*
745 	 * If the interface has a specific 'pvid', then
746 	 * register with that vlan-id, otherwise register
747 	 * with VLAN_ID_NONE.
748 	 */
749 	if (vswp->pvid != vswp->default_vlan_id) {
750 		vid = vswp->pvid;
751 	}
752 
753 	if (!(vswp->smode & VSW_LAYER2_PROMISC)) {
754 		mac_flags |= MAC_UNICAST_HW;
755 	}
756 
757 	if (vswp->addr_set == B_FALSE) {
758 		vswp->muh = NULL;
759 		rv = mac_unicast_add(vswp->mch, macaddr, mac_flags,
760 		    &vswp->muh, vid, &diag);
761 
762 		if (rv != 0) {
763 			cmn_err(CE_WARN, "vsw%d: Failed to program"
764 			    "macaddr,vid(%s, %d) err=%d",
765 			    vswp->instance, ether_sprintf((void *)macaddr),
766 			    vid, rv);
767 			return (rv);
768 		}
769 		vswp->addr_set = B_TRUE;
770 
771 		D2(vswp, "%s:programmed macaddr(%s) vid(%d) into device %s",
772 		    __func__, ether_sprintf((void *)macaddr), vid,
773 		    vswp->physname);
774 	}
775 
776 	vsw_mac_add_vlans(vswp, vswp->mch, macaddr, mac_flags,
777 	    vswp->vids, vswp->nvids);
778 
779 	mac_rx_set(vswp->mch, vsw_if_rx_cb, (void *)vswp);
780 
781 	D1(vswp, "%s: exit", __func__);
782 	return (rv);
783 }
784 
785 /*
786  * Remove a unicast mac address which has previously been programmed
787  * into HW.
788  *
789  * Returns 0 on sucess, 1 on failure.
790  */
791 static void
792 vsw_unset_hw_addr(vsw_t *vswp, vsw_port_t *port, int type)
793 {
794 	vsw_vlanid_t		*vids;
795 	int			nvids;
796 	mac_client_handle_t	mch = NULL;
797 
798 	D1(vswp, "%s: enter", __func__);
799 
800 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
801 
802 	if (type == VSW_VNETPORT) {
803 		ASSERT(port != NULL);
804 		ASSERT(RW_WRITE_HELD(&port->maccl_rwlock));
805 		vids = port->vids;
806 		nvids = port->nvids;
807 	} else {
808 		ASSERT(RW_WRITE_HELD(&vswp->maccl_rwlock));
809 		vids = vswp->vids;
810 		nvids = vswp->nvids;
811 	}
812 
813 	/* First clear the callback */
814 	if (type == VSW_LOCALDEV) {
815 		mch = vswp->mch;
816 	} else if (type == VSW_VNETPORT) {
817 		mch = port->p_mch;
818 	}
819 
820 
821 	if (mch == NULL) {
822 		return;
823 	}
824 
825 	mac_rx_clear(mch);
826 
827 	/* Remove vlans */
828 	vsw_mac_remove_vlans(mch, vids, nvids);
829 
830 	if ((type == VSW_LOCALDEV) && (vswp->addr_set == B_TRUE)) {
831 		(void) mac_unicast_remove(vswp->mch, vswp->muh);
832 		vswp->muh = NULL;
833 		D2(vswp, "removed vsw interface mac-addr from "
834 		    "the device %s", vswp->physname);
835 		vswp->addr_set = B_FALSE;
836 
837 	} else if ((type == VSW_VNETPORT) && (port->addr_set == B_TRUE)) {
838 		(void) mac_unicast_remove(port->p_mch, port->p_muh);
839 		port->p_muh = NULL;
840 		D2(vswp, "removed port(0x%p) mac-addr from "
841 		    "the device %s", port, vswp->physname);
842 		port->addr_set = B_FALSE;
843 	}
844 
845 	D1(vswp, "%s: exit", __func__);
846 }
847 
848 /*
849  * receive callback routine for vsw interface. Invoked by MAC layer when there
850  * are pkts being passed up from physical device for this vsw interface.
851  */
852 /* ARGSUSED */
853 static void
854 vsw_if_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
855     boolean_t loopback)
856 {
857 	_NOTE(ARGUNUSED(mrh))
858 
859 	vsw_t		*vswp = (vsw_t *)arg;
860 	mblk_t		*mpt;
861 	int		count;
862 
863 	ASSERT(vswp != NULL);
864 
865 	D1(vswp, "%s: enter", __func__);
866 
867 	READ_ENTER(&vswp->if_lockrw);
868 	if (vswp->if_state & VSW_IF_UP) {
869 		RW_EXIT(&vswp->if_lockrw);
870 		count = vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
871 		if (count != 0) {
872 			mac_rx(vswp->if_mh, NULL, mp);
873 		}
874 	} else {
875 		RW_EXIT(&vswp->if_lockrw);
876 		freemsgchain(mp);
877 	}
878 
879 	D1(vswp, "%s: exit", __func__);
880 }
881 
882 /*
883  * receive callback routine for port. Invoked by MAC layer when there
884  * are pkts being passed up from physical device for this port.
885  */
886 /* ARGSUSED */
887 static void
888 vsw_port_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
889     boolean_t loopback)
890 {
891 	_NOTE(ARGUNUSED(mrh))
892 
893 	vsw_t		*vswp;
894 	vsw_port_t	*port = arg;
895 
896 	ASSERT(port != NULL);
897 
898 	vswp = port->p_vswp;
899 
900 	D1(vswp, "vsw_port_rx_cb: enter");
901 
902 	/*
903 	 * Send the packets to the peer directly.
904 	 */
905 	(void) vsw_portsend(port, mp);
906 
907 	D1(vswp, "vsw_port_rx_cb: exit");
908 }
909 
910 /*
911  * Send a message out over the physical device
912  * via the MAC layer.
913  *
914  * Returns any mblks that it was unable to transmit.
915  */
916 mblk_t *
917 vsw_tx_msg(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *port)
918 {
919 	mac_client_handle_t	mch;
920 	mac_unicast_handle_t	muh;
921 
922 	READ_MACCL_ENTER(vswp, port, caller);
923 
924 	mch = (caller == VSW_LOCALDEV) ? vswp->mch : port->p_mch;
925 	muh = (caller == VSW_LOCALDEV) ? vswp->muh : port->p_muh;
926 
927 	if (mch == NULL || muh == NULL) {
928 		RW_MACCL_EXIT(vswp, port, caller);
929 		return (mp);
930 	}
931 
932 	/* packets are sent or dropped */
933 	(void) mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
934 	RW_MACCL_EXIT(vswp, port, caller);
935 	return (NULL);
936 }
937 
938 /*
939  * vsw_port_mac_reconfig -- Cleanup and close the MAC client
940  * and reopen and re-configure the MAC client with new flags etc.
941  * This function is useful for two different purposes:
942  *	1) To update the MAC client with new vlan-ids. This is done
943  *	   by freeing the existing vlan-ids and reopen with the new
944  *	   vlan-ids.
945  *
946  *	2) If the Hybrid mode status of a port changes, then the
947  *	   MAC client need to be closed and re-opened, otherwise,
948  *	   Share related resources may not be freed(hybird mode disabled)
949  *	   or assigned(hybrid mode enabled). To accomplish this,
950  *	   this function simply closes and reopens the MAC client.
951  *	   The reopen will result in using the flags based on the
952  *	   new hybrid mode of the port.
953  */
954 void
955 vsw_port_mac_reconfig(vsw_port_t *portp, boolean_t update_vlans,
956     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids)
957 {
958 	vsw_t *vswp = portp->p_vswp;
959 	int rv;
960 
961 	D1(vswp, "%s: enter", __func__);
962 	/*
963 	 * Remove the multi-cast addresses, unicast address
964 	 * and close the mac-client.
965 	 */
966 	mutex_enter(&vswp->mac_lock);
967 	WRITE_ENTER(&portp->maccl_rwlock);
968 	vsw_mac_multicast_remove_all(vswp, portp, VSW_VNETPORT);
969 	vsw_unset_hw(vswp, portp, VSW_VNETPORT);
970 	vsw_maccl_close(vswp, portp, VSW_VNETPORT);
971 
972 	if (update_vlans == B_TRUE) {
973 		if (portp->nvids != 0) {
974 			kmem_free(portp->vids,
975 			    sizeof (vsw_vlanid_t) * portp->nvids);
976 			portp->vids = NULL;
977 			portp->nvids = 0;
978 		}
979 		portp->vids = new_vids;
980 		portp->nvids = new_nvids;
981 		portp->pvid = new_pvid;
982 	}
983 
984 	/*
985 	 * Now re-open the mac-client and
986 	 * configure unicast addr and multicast addrs.
987 	 */
988 	rv = vsw_maccl_open(vswp, portp, VSW_VNETPORT);
989 	if (rv != 0) {
990 		goto recret;
991 	}
992 
993 	if (vsw_set_hw(vswp, portp, VSW_VNETPORT)) {
994 		cmn_err(CE_NOTE, "!vsw%d: port:%d failed to "
995 		    "set unicast address\n", vswp->instance, portp->p_instance);
996 		goto recret;
997 	}
998 
999 	vsw_mac_multicast_add_all(vswp, portp, VSW_VNETPORT);
1000 
1001 recret:
1002 	RW_EXIT(&portp->maccl_rwlock);
1003 	mutex_exit(&vswp->mac_lock);
1004 	D1(vswp, "%s: exit", __func__);
1005 }
1006 
1007 /*
1008  * vsw_if_mac_reconfig -- Reconfigure the vsw interfaace's mac-client
1009  * by closing and re-opening it. This function is used handle the
1010  * following two cases:
1011  *
1012  *	1) Handle the MAC address change for the interface.
1013  *	2) Handle vlan update.
1014  */
1015 void
1016 vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
1017     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids)
1018 {
1019 	int rv;
1020 
1021 	D1(vswp, "%s: enter", __func__);
1022 	/*
1023 	 * Remove the multi-cast addresses, unicast address
1024 	 * and close the mac-client.
1025 	 */
1026 	mutex_enter(&vswp->mac_lock);
1027 	WRITE_ENTER(&vswp->maccl_rwlock);
1028 	vsw_mac_multicast_remove_all(vswp, NULL, VSW_LOCALDEV);
1029 	vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1030 	vsw_maccl_close(vswp, NULL, VSW_LOCALDEV);
1031 
1032 	if (update_vlans == B_TRUE) {
1033 		if (vswp->nvids != 0) {
1034 			kmem_free(vswp->vids,
1035 			    sizeof (vsw_vlanid_t) * vswp->nvids);
1036 			vswp->vids = NULL;
1037 			vswp->nvids = 0;
1038 		}
1039 		vswp->vids = new_vids;
1040 		vswp->nvids = new_nvids;
1041 		vswp->pvid = new_pvid;
1042 	}
1043 
1044 	/*
1045 	 * Now re-open the mac-client and
1046 	 * configure unicast addr and multicast addrs.
1047 	 */
1048 	rv = vsw_maccl_open(vswp, NULL, VSW_LOCALDEV);
1049 	if (rv != 0) {
1050 		goto ifrecret;
1051 	}
1052 
1053 	if (vsw_set_hw(vswp, NULL, VSW_LOCALDEV)) {
1054 		cmn_err(CE_NOTE, "!vsw%d:failed to set unicast address\n",
1055 		    vswp->instance);
1056 		goto ifrecret;
1057 	}
1058 
1059 	vsw_mac_multicast_add_all(vswp, NULL, VSW_LOCALDEV);
1060 
1061 ifrecret:
1062 	RW_EXIT(&vswp->maccl_rwlock);
1063 	mutex_exit(&vswp->mac_lock);
1064 	D1(vswp, "%s: exit", __func__);
1065 }
1066 
1067 /*
1068  * vsw_mac_port_reconfig_vlans -- Reconfigure a port to handle
1069  * vlan configuration update. As the removal of the last unicast-address,vid
1070  * from the MAC client results in releasing all resources, it expects
1071  * no Shares to be associated with such MAC client.
1072  *
1073  * To handle vlan configuration update for a port that already has
1074  * a Share bound, then we need to free that share prior to reconfiguration.
1075  * Initiate the hybrdIO setup again after the completion of reconfiguration.
1076  */
1077 void
1078 vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
1079     vsw_vlanid_t *new_vids, int new_nvids)
1080 {
1081 	/*
1082 	 * As the reconfiguration involves the close of
1083 	 * mac client, cleanup HybridIO and later restart
1084 	 * HybridIO setup again.
1085 	 */
1086 	if (portp->p_hio_enabled == B_TRUE) {
1087 		vsw_hio_stop_port(portp);
1088 	}
1089 	vsw_port_mac_reconfig(portp, B_TRUE, new_pvid, new_vids, new_nvids);
1090 	if (portp->p_hio_enabled == B_TRUE) {
1091 		/* reset to setup the HybridIO again. */
1092 		vsw_hio_port_reset(portp, B_FALSE);
1093 	}
1094 }
1095 
1096 /* Add vlans to MAC client */
1097 static void
1098 vsw_mac_add_vlans(vsw_t *vswp, mac_client_handle_t mch, uint8_t *macaddr,
1099     uint16_t flags, vsw_vlanid_t *vids, int nvids)
1100 {
1101 	vsw_vlanid_t	*vidp;
1102 	mac_diag_t	diag;
1103 	int		rv;
1104 	int		i;
1105 
1106 	flags |= MAC_UNICAST_TAG_DISABLE | MAC_UNICAST_STRIP_DISABLE;
1107 
1108 	/* Add vlans to the MAC layer */
1109 	for (i = 0; i < nvids; i++) {
1110 		vidp = &vids[i];
1111 
1112 		if (vidp->vl_set == B_TRUE) {
1113 			continue;
1114 		}
1115 
1116 		rv = mac_unicast_add(mch, macaddr, flags,
1117 		    &vidp->vl_muh, vidp->vl_vid, &diag);
1118 		if (rv != 0) {
1119 			cmn_err(CE_WARN, "vsw%d: Failed to program"
1120 			    "macaddr,vid(%s, %d) err=%d",
1121 			    vswp->instance, ether_sprintf((void *)macaddr),
1122 			    vidp->vl_vid, rv);
1123 		} else {
1124 			vidp->vl_set = B_TRUE;
1125 			D2(vswp, "%s:programmed macaddr(%s) vid(%d) "
1126 			    "into device %s", __func__,
1127 			    ether_sprintf((void *)macaddr),
1128 			    vidp->vl_vid, vswp->physname);
1129 		}
1130 	}
1131 }
1132 
1133 /* Remove vlans from the MAC client */
1134 static void
1135 vsw_mac_remove_vlans(mac_client_handle_t mch, vsw_vlanid_t *vids, int nvids)
1136 {
1137 	int i;
1138 	vsw_vlanid_t *vidp;
1139 
1140 	for (i = 0; i < nvids; i++) {
1141 		vidp = &vids[i];
1142 		if (vidp->vl_set == B_FALSE) {
1143 			continue;
1144 		}
1145 		mac_unicast_remove(mch, vidp->vl_muh);
1146 		vidp->vl_set = B_FALSE;
1147 	}
1148 }
1149 
1150 #define	ARH_FIXED_LEN	8    /* Length of fixed part of ARP header(see arp.h) */
1151 
1152 /*
1153  * Send a gratuitous RARP packet to notify the physical switch to update its
1154  * Layer2 forwarding table for the given mac address. This is done to allow the
1155  * switch to quickly learn the macaddr-port association when a guest is live
1156  * migrated or when vsw's physical device is changed dynamically. Any protocol
1157  * packet would serve this purpose, but we choose RARP, as it allows us to
1158  * accomplish this within L2 (ie, no need to specify IP addr etc in the packet)
1159  * The macaddr of vnet is retained across migration. Hence, we don't need to
1160  * update the arp cache of other hosts within the broadcast domain. Note that
1161  * it is harmless to send these RARP packets during normal port attach of a
1162  * client vnet. This can can be turned off if needed, by setting
1163  * vsw_publish_macaddr_count to zero in /etc/system.
1164  */
1165 void
1166 vsw_publish_macaddr(vsw_t *vswp, vsw_port_t *portp)
1167 {
1168 	mblk_t			*mp;
1169 	mblk_t			*bp;
1170 	struct arphdr		*arh;
1171 	struct	ether_header 	*ehp;
1172 	int			count = 0;
1173 	int			plen = 4;
1174 	uint8_t			*cp;
1175 
1176 	mp = allocb(ETHERMIN, BPRI_MED);
1177 	if (mp == NULL) {
1178 		return;
1179 	}
1180 
1181 	/* Initialize eth header */
1182 	ehp = (struct  ether_header *)mp->b_rptr;
1183 	bcopy(&etherbroadcastaddr, &ehp->ether_dhost, ETHERADDRL);
1184 	bcopy(&portp->p_macaddr, &ehp->ether_shost, ETHERADDRL);
1185 	ehp->ether_type = htons(ETHERTYPE_REVARP);
1186 
1187 	/* Initialize arp packet */
1188 	arh = (struct arphdr *)(mp->b_rptr + sizeof (struct ether_header));
1189 	cp = (uint8_t *)arh;
1190 
1191 	arh->ar_hrd = htons(ARPHRD_ETHER);	/* Hardware type:  ethernet */
1192 	arh->ar_pro = htons(ETHERTYPE_IP);	/* Protocol type:  IP */
1193 	arh->ar_hln = ETHERADDRL;	/* Length of hardware address:  6 */
1194 	arh->ar_pln = plen;		/* Length of protocol address:  4 */
1195 	arh->ar_op = htons(REVARP_REQUEST);	/* Opcode: REVARP Request */
1196 
1197 	cp += ARH_FIXED_LEN;
1198 
1199 	/* Sender's hardware address and protocol address */
1200 	bcopy(&portp->p_macaddr, cp, ETHERADDRL);
1201 	cp += ETHERADDRL;
1202 	bzero(cp, plen);	/* INADDR_ANY */
1203 	cp += plen;
1204 
1205 	/* Target hardware address and protocol address */
1206 	bcopy(&portp->p_macaddr, cp, ETHERADDRL);
1207 	cp += ETHERADDRL;
1208 	bzero(cp, plen);	/* INADDR_ANY */
1209 	cp += plen;
1210 
1211 	mp->b_wptr += ETHERMIN;	/* total size is 42; round up to ETHERMIN */
1212 
1213 	for (count = 0; count < vsw_publish_macaddr_count; count++) {
1214 
1215 		bp = dupmsg(mp);
1216 		if (bp == NULL) {
1217 			continue;
1218 		}
1219 
1220 		/* transmit the packet */
1221 		bp = vsw_tx_msg(vswp, bp, VSW_VNETPORT, portp);
1222 		if (bp != NULL) {
1223 			freemsg(bp);
1224 		}
1225 	}
1226 
1227 	freemsg(mp);
1228 }
1229 
1230 static void
1231 vsw_mac_set_mtu(vsw_t *vswp, uint32_t mtu)
1232 {
1233 	uint_t	mtu_orig;
1234 	int	rv;
1235 
1236 	rv = mac_set_mtu(vswp->mh, mtu, &mtu_orig);
1237 	if (rv != 0) {
1238 		cmn_err(CE_NOTE,
1239 		    "!vsw%d: Unable to set the mtu:%d, in the "
1240 		    "physical device:%s\n",
1241 		    vswp->instance, mtu, vswp->physname);
1242 		return;
1243 	}
1244 
1245 	/* save the original mtu of physdev to reset it back later if needed */
1246 	vswp->mtu_physdev_orig = mtu_orig;
1247 }
1248