xref: /titanic_51/usr/src/uts/sun4v/io/vsw_switching.c (revision ad1592816585b2f21f25dcc07a8626676a7cec20)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/debug.h>
32 #include <sys/time.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/user.h>
36 #include <sys/stropts.h>
37 #include <sys/stream.h>
38 #include <sys/strlog.h>
39 #include <sys/strsubr.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpu.h>
42 #include <sys/kmem.h>
43 #include <sys/conf.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47 #include <sys/stat.h>
48 #include <sys/kstat.h>
49 #include <sys/vtrace.h>
50 #include <sys/strsun.h>
51 #include <sys/dlpi.h>
52 #include <sys/ethernet.h>
53 #include <net/if.h>
54 #include <sys/varargs.h>
55 #include <sys/machsystm.h>
56 #include <sys/modctl.h>
57 #include <sys/modhash.h>
58 #include <sys/mac.h>
59 #include <sys/mac_ether.h>
60 #include <sys/taskq.h>
61 #include <sys/note.h>
62 #include <sys/mach_descrip.h>
63 #include <sys/mac.h>
64 #include <sys/mdeg.h>
65 #include <sys/ldc.h>
66 #include <sys/vsw_fdb.h>
67 #include <sys/vsw.h>
68 #include <sys/vio_mailbox.h>
69 #include <sys/vnet_mailbox.h>
70 #include <sys/vnet_common.h>
71 #include <sys/vio_util.h>
72 #include <sys/sdt.h>
73 #include <sys/atomic.h>
74 
75 /* Switching setup routines */
76 void vsw_setup_switching_timeout(void *arg);
77 void vsw_stop_switching_timeout(vsw_t *vswp);
78 int vsw_setup_switching(vsw_t *);
79 static	int vsw_setup_layer2(vsw_t *);
80 static	int vsw_setup_layer3(vsw_t *);
81 
82 /* Switching/data transmit routines */
83 static	void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
84     vsw_port_t *port, mac_resource_handle_t);
85 static	void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
86     vsw_port_t *port, mac_resource_handle_t);
87 static	int vsw_forward_all(vsw_t *vswp, mblk_t *mp, mblk_t *mpt,
88     int caller, vsw_port_t *port);
89 static	int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, mblk_t *mpt,
90     int caller, vsw_port_t *port);
91 
92 /* Forwarding database (FDB) routines */
93 static	vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *);
94 int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
95 void vsw_del_mcst_port(vsw_port_t *);
96 int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
97 int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
98 void vsw_del_mcst_vsw(vsw_t *);
99 int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
100 int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
101 
102 /* Support functions */
103 static mblk_t *vsw_dupmsgchain(mblk_t *mp);
104 static int vsw_get_same_dest_list(struct ether_header *ehp,
105     mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
106 
107 
108 /*
109  * Functions imported from other files.
110  */
111 extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
112 extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t);
113 extern int vsw_mac_open(vsw_t *vswp);
114 extern void vsw_mac_close(vsw_t *vswp);
115 extern void vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh,
116     mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags);
117 extern void vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh,
118     mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags);
119 extern void vsw_set_addrs(vsw_t *vswp);
120 extern int vsw_get_hw_maddr(vsw_t *);
121 extern int vsw_mac_attach(vsw_t *vswp);
122 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt);
123 
124 /*
125  * Tunables used in this file.
126  */
127 extern int vsw_setup_switching_delay;
128 
129 
130 /*
131  * Timeout routine to setup switching mode:
132  * vsw_setup_switching() is invoked from vsw_attach() or vsw_update_md_prop()
133  * initially. If it fails and the error is EAGAIN, then this timeout handler
134  * is started to retry vsw_setup_switching(). vsw_setup_switching() is retried
135  * until we successfully finish it; or the returned error is not EAGAIN.
136  */
137 void
138 vsw_setup_switching_timeout(void *arg)
139 {
140 	vsw_t		*vswp = (vsw_t *)arg;
141 	int		rv;
142 
143 	if (vswp->swtmout_enabled == B_FALSE)
144 		return;
145 
146 	rv = vsw_setup_switching(vswp);
147 
148 	if (rv == 0) {
149 		/*
150 		 * Successfully setup switching mode.
151 		 * Program unicst, mcst addrs of vsw
152 		 * interface and ports in the physdev.
153 		 */
154 		vsw_set_addrs(vswp);
155 	}
156 
157 	mutex_enter(&vswp->swtmout_lock);
158 
159 	if (rv == EAGAIN && vswp->swtmout_enabled == B_TRUE) {
160 		/*
161 		 * Reschedule timeout() if the error is EAGAIN and the
162 		 * timeout is still enabled. For errors other than EAGAIN,
163 		 * we simply return without rescheduling timeout().
164 		 */
165 		vswp->swtmout_id =
166 		    timeout(vsw_setup_switching_timeout, vswp,
167 		    (vsw_setup_switching_delay * drv_usectohz(MICROSEC)));
168 		goto exit;
169 	}
170 
171 	/* timeout handler completed */
172 	vswp->swtmout_enabled = B_FALSE;
173 	vswp->swtmout_id = 0;
174 
175 exit:
176 	mutex_exit(&vswp->swtmout_lock);
177 }
178 
179 /*
180  * Cancel the timeout handler to setup switching mode.
181  */
182 void
183 vsw_stop_switching_timeout(vsw_t *vswp)
184 {
185 	timeout_id_t tid;
186 
187 	mutex_enter(&vswp->swtmout_lock);
188 
189 	tid = vswp->swtmout_id;
190 
191 	if (tid != 0) {
192 		/* signal timeout handler to stop */
193 		vswp->swtmout_enabled = B_FALSE;
194 		vswp->swtmout_id = 0;
195 		mutex_exit(&vswp->swtmout_lock);
196 
197 		(void) untimeout(tid);
198 	} else {
199 		mutex_exit(&vswp->swtmout_lock);
200 	}
201 
202 	(void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE);
203 
204 	mutex_enter(&vswp->mac_lock);
205 	vswp->mac_open_retries = 0;
206 	mutex_exit(&vswp->mac_lock);
207 }
208 
209 /*
210  * Setup the required switching mode.
211  * This routine is invoked from vsw_attach() or vsw_update_md_prop()
212  * initially. If it fails and the error is EAGAIN, then a timeout handler
213  * is started to retry vsw_setup_switching(), until it successfully finishes;
214  * or the returned error is not EAGAIN.
215  *
216  * Returns:
217  *  0 on success.
218  *  EAGAIN if retry is needed.
219  *  1 on all other failures.
220  */
221 int
222 vsw_setup_switching(vsw_t *vswp)
223 {
224 	int	i, rv = 1;
225 
226 	D1(vswp, "%s: enter", __func__);
227 
228 	/*
229 	 * Select best switching mode.
230 	 * Note that we start from the saved smode_idx. This is done as
231 	 * this routine can be called from the timeout handler to retry
232 	 * setting up a specific mode. Currently only the function which
233 	 * sets up layer2/promisc mode returns EAGAIN if the underlying
234 	 * physical device is not available yet, causing retries.
235 	 */
236 	for (i = vswp->smode_idx; i < vswp->smode_num; i++) {
237 		vswp->smode_idx = i;
238 		switch (vswp->smode[i]) {
239 		case VSW_LAYER2:
240 		case VSW_LAYER2_PROMISC:
241 			rv = vsw_setup_layer2(vswp);
242 			break;
243 
244 		case VSW_LAYER3:
245 			rv = vsw_setup_layer3(vswp);
246 			break;
247 
248 		default:
249 			DERR(vswp, "unknown switch mode");
250 			break;
251 		}
252 
253 		if ((rv == 0) || (rv == EAGAIN))
254 			break;
255 
256 		/* all other errors(rv != 0): continue & select the next mode */
257 		rv = 1;
258 	}
259 
260 	if (rv && (rv != EAGAIN)) {
261 		cmn_err(CE_WARN, "!vsw%d: Unable to setup specified "
262 		    "switching mode", vswp->instance);
263 	} else if (rv == 0) {
264 		(void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE);
265 	}
266 
267 	D2(vswp, "%s: Operating in mode %d", __func__,
268 	    vswp->smode[vswp->smode_idx]);
269 
270 	D1(vswp, "%s: exit", __func__);
271 
272 	return (rv);
273 }
274 
275 /*
276  * Setup for layer 2 switching.
277  *
278  * Returns:
279  *  0 on success.
280  *  EAGAIN if retry is needed.
281  *  EIO on all other failures.
282  */
283 static int
284 vsw_setup_layer2(vsw_t *vswp)
285 {
286 	int	rv;
287 
288 	D1(vswp, "%s: enter", __func__);
289 
290 	vswp->vsw_switch_frame = vsw_switch_l2_frame;
291 
292 	rv = strlen(vswp->physname);
293 	if (rv == 0) {
294 		/*
295 		 * Physical device name is NULL, which is
296 		 * required for layer 2.
297 		 */
298 		cmn_err(CE_WARN, "!vsw%d: no physical device name specified",
299 		    vswp->instance);
300 		return (EIO);
301 	}
302 
303 	mutex_enter(&vswp->mac_lock);
304 
305 	rv = vsw_mac_open(vswp);
306 	if (rv != 0) {
307 		if (rv != EAGAIN) {
308 			cmn_err(CE_WARN, "!vsw%d: Unable to open physical "
309 			    "device: %s\n", vswp->instance, vswp->physname);
310 		}
311 		mutex_exit(&vswp->mac_lock);
312 		return (rv);
313 	}
314 
315 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) {
316 		/*
317 		 * Verify that underlying device can support multiple
318 		 * unicast mac addresses.
319 		 */
320 		rv = vsw_get_hw_maddr(vswp);
321 		if (rv != 0) {
322 			cmn_err(CE_WARN, "!vsw%d: Unable to setup "
323 			    "layer2 switching", vswp->instance);
324 			goto exit_error;
325 		}
326 	}
327 
328 	/*
329 	 * Attempt to link into the MAC layer so we can get
330 	 * and send packets out over the physical adapter.
331 	 */
332 	rv = vsw_mac_attach(vswp);
333 	if (rv != 0) {
334 		/*
335 		 * Registration with the MAC layer has failed,
336 		 * so return error so that can fall back to next
337 		 * prefered switching method.
338 		 */
339 		cmn_err(CE_WARN, "!vsw%d: Unable to setup physical device: "
340 		    "%s\n", vswp->instance, vswp->physname);
341 		goto exit_error;
342 	}
343 
344 	D1(vswp, "%s: exit", __func__);
345 
346 	mutex_exit(&vswp->mac_lock);
347 	return (0);
348 
349 exit_error:
350 	vsw_mac_close(vswp);
351 	mutex_exit(&vswp->mac_lock);
352 	return (EIO);
353 }
354 
355 static int
356 vsw_setup_layer3(vsw_t *vswp)
357 {
358 	D1(vswp, "%s: enter", __func__);
359 
360 	D2(vswp, "%s: operating in layer 3 mode", __func__);
361 	vswp->vsw_switch_frame = vsw_switch_l3_frame;
362 
363 	D1(vswp, "%s: exit", __func__);
364 
365 	return (0);
366 }
367 
368 /*
369  * Switch the given ethernet frame when operating in layer 2 mode.
370  *
371  * vswp: pointer to the vsw instance
372  * mp: pointer to chain of ethernet frame(s) to be switched
373  * caller: identifies the source of this frame as:
374  * 		1. VSW_VNETPORT - a vsw port (connected to a vnet).
375  *		2. VSW_PHYSDEV - the physical ethernet device
376  *		3. VSW_LOCALDEV - vsw configured as a virtual interface
377  * arg: argument provided by the caller.
378  *		1. for VNETPORT - pointer to the corresponding vsw_port_t.
379  *		2. for PHYSDEV - NULL
380  *		3. for LOCALDEV - pointer to to this vsw_t(self)
381  */
382 void
383 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
384 			vsw_port_t *arg, mac_resource_handle_t mrh)
385 {
386 	struct ether_header	*ehp;
387 	vsw_port_t		*port = NULL;
388 	mblk_t			*bp, *ret_m;
389 	mblk_t			*mpt = NULL;
390 	int			rv;
391 	vsw_port_list_t		*plist = &vswp->plist;
392 
393 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
394 
395 	/*
396 	 * PERF: rather than breaking up the chain here, scan it
397 	 * to find all mblks heading to same destination and then
398 	 * pass that sub-chain to the lower transmit functions.
399 	 */
400 
401 	/* process the chain of packets */
402 	bp = mp;
403 	while (bp) {
404 		ehp = (struct ether_header *)bp->b_rptr;
405 		rv = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp);
406 		ASSERT(rv != 0);
407 
408 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
409 		    __func__, MBLKSIZE(mp), MBLKL(mp));
410 
411 		if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) {
412 			/*
413 			 * If destination is VSW_LOCALDEV (vsw as an eth
414 			 * interface) and if the device is up & running,
415 			 * send the packet up the stack on this host.
416 			 * If the virtual interface is down, drop the packet.
417 			 */
418 			if (caller != VSW_LOCALDEV) {
419 				vsw_mac_rx(vswp, caller, mrh, mp,
420 				    mpt, VSW_MACRX_FREEMSG);
421 			} else {
422 				freemsgchain(mp);
423 			}
424 			continue;
425 		}
426 
427 		READ_ENTER(&plist->lockrw);
428 		port = vsw_lookup_fdb(vswp, ehp);
429 		if (port) {
430 			/*
431 			 * Mark the port as in-use before releasing the lockrw.
432 			 */
433 			VSW_PORT_REFHOLD(port);
434 			RW_EXIT(&plist->lockrw);
435 
436 			/*
437 			 * If plumbed and in promisc mode then copy msg
438 			 * and send up the stack.
439 			 */
440 			vsw_mac_rx(vswp, caller, mrh, mp,
441 			    mpt, VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
442 
443 			/*
444 			 * If the destination is in FDB, the packet
445 			 * should be forwarded to the correponding
446 			 * vsw_port (connected to a vnet device -
447 			 * VSW_VNETPORT)
448 			 */
449 			(void) vsw_portsend(port, mp, mpt);
450 
451 			/*
452 			 * Decrement use count in port.
453 			 */
454 			VSW_PORT_REFRELE(port);
455 		} else {
456 			RW_EXIT(&plist->lockrw);
457 			/*
458 			 * Destination not in FDB.
459 			 *
460 			 * If the destination is broadcast or
461 			 * multicast forward the packet to all
462 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
463 			 * except the caller.
464 			 */
465 			if (IS_BROADCAST(ehp)) {
466 				D3(vswp, "%s: BROADCAST pkt", __func__);
467 				(void) vsw_forward_all(vswp, mp, mpt,
468 				    caller, arg);
469 			} else if (IS_MULTICAST(ehp)) {
470 				D3(vswp, "%s: MULTICAST pkt", __func__);
471 				(void) vsw_forward_grp(vswp, mp, mpt,
472 				    caller, arg);
473 			} else {
474 				/*
475 				 * If the destination is unicast, and came
476 				 * from either a logical network device or
477 				 * the switch itself when it is plumbed, then
478 				 * send it out on the physical device and also
479 				 * up the stack if the logical interface is
480 				 * in promiscious mode.
481 				 *
482 				 * NOTE:  The assumption here is that if we
483 				 * cannot find the destination in our fdb, its
484 				 * a unicast address, and came from either a
485 				 * vnet or down the stack (when plumbed) it
486 				 * must be destinded for an ethernet device
487 				 * outside our ldoms.
488 				 */
489 				if (caller == VSW_VNETPORT) {
490 					/* promisc check copy etc */
491 					vsw_mac_rx(vswp, caller, mrh, mp, mpt,
492 					    VSW_MACRX_PROMISC |
493 					    VSW_MACRX_COPYMSG);
494 
495 					if ((ret_m = vsw_tx_msg(vswp, mp))
496 					    != NULL) {
497 						DERR(vswp, "%s: drop mblks to "
498 						    "phys dev", __func__);
499 						freemsgchain(ret_m);
500 					}
501 
502 				} else if (caller == VSW_PHYSDEV) {
503 					/*
504 					 * Pkt seen because card in promisc
505 					 * mode. Send up stack if plumbed in
506 					 * promisc mode, else drop it.
507 					 */
508 					vsw_mac_rx(vswp, caller, mrh, mp, mpt,
509 					    VSW_MACRX_PROMISC |
510 					    VSW_MACRX_FREEMSG);
511 
512 				} else if (caller == VSW_LOCALDEV) {
513 					/*
514 					 * Pkt came down the stack, send out
515 					 * over physical device.
516 					 */
517 					if ((ret_m = vsw_tx_msg(vswp, mp))
518 					    != NULL) {
519 						DERR(vswp, "%s: drop mblks to "
520 						    "phys dev", __func__);
521 						freemsgchain(ret_m);
522 					}
523 				}
524 			}
525 		}
526 	}
527 	D1(vswp, "%s: exit\n", __func__);
528 }
529 
530 /*
531  * Switch ethernet frame when in layer 3 mode (i.e. using IP
532  * layer to do the routing).
533  *
534  * There is a large amount of overlap between this function and
535  * vsw_switch_l2_frame. At some stage we need to revisit and refactor
536  * both these functions.
537  */
538 void
539 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
540 			vsw_port_t *arg, mac_resource_handle_t mrh)
541 {
542 	struct ether_header	*ehp;
543 	vsw_port_t		*port = NULL;
544 	mblk_t			*bp = NULL;
545 	mblk_t			*mpt;
546 	int			rv;
547 	vsw_port_list_t		*plist = &vswp->plist;
548 
549 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
550 
551 	/*
552 	 * In layer 3 mode should only ever be switching packets
553 	 * between IP layer and vnet devices. So make sure thats
554 	 * who is invoking us.
555 	 */
556 	if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
557 		DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
558 		freemsgchain(mp);
559 		return;
560 	}
561 
562 	/* process the chain of packets */
563 	bp = mp;
564 	while (bp) {
565 		ehp = (struct ether_header *)bp->b_rptr;
566 		rv = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp);
567 		ASSERT(rv != 0);
568 
569 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
570 		    __func__, MBLKSIZE(mp), MBLKL(mp));
571 
572 		READ_ENTER(&plist->lockrw);
573 		port = vsw_lookup_fdb(vswp, ehp);
574 		if (port) {
575 			/*
576 			 * Mark the port as in-use before releasing the lockrw.
577 			 */
578 			VSW_PORT_REFHOLD(port);
579 			RW_EXIT(&plist->lockrw);
580 
581 			D2(vswp, "%s: sending to target port", __func__);
582 			(void) vsw_portsend(port, mp, mpt);
583 
584 			/*
585 			 * Decrement ref count.
586 			 */
587 			VSW_PORT_REFRELE(port);
588 		} else {
589 			RW_EXIT(&plist->lockrw);
590 			/*
591 			 * Destination not in FDB
592 			 *
593 			 * If the destination is broadcast or
594 			 * multicast forward the packet to all
595 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
596 			 * except the caller.
597 			 */
598 			if (IS_BROADCAST(ehp)) {
599 				D2(vswp, "%s: BROADCAST pkt", __func__);
600 				(void) vsw_forward_all(vswp, mp, mpt,
601 				    caller, arg);
602 			} else if (IS_MULTICAST(ehp)) {
603 				D2(vswp, "%s: MULTICAST pkt", __func__);
604 				(void) vsw_forward_grp(vswp, mp, mpt,
605 				    caller, arg);
606 			} else {
607 				/*
608 				 * Unicast pkt from vnet that we don't have
609 				 * an FDB entry for, so must be destinded for
610 				 * the outside world. Attempt to send up to the
611 				 * IP layer to allow it to deal with it.
612 				 */
613 				if (caller == VSW_VNETPORT) {
614 					vsw_mac_rx(vswp, caller, mrh,
615 					    mp, mpt, VSW_MACRX_FREEMSG);
616 				}
617 			}
618 		}
619 	}
620 
621 	D1(vswp, "%s: exit", __func__);
622 }
623 
624 /*
625  * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
626  * except the caller (port on which frame arrived).
627  */
628 static int
629 vsw_forward_all(vsw_t *vswp, mblk_t *mp, mblk_t *mpt,
630     int caller, vsw_port_t *arg)
631 {
632 	vsw_port_list_t	*plist = &vswp->plist;
633 	vsw_port_t	*portp;
634 	mblk_t		*nmp = NULL;
635 	mblk_t		*ret_m = NULL;
636 	int		skip_port = 0;
637 
638 	D1(vswp, "vsw_forward_all: enter\n");
639 
640 	/*
641 	 * Broadcast message from inside ldoms so send to outside
642 	 * world if in either of layer 2 modes.
643 	 */
644 	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
645 	    (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
646 	    ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) {
647 
648 		nmp = vsw_dupmsgchain(mp);
649 		if (nmp) {
650 			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
651 				DERR(vswp, "%s: dropping pkt(s) "
652 				    "consisting of %ld bytes of data for"
653 				    " physical device", __func__, MBLKL(ret_m));
654 				freemsgchain(ret_m);
655 			}
656 		}
657 	}
658 
659 	if (caller == VSW_VNETPORT)
660 		skip_port = 1;
661 
662 	/*
663 	 * Broadcast message from other vnet (layer 2 or 3) or outside
664 	 * world (layer 2 only), send up stack if plumbed.
665 	 */
666 	if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) {
667 		vsw_mac_rx(vswp, caller, NULL, mp, mpt, VSW_MACRX_COPYMSG);
668 	}
669 
670 	/* send it to all VNETPORTs */
671 	READ_ENTER(&plist->lockrw);
672 	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
673 		D2(vswp, "vsw_forward_all: port %d", portp->p_instance);
674 		/*
675 		 * Caution ! - don't reorder these two checks as arg
676 		 * will be NULL if the caller is PHYSDEV. skip_port is
677 		 * only set if caller is VNETPORT.
678 		 */
679 		if ((skip_port) && (portp == arg)) {
680 			continue;
681 		} else {
682 			nmp = vsw_dupmsgchain(mp);
683 			if (nmp) {
684 				mblk_t *mpt = nmp;
685 
686 				/* Find tail */
687 				while (mpt->b_next != NULL) {
688 					mpt = mpt->b_next;
689 				}
690 				/*
691 				 * The plist->lockrw is protecting the
692 				 * portp from getting destroyed here.
693 				 * So, no ref_cnt is incremented here.
694 				 */
695 				(void) vsw_portsend(portp, nmp, mpt);
696 			} else {
697 				DERR(vswp, "vsw_forward_all: nmp NULL");
698 			}
699 		}
700 	}
701 	RW_EXIT(&plist->lockrw);
702 
703 	freemsgchain(mp);
704 
705 	D1(vswp, "vsw_forward_all: exit\n");
706 	return (0);
707 }
708 
709 /*
710  * Forward pkts to any devices or interfaces which have registered
711  * an interest in them (i.e. multicast groups).
712  */
713 static int
714 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, mblk_t *mpt,
715     int caller, vsw_port_t *arg)
716 {
717 	struct ether_header	*ehp = (struct ether_header *)mp->b_rptr;
718 	mfdb_ent_t		*entp = NULL;
719 	mfdb_ent_t		*tpp = NULL;
720 	vsw_port_t 		*port;
721 	uint64_t		key = 0;
722 	mblk_t			*nmp = NULL;
723 	mblk_t			*ret_m = NULL;
724 	boolean_t		check_if = B_TRUE;
725 
726 	/*
727 	 * Convert address to hash table key
728 	 */
729 	KEY_HASH(key, ehp->ether_dhost);
730 
731 	D1(vswp, "%s: key 0x%llx", __func__, key);
732 
733 	/*
734 	 * If pkt came from either a vnet or down the stack (if we are
735 	 * plumbed) and we are in layer 2 mode, then we send the pkt out
736 	 * over the physical adapter, and then check to see if any other
737 	 * vnets are interested in it.
738 	 */
739 	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
740 	    (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
741 	    ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) {
742 		nmp = vsw_dupmsgchain(mp);
743 		if (nmp) {
744 			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
745 				DERR(vswp, "%s: dropping pkt(s) consisting of "
746 				    "%ld bytes of data for physical device",
747 				    __func__, MBLKL(ret_m));
748 				freemsgchain(ret_m);
749 			}
750 		}
751 	}
752 
753 	READ_ENTER(&vswp->mfdbrw);
754 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key,
755 	    (mod_hash_val_t *)&entp) != 0) {
756 		D3(vswp, "%s: no table entry found for addr 0x%llx",
757 		    __func__, key);
758 	} else {
759 		/*
760 		 * Send to list of devices associated with this address...
761 		 */
762 		for (tpp = entp; tpp != NULL; tpp = tpp->nextp) {
763 
764 			/* dont send to ourselves */
765 			if ((caller == VSW_VNETPORT) &&
766 			    (tpp->d_addr == (void *)arg)) {
767 				port = (vsw_port_t *)tpp->d_addr;
768 				D3(vswp, "%s: not sending to ourselves"
769 				    " : port %d", __func__, port->p_instance);
770 				continue;
771 
772 			} else if ((caller == VSW_LOCALDEV) &&
773 			    (tpp->d_type == VSW_LOCALDEV)) {
774 				D3(vswp, "%s: not sending back up stack",
775 				    __func__);
776 				continue;
777 			}
778 
779 			if (tpp->d_type == VSW_VNETPORT) {
780 				port = (vsw_port_t *)tpp->d_addr;
781 				D3(vswp, "%s: sending to port %ld for addr "
782 				    "0x%llx", __func__, port->p_instance, key);
783 
784 				nmp = vsw_dupmsgchain(mp);
785 				if (nmp) {
786 					mblk_t *mpt = nmp;
787 
788 					/* Find tail */
789 					while (mpt->b_next != NULL) {
790 						mpt = mpt->b_next;
791 					}
792 					/*
793 					 * The vswp->mfdbrw is protecting the
794 					 * portp from getting destroyed here.
795 					 * So, no ref_cnt is incremented here.
796 					 */
797 					(void) vsw_portsend(port, nmp, mpt);
798 				}
799 			} else {
800 				vsw_mac_rx(vswp, caller, NULL,
801 				    mp, mpt, VSW_MACRX_COPYMSG);
802 				D3(vswp, "%s: sending up stack"
803 				    " for addr 0x%llx", __func__, key);
804 				check_if = B_FALSE;
805 			}
806 		}
807 	}
808 
809 	RW_EXIT(&vswp->mfdbrw);
810 
811 	/*
812 	 * If the pkt came from either a vnet or from physical device,
813 	 * and if we havent already sent the pkt up the stack then we
814 	 * check now if we can/should (i.e. the interface is plumbed
815 	 * and in promisc mode).
816 	 */
817 	if ((check_if) &&
818 	    ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
819 		vsw_mac_rx(vswp, caller, NULL, mp, mpt,
820 		    VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
821 	}
822 
823 	freemsgchain(mp);
824 
825 	D1(vswp, "%s: exit", __func__);
826 
827 	return (0);
828 }
829 
830 /*
831  * Add an entry into FDB, for the given mac address and port_id.
832  * Returns 0 on success, 1 on failure.
833  *
834  * Lock protecting FDB must be held by calling process.
835  */
836 int
837 vsw_add_fdb(vsw_t *vswp, vsw_port_t *port)
838 {
839 	uint64_t	addr = 0;
840 
841 	D1(vswp, "%s: enter", __func__);
842 
843 	KEY_HASH(addr, port->p_macaddr);
844 
845 	D2(vswp, "%s: key = 0x%llx", __func__, addr);
846 
847 	/*
848 	 * Note: duplicate keys will be rejected by mod_hash.
849 	 */
850 	if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr,
851 	    (mod_hash_val_t)port) != 0) {
852 		DERR(vswp, "%s: unable to add entry into fdb.", __func__);
853 		return (1);
854 	}
855 
856 	D1(vswp, "%s: exit", __func__);
857 	return (0);
858 }
859 
860 /*
861  * Remove an entry from FDB.
862  * Returns 0 on success, 1 on failure.
863  */
864 int
865 vsw_del_fdb(vsw_t *vswp, vsw_port_t *port)
866 {
867 	uint64_t	addr = 0;
868 
869 	D1(vswp, "%s: enter", __func__);
870 
871 	KEY_HASH(addr, port->p_macaddr);
872 
873 	D2(vswp, "%s: key = 0x%llx", __func__, addr);
874 
875 	(void) mod_hash_destroy(vswp->fdb, (mod_hash_val_t)addr);
876 
877 	D1(vswp, "%s: enter", __func__);
878 
879 	return (0);
880 }
881 
882 /*
883  * Search fdb for a given mac address.
884  * Returns pointer to the entry if found, else returns NULL.
885  */
886 static vsw_port_t *
887 vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp)
888 {
889 	uint64_t	key = 0;
890 	vsw_port_t	*port = NULL;
891 
892 	D1(vswp, "%s: enter", __func__);
893 
894 	KEY_HASH(key, ehp->ether_dhost);
895 
896 	D2(vswp, "%s: key = 0x%llx", __func__, key);
897 
898 	if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key,
899 	    (mod_hash_val_t *)&port) != 0) {
900 		D2(vswp, "%s: no port found", __func__);
901 		return (NULL);
902 	}
903 
904 	D1(vswp, "%s: exit", __func__);
905 
906 	return (port);
907 }
908 
909 /*
910  * Add or remove multicast address(es).
911  *
912  * Returns 0 on success, 1 on failure.
913  */
914 int
915 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
916 {
917 	mcst_addr_t		*mcst_p = NULL;
918 	vsw_t			*vswp = port->p_vswp;
919 	uint64_t		addr = 0x0;
920 	int			i;
921 
922 	D1(vswp, "%s: enter", __func__);
923 
924 	D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);
925 
926 	for (i = 0; i < mcst_pkt->count; i++) {
927 		/*
928 		 * Convert address into form that can be used
929 		 * as hash table key.
930 		 */
931 		KEY_HASH(addr, mcst_pkt->mca[i]);
932 
933 		/*
934 		 * Add or delete the specified address/port combination.
935 		 */
936 		if (mcst_pkt->set == 0x1) {
937 			D3(vswp, "%s: adding multicast address 0x%llx for "
938 			    "port %ld", __func__, addr, port->p_instance);
939 			if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
940 				/*
941 				 * Update the list of multicast
942 				 * addresses contained within the
943 				 * port structure to include this new
944 				 * one.
945 				 */
946 				mcst_p = kmem_zalloc(sizeof (mcst_addr_t),
947 				    KM_NOSLEEP);
948 				if (mcst_p == NULL) {
949 					DERR(vswp, "%s: unable to alloc mem",
950 					    __func__);
951 					(void) vsw_del_mcst(vswp,
952 					    VSW_VNETPORT, addr, port);
953 					return (1);
954 				}
955 
956 				mcst_p->nextp = NULL;
957 				mcst_p->addr = addr;
958 				ether_copy(&mcst_pkt->mca[i], &mcst_p->mca);
959 
960 				/*
961 				 * Program the address into HW. If the addr
962 				 * has already been programmed then the MAC
963 				 * just increments a ref counter (which is
964 				 * used when the address is being deleted)
965 				 */
966 				mutex_enter(&vswp->mac_lock);
967 				if (vswp->mh != NULL) {
968 					if (mac_multicst_add(vswp->mh,
969 					    (uchar_t *)&mcst_pkt->mca[i])) {
970 						mutex_exit(&vswp->mac_lock);
971 						cmn_err(CE_WARN, "!vsw%d: "
972 						    "unable to add multicast "
973 						    "address: %s\n",
974 						    vswp->instance,
975 						    ether_sprintf((void *)
976 						    &mcst_p->mca));
977 						(void) vsw_del_mcst(vswp,
978 						    VSW_VNETPORT, addr, port);
979 						kmem_free(mcst_p,
980 						    sizeof (*mcst_p));
981 						return (1);
982 					}
983 					mcst_p->mac_added = B_TRUE;
984 				}
985 				mutex_exit(&vswp->mac_lock);
986 
987 				mutex_enter(&port->mca_lock);
988 				mcst_p->nextp = port->mcap;
989 				port->mcap = mcst_p;
990 				mutex_exit(&port->mca_lock);
991 
992 			} else {
993 				DERR(vswp, "%s: error adding multicast "
994 				    "address 0x%llx for port %ld",
995 				    __func__, addr, port->p_instance);
996 				return (1);
997 			}
998 		} else {
999 			/*
1000 			 * Delete an entry from the multicast hash
1001 			 * table and update the address list
1002 			 * appropriately.
1003 			 */
1004 			if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
1005 				D3(vswp, "%s: deleting multicast address "
1006 				    "0x%llx for port %ld", __func__, addr,
1007 				    port->p_instance);
1008 
1009 				mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr);
1010 				ASSERT(mcst_p != NULL);
1011 
1012 				/*
1013 				 * Remove the address from HW. The address
1014 				 * will actually only be removed once the ref
1015 				 * count within the MAC layer has dropped to
1016 				 * zero. I.e. we can safely call this fn even
1017 				 * if other ports are interested in this
1018 				 * address.
1019 				 */
1020 				mutex_enter(&vswp->mac_lock);
1021 				if (vswp->mh != NULL && mcst_p->mac_added) {
1022 					if (mac_multicst_remove(vswp->mh,
1023 					    (uchar_t *)&mcst_pkt->mca[i])) {
1024 						mutex_exit(&vswp->mac_lock);
1025 						cmn_err(CE_WARN, "!vsw%d: "
1026 						    "unable to remove mcast "
1027 						    "address: %s\n",
1028 						    vswp->instance,
1029 						    ether_sprintf((void *)
1030 						    &mcst_p->mca));
1031 						kmem_free(mcst_p,
1032 						    sizeof (*mcst_p));
1033 						return (1);
1034 					}
1035 					mcst_p->mac_added = B_FALSE;
1036 				}
1037 				mutex_exit(&vswp->mac_lock);
1038 				kmem_free(mcst_p, sizeof (*mcst_p));
1039 
1040 			} else {
1041 				DERR(vswp, "%s: error deleting multicast "
1042 				    "addr 0x%llx for port %ld",
1043 				    __func__, addr, port->p_instance);
1044 				return (1);
1045 			}
1046 		}
1047 	}
1048 	D1(vswp, "%s: exit", __func__);
1049 	return (0);
1050 }
1051 
1052 /*
1053  * Add a new multicast entry.
1054  *
1055  * Search hash table based on address. If match found then
1056  * update associated val (which is chain of ports), otherwise
1057  * create new key/val (addr/port) pair and insert into table.
1058  */
1059 int
1060 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
1061 {
1062 	int		dup = 0;
1063 	int		rv = 0;
1064 	mfdb_ent_t	*ment = NULL;
1065 	mfdb_ent_t	*tmp_ent = NULL;
1066 	mfdb_ent_t	*new_ent = NULL;
1067 	void		*tgt = NULL;
1068 
1069 	if (devtype == VSW_VNETPORT) {
1070 		/*
1071 		 * Being invoked from a vnet.
1072 		 */
1073 		ASSERT(arg != NULL);
1074 		tgt = arg;
1075 		D2(NULL, "%s: port %d : address 0x%llx", __func__,
1076 		    ((vsw_port_t *)arg)->p_instance, addr);
1077 	} else {
1078 		/*
1079 		 * We are being invoked via the m_multicst mac entry
1080 		 * point.
1081 		 */
1082 		D2(NULL, "%s: address 0x%llx", __func__, addr);
1083 		tgt = (void *)vswp;
1084 	}
1085 
1086 	WRITE_ENTER(&vswp->mfdbrw);
1087 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
1088 	    (mod_hash_val_t *)&ment) != 0) {
1089 
1090 		/* address not currently in table */
1091 		ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
1092 		ment->d_addr = (void *)tgt;
1093 		ment->d_type = devtype;
1094 		ment->nextp = NULL;
1095 
1096 		if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
1097 		    (mod_hash_val_t)ment) != 0) {
1098 			DERR(vswp, "%s: hash table insertion failed", __func__);
1099 			kmem_free(ment, sizeof (mfdb_ent_t));
1100 			rv = 1;
1101 		} else {
1102 			D2(vswp, "%s: added initial entry for 0x%llx to "
1103 			    "table", __func__, addr);
1104 		}
1105 	} else {
1106 		/*
1107 		 * Address in table. Check to see if specified port
1108 		 * is already associated with the address. If not add
1109 		 * it now.
1110 		 */
1111 		tmp_ent = ment;
1112 		while (tmp_ent != NULL) {
1113 			if (tmp_ent->d_addr == (void *)tgt) {
1114 				if (devtype == VSW_VNETPORT) {
1115 					DERR(vswp, "%s: duplicate port entry "
1116 					    "found for portid %ld and key "
1117 					    "0x%llx", __func__,
1118 					    ((vsw_port_t *)arg)->p_instance,
1119 					    addr);
1120 				} else {
1121 					DERR(vswp, "%s: duplicate entry found"
1122 					    "for key 0x%llx", __func__, addr);
1123 				}
1124 				rv = 1;
1125 				dup = 1;
1126 				break;
1127 			}
1128 			tmp_ent = tmp_ent->nextp;
1129 		}
1130 
1131 		/*
1132 		 * Port not on list so add it to end now.
1133 		 */
1134 		if (0 == dup) {
1135 			D2(vswp, "%s: added entry for 0x%llx to table",
1136 			    __func__, addr);
1137 			new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
1138 			new_ent->d_addr = (void *)tgt;
1139 			new_ent->d_type = devtype;
1140 			new_ent->nextp = NULL;
1141 
1142 			tmp_ent = ment;
1143 			while (tmp_ent->nextp != NULL)
1144 				tmp_ent = tmp_ent->nextp;
1145 
1146 			tmp_ent->nextp = new_ent;
1147 		}
1148 	}
1149 
1150 	RW_EXIT(&vswp->mfdbrw);
1151 	return (rv);
1152 }
1153 
1154 /*
1155  * Remove a multicast entry from the hashtable.
1156  *
1157  * Search hash table based on address. If match found, scan
1158  * list of ports associated with address. If specified port
1159  * found remove it from list.
1160  */
1161 int
1162 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
1163 {
1164 	mfdb_ent_t	*ment = NULL;
1165 	mfdb_ent_t	*curr_p, *prev_p;
1166 	void		*tgt = NULL;
1167 
1168 	D1(vswp, "%s: enter", __func__);
1169 
1170 	if (devtype == VSW_VNETPORT) {
1171 		tgt = (vsw_port_t *)arg;
1172 		D2(vswp, "%s: removing port %d from mFDB for address"
1173 		    " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr);
1174 	} else {
1175 		D2(vswp, "%s: removing entry", __func__);
1176 		tgt = (void *)vswp;
1177 	}
1178 
1179 	WRITE_ENTER(&vswp->mfdbrw);
1180 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
1181 	    (mod_hash_val_t *)&ment) != 0) {
1182 		D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
1183 		RW_EXIT(&vswp->mfdbrw);
1184 		return (1);
1185 	}
1186 
1187 	prev_p = curr_p = ment;
1188 
1189 	while (curr_p != NULL) {
1190 		if (curr_p->d_addr == (void *)tgt) {
1191 			if (devtype == VSW_VNETPORT) {
1192 				D2(vswp, "%s: port %d found", __func__,
1193 				    ((vsw_port_t *)tgt)->p_instance);
1194 			} else {
1195 				D2(vswp, "%s: instance found", __func__);
1196 			}
1197 
1198 			if (prev_p == curr_p) {
1199 				/*
1200 				 * head of list, if no other element is in
1201 				 * list then destroy this entry, otherwise
1202 				 * just replace it with updated value.
1203 				 */
1204 				ment = curr_p->nextp;
1205 				if (ment == NULL) {
1206 					(void) mod_hash_destroy(vswp->mfdb,
1207 					    (mod_hash_val_t)addr);
1208 				} else {
1209 					(void) mod_hash_replace(vswp->mfdb,
1210 					    (mod_hash_key_t)addr,
1211 					    (mod_hash_val_t)ment);
1212 				}
1213 			} else {
1214 				/*
1215 				 * Not head of list, no need to do
1216 				 * replacement, just adjust list pointers.
1217 				 */
1218 				prev_p->nextp = curr_p->nextp;
1219 			}
1220 			break;
1221 		}
1222 
1223 		prev_p = curr_p;
1224 		curr_p = curr_p->nextp;
1225 	}
1226 
1227 	RW_EXIT(&vswp->mfdbrw);
1228 
1229 	D1(vswp, "%s: exit", __func__);
1230 
1231 	if (curr_p == NULL)
1232 		return (1);
1233 	kmem_free(curr_p, sizeof (mfdb_ent_t));
1234 	return (0);
1235 }
1236 
1237 /*
1238  * Port is being deleted, but has registered an interest in one
1239  * or more multicast groups. Using the list of addresses maintained
1240  * within the port structure find the appropriate entry in the hash
1241  * table and remove this port from the list of interested ports.
1242  */
1243 void
1244 vsw_del_mcst_port(vsw_port_t *port)
1245 {
1246 	mcst_addr_t	*mcap = NULL;
1247 	vsw_t		*vswp = port->p_vswp;
1248 
1249 	D1(vswp, "%s: enter", __func__);
1250 
1251 	mutex_enter(&port->mca_lock);
1252 
1253 	while ((mcap = port->mcap) != NULL) {
1254 
1255 		port->mcap = mcap->nextp;
1256 
1257 		mutex_exit(&port->mca_lock);
1258 
1259 		(void) vsw_del_mcst(vswp, VSW_VNETPORT,
1260 		    mcap->addr, port);
1261 
1262 		/*
1263 		 * Remove the address from HW. The address
1264 		 * will actually only be removed once the ref
1265 		 * count within the MAC layer has dropped to
1266 		 * zero. I.e. we can safely call this fn even
1267 		 * if other ports are interested in this
1268 		 * address.
1269 		 */
1270 		mutex_enter(&vswp->mac_lock);
1271 		if (vswp->mh != NULL && mcap->mac_added) {
1272 			(void) mac_multicst_remove(vswp->mh,
1273 			    (uchar_t *)&mcap->mca);
1274 		}
1275 		mutex_exit(&vswp->mac_lock);
1276 
1277 		kmem_free(mcap, sizeof (*mcap));
1278 
1279 		mutex_enter(&port->mca_lock);
1280 
1281 	}
1282 
1283 	mutex_exit(&port->mca_lock);
1284 
1285 	D1(vswp, "%s: exit", __func__);
1286 }
1287 
1288 /*
1289  * This vsw instance is detaching, but has registered an interest in one
1290  * or more multicast groups. Using the list of addresses maintained
1291  * within the vsw structure find the appropriate entry in the hash
1292  * table and remove this instance from the list of interested ports.
1293  */
1294 void
1295 vsw_del_mcst_vsw(vsw_t *vswp)
1296 {
1297 	mcst_addr_t	*next_p = NULL;
1298 
1299 	D1(vswp, "%s: enter", __func__);
1300 
1301 	mutex_enter(&vswp->mca_lock);
1302 
1303 	while (vswp->mcap != NULL) {
1304 		DERR(vswp, "%s: deleting addr 0x%llx",
1305 		    __func__, vswp->mcap->addr);
1306 		(void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL);
1307 
1308 		next_p = vswp->mcap->nextp;
1309 		kmem_free(vswp->mcap, sizeof (mcst_addr_t));
1310 		vswp->mcap = next_p;
1311 	}
1312 
1313 	vswp->mcap = NULL;
1314 	mutex_exit(&vswp->mca_lock);
1315 
1316 	D1(vswp, "%s: exit", __func__);
1317 }
1318 
1319 static int
1320 vsw_get_same_dest_list(struct ether_header *ehp,
1321     mblk_t **rhead, mblk_t **rtail, mblk_t **mpp)
1322 {
1323 	int count = 0;
1324 	mblk_t *bp;
1325 	mblk_t *nbp;
1326 	mblk_t *head = NULL;
1327 	mblk_t *tail = NULL;
1328 	mblk_t *prev = NULL;
1329 	struct ether_header *behp;
1330 
1331 	/* process the chain of packets */
1332 	bp = *mpp;
1333 	while (bp) {
1334 		nbp = bp->b_next;
1335 		behp = (struct ether_header *)bp->b_rptr;
1336 		bp->b_prev = NULL;
1337 		if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) {
1338 			if (prev == NULL) {
1339 				*mpp = nbp;
1340 			} else {
1341 				prev->b_next = nbp;
1342 			}
1343 			bp->b_next =  NULL;
1344 			if (head == NULL) {
1345 				head = tail = bp;
1346 			} else {
1347 				tail->b_next = bp;
1348 				tail = bp;
1349 			}
1350 			count++;
1351 		} else {
1352 			prev = bp;
1353 		}
1354 		bp = nbp;
1355 	}
1356 	*rhead = head;
1357 	*rtail = tail;
1358 	DTRACE_PROBE1(vsw_same_dest, int, count);
1359 	return (count);
1360 }
1361 
1362 static mblk_t *
1363 vsw_dupmsgchain(mblk_t *mp)
1364 {
1365 	mblk_t	*nmp = NULL;
1366 	mblk_t	**nmpp = &nmp;
1367 
1368 	for (; mp != NULL; mp = mp->b_next) {
1369 		if ((*nmpp = dupmsg(mp)) == NULL) {
1370 			freemsgchain(nmp);
1371 			return (NULL);
1372 		}
1373 
1374 		nmpp = &((*nmpp)->b_next);
1375 	}
1376 
1377 	return (nmp);
1378 }
1379