xref: /titanic_51/usr/src/uts/sun4v/io/vsw_switching.c (revision 4202ea4b139fb1fab45cd14d9d03a81b86902341)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/debug.h>
32 #include <sys/time.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/user.h>
36 #include <sys/stropts.h>
37 #include <sys/stream.h>
38 #include <sys/strlog.h>
39 #include <sys/strsubr.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpu.h>
42 #include <sys/kmem.h>
43 #include <sys/conf.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47 #include <sys/stat.h>
48 #include <sys/kstat.h>
49 #include <sys/vtrace.h>
50 #include <sys/strsun.h>
51 #include <sys/dlpi.h>
52 #include <sys/ethernet.h>
53 #include <net/if.h>
54 #include <sys/varargs.h>
55 #include <sys/machsystm.h>
56 #include <sys/modctl.h>
57 #include <sys/modhash.h>
58 #include <sys/mac.h>
59 #include <sys/mac_ether.h>
60 #include <sys/taskq.h>
61 #include <sys/note.h>
62 #include <sys/mach_descrip.h>
63 #include <sys/mac.h>
64 #include <sys/mdeg.h>
65 #include <sys/ldc.h>
66 #include <sys/vsw_fdb.h>
67 #include <sys/vsw.h>
68 #include <sys/vio_mailbox.h>
69 #include <sys/vnet_mailbox.h>
70 #include <sys/vnet_common.h>
71 #include <sys/vio_util.h>
72 #include <sys/sdt.h>
73 #include <sys/atomic.h>
74 
75 /* Switching setup routines */
76 void vsw_setup_switching_timeout(void *arg);
77 void vsw_stop_switching_timeout(vsw_t *vswp);
78 int vsw_setup_switching(vsw_t *);
79 static	int vsw_setup_layer2(vsw_t *);
80 static	int vsw_setup_layer3(vsw_t *);
81 
82 /* Switching/data transmit routines */
83 static	void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
84     vsw_port_t *port, mac_resource_handle_t);
85 static	void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
86     vsw_port_t *port, mac_resource_handle_t);
87 static	int vsw_forward_all(vsw_t *vswp, mblk_t *mp,
88 	int caller, vsw_port_t *port);
89 static	int vsw_forward_grp(vsw_t *vswp, mblk_t *mp,
90     int caller, vsw_port_t *port);
91 
92 /* Forwarding database (FDB) routines */
93 static	vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *);
94 int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
95 void vsw_del_mcst_port(vsw_port_t *);
96 int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
97 int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
98 void vsw_del_mcst_vsw(vsw_t *);
99 int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
100 int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
101 
102 /* Support functions */
103 static mblk_t *vsw_dupmsgchain(mblk_t *mp);
104 static uint32_t vsw_get_same_dest_list(struct ether_header *ehp,
105     mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
106 
107 
108 /*
109  * Functions imported from other files.
110  */
111 extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
112 extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t);
113 extern int vsw_mac_open(vsw_t *vswp);
114 extern void vsw_mac_close(vsw_t *vswp);
115 extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
116     mblk_t *mp, vsw_macrx_flags_t flags);
117 extern void vsw_set_addrs(vsw_t *vswp);
118 extern int vsw_get_hw_maddr(vsw_t *);
119 extern int vsw_mac_attach(vsw_t *vswp);
120 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt,
121 	uint32_t count);
122 
123 /*
124  * Tunables used in this file.
125  */
126 extern int vsw_setup_switching_delay;
127 
128 
129 /*
130  * Timeout routine to setup switching mode:
131  * vsw_setup_switching() is invoked from vsw_attach() or vsw_update_md_prop()
132  * initially. If it fails and the error is EAGAIN, then this timeout handler
133  * is started to retry vsw_setup_switching(). vsw_setup_switching() is retried
134  * until we successfully finish it; or the returned error is not EAGAIN.
135  */
136 void
137 vsw_setup_switching_timeout(void *arg)
138 {
139 	vsw_t		*vswp = (vsw_t *)arg;
140 	int		rv;
141 
142 	if (vswp->swtmout_enabled == B_FALSE)
143 		return;
144 
145 	rv = vsw_setup_switching(vswp);
146 
147 	if (rv == 0) {
148 		/*
149 		 * Successfully setup switching mode.
150 		 * Program unicst, mcst addrs of vsw
151 		 * interface and ports in the physdev.
152 		 */
153 		vsw_set_addrs(vswp);
154 	}
155 
156 	mutex_enter(&vswp->swtmout_lock);
157 
158 	if (rv == EAGAIN && vswp->swtmout_enabled == B_TRUE) {
159 		/*
160 		 * Reschedule timeout() if the error is EAGAIN and the
161 		 * timeout is still enabled. For errors other than EAGAIN,
162 		 * we simply return without rescheduling timeout().
163 		 */
164 		vswp->swtmout_id =
165 		    timeout(vsw_setup_switching_timeout, vswp,
166 		    (vsw_setup_switching_delay * drv_usectohz(MICROSEC)));
167 		goto exit;
168 	}
169 
170 	/* timeout handler completed */
171 	vswp->swtmout_enabled = B_FALSE;
172 	vswp->swtmout_id = 0;
173 
174 exit:
175 	mutex_exit(&vswp->swtmout_lock);
176 }
177 
178 /*
179  * Cancel the timeout handler to setup switching mode.
180  */
181 void
182 vsw_stop_switching_timeout(vsw_t *vswp)
183 {
184 	timeout_id_t tid;
185 
186 	mutex_enter(&vswp->swtmout_lock);
187 
188 	tid = vswp->swtmout_id;
189 
190 	if (tid != 0) {
191 		/* signal timeout handler to stop */
192 		vswp->swtmout_enabled = B_FALSE;
193 		vswp->swtmout_id = 0;
194 		mutex_exit(&vswp->swtmout_lock);
195 
196 		(void) untimeout(tid);
197 	} else {
198 		mutex_exit(&vswp->swtmout_lock);
199 	}
200 
201 	(void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE);
202 
203 	mutex_enter(&vswp->mac_lock);
204 	vswp->mac_open_retries = 0;
205 	mutex_exit(&vswp->mac_lock);
206 }
207 
208 /*
209  * Setup the required switching mode.
210  * This routine is invoked from vsw_attach() or vsw_update_md_prop()
211  * initially. If it fails and the error is EAGAIN, then a timeout handler
212  * is started to retry vsw_setup_switching(), until it successfully finishes;
213  * or the returned error is not EAGAIN.
214  *
215  * Returns:
216  *  0 on success.
217  *  EAGAIN if retry is needed.
218  *  1 on all other failures.
219  */
220 int
221 vsw_setup_switching(vsw_t *vswp)
222 {
223 	int	i, rv = 1;
224 
225 	D1(vswp, "%s: enter", __func__);
226 
227 	/*
228 	 * Select best switching mode.
229 	 * Note that we start from the saved smode_idx. This is done as
230 	 * this routine can be called from the timeout handler to retry
231 	 * setting up a specific mode. Currently only the function which
232 	 * sets up layer2/promisc mode returns EAGAIN if the underlying
233 	 * physical device is not available yet, causing retries.
234 	 */
235 	for (i = vswp->smode_idx; i < vswp->smode_num; i++) {
236 		vswp->smode_idx = i;
237 		switch (vswp->smode[i]) {
238 		case VSW_LAYER2:
239 		case VSW_LAYER2_PROMISC:
240 			rv = vsw_setup_layer2(vswp);
241 			break;
242 
243 		case VSW_LAYER3:
244 			rv = vsw_setup_layer3(vswp);
245 			break;
246 
247 		default:
248 			DERR(vswp, "unknown switch mode");
249 			break;
250 		}
251 
252 		if ((rv == 0) || (rv == EAGAIN))
253 			break;
254 
255 		/* all other errors(rv != 0): continue & select the next mode */
256 		rv = 1;
257 	}
258 
259 	if (rv && (rv != EAGAIN)) {
260 		cmn_err(CE_WARN, "!vsw%d: Unable to setup specified "
261 		    "switching mode", vswp->instance);
262 	} else if (rv == 0) {
263 		(void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE);
264 	}
265 
266 	D2(vswp, "%s: Operating in mode %d", __func__,
267 	    vswp->smode[vswp->smode_idx]);
268 
269 	D1(vswp, "%s: exit", __func__);
270 
271 	return (rv);
272 }
273 
274 /*
275  * Setup for layer 2 switching.
276  *
277  * Returns:
278  *  0 on success.
279  *  EAGAIN if retry is needed.
280  *  EIO on all other failures.
281  */
282 static int
283 vsw_setup_layer2(vsw_t *vswp)
284 {
285 	int	rv;
286 
287 	D1(vswp, "%s: enter", __func__);
288 
289 	vswp->vsw_switch_frame = vsw_switch_l2_frame;
290 
291 	rv = strlen(vswp->physname);
292 	if (rv == 0) {
293 		/*
294 		 * Physical device name is NULL, which is
295 		 * required for layer 2.
296 		 */
297 		cmn_err(CE_WARN, "!vsw%d: no physical device name specified",
298 		    vswp->instance);
299 		return (EIO);
300 	}
301 
302 	mutex_enter(&vswp->mac_lock);
303 
304 	rv = vsw_mac_open(vswp);
305 	if (rv != 0) {
306 		if (rv != EAGAIN) {
307 			cmn_err(CE_WARN, "!vsw%d: Unable to open physical "
308 			    "device: %s\n", vswp->instance, vswp->physname);
309 		}
310 		mutex_exit(&vswp->mac_lock);
311 		return (rv);
312 	}
313 
314 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) {
315 		/*
316 		 * Verify that underlying device can support multiple
317 		 * unicast mac addresses.
318 		 */
319 		rv = vsw_get_hw_maddr(vswp);
320 		if (rv != 0) {
321 			cmn_err(CE_WARN, "!vsw%d: Unable to setup "
322 			    "layer2 switching", vswp->instance);
323 			goto exit_error;
324 		}
325 	}
326 
327 	/*
328 	 * Attempt to link into the MAC layer so we can get
329 	 * and send packets out over the physical adapter.
330 	 */
331 	rv = vsw_mac_attach(vswp);
332 	if (rv != 0) {
333 		/*
334 		 * Registration with the MAC layer has failed,
335 		 * so return error so that can fall back to next
336 		 * prefered switching method.
337 		 */
338 		cmn_err(CE_WARN, "!vsw%d: Unable to setup physical device: "
339 		    "%s\n", vswp->instance, vswp->physname);
340 		goto exit_error;
341 	}
342 
343 	D1(vswp, "%s: exit", __func__);
344 
345 	mutex_exit(&vswp->mac_lock);
346 	return (0);
347 
348 exit_error:
349 	vsw_mac_close(vswp);
350 	mutex_exit(&vswp->mac_lock);
351 	return (EIO);
352 }
353 
354 static int
355 vsw_setup_layer3(vsw_t *vswp)
356 {
357 	D1(vswp, "%s: enter", __func__);
358 
359 	D2(vswp, "%s: operating in layer 3 mode", __func__);
360 	vswp->vsw_switch_frame = vsw_switch_l3_frame;
361 
362 	D1(vswp, "%s: exit", __func__);
363 
364 	return (0);
365 }
366 
367 /*
368  * Switch the given ethernet frame when operating in layer 2 mode.
369  *
370  * vswp: pointer to the vsw instance
371  * mp: pointer to chain of ethernet frame(s) to be switched
372  * caller: identifies the source of this frame as:
373  * 		1. VSW_VNETPORT - a vsw port (connected to a vnet).
374  *		2. VSW_PHYSDEV - the physical ethernet device
375  *		3. VSW_LOCALDEV - vsw configured as a virtual interface
376  * arg: argument provided by the caller.
377  *		1. for VNETPORT - pointer to the corresponding vsw_port_t.
378  *		2. for PHYSDEV - NULL
379  *		3. for LOCALDEV - pointer to to this vsw_t(self)
380  */
381 void
382 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
383 			vsw_port_t *arg, mac_resource_handle_t mrh)
384 {
385 	struct ether_header	*ehp;
386 	vsw_port_t		*port = NULL;
387 	mblk_t			*bp, *ret_m;
388 	mblk_t			*mpt = NULL;
389 	uint32_t		count;
390 	vsw_port_list_t		*plist = &vswp->plist;
391 
392 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
393 
394 	/*
395 	 * PERF: rather than breaking up the chain here, scan it
396 	 * to find all mblks heading to same destination and then
397 	 * pass that sub-chain to the lower transmit functions.
398 	 */
399 
400 	/* process the chain of packets */
401 	bp = mp;
402 	while (bp) {
403 		ehp = (struct ether_header *)bp->b_rptr;
404 		count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp);
405 		ASSERT(count != 0);
406 
407 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
408 		    __func__, MBLKSIZE(mp), MBLKL(mp));
409 
410 		if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) {
411 			/*
412 			 * If destination is VSW_LOCALDEV (vsw as an eth
413 			 * interface) and if the device is up & running,
414 			 * send the packet up the stack on this host.
415 			 * If the virtual interface is down, drop the packet.
416 			 */
417 			if (caller != VSW_LOCALDEV) {
418 				vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG);
419 			} else {
420 				freemsgchain(mp);
421 			}
422 			continue;
423 		}
424 
425 		READ_ENTER(&plist->lockrw);
426 		port = vsw_lookup_fdb(vswp, ehp);
427 		if (port) {
428 			/*
429 			 * Mark the port as in-use before releasing the lockrw.
430 			 */
431 			VSW_PORT_REFHOLD(port);
432 			RW_EXIT(&plist->lockrw);
433 
434 			/*
435 			 * If plumbed and in promisc mode then copy msg
436 			 * and send up the stack.
437 			 */
438 			vsw_mac_rx(vswp, mrh, mp,
439 			    VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
440 
441 			/*
442 			 * If the destination is in FDB, the packet
443 			 * should be forwarded to the correponding
444 			 * vsw_port (connected to a vnet device -
445 			 * VSW_VNETPORT)
446 			 */
447 			(void) vsw_portsend(port, mp, mpt, count);
448 
449 			/*
450 			 * Decrement use count in port.
451 			 */
452 			VSW_PORT_REFRELE(port);
453 		} else {
454 			RW_EXIT(&plist->lockrw);
455 			/*
456 			 * Destination not in FDB.
457 			 *
458 			 * If the destination is broadcast or
459 			 * multicast forward the packet to all
460 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
461 			 * except the caller.
462 			 */
463 			if (IS_BROADCAST(ehp)) {
464 				D2(vswp, "%s: BROADCAST pkt", __func__);
465 				(void) vsw_forward_all(vswp, mp, caller, arg);
466 			} else if (IS_MULTICAST(ehp)) {
467 				D2(vswp, "%s: MULTICAST pkt", __func__);
468 				(void) vsw_forward_grp(vswp, mp, caller, arg);
469 			} else {
470 				/*
471 				 * If the destination is unicast, and came
472 				 * from either a logical network device or
473 				 * the switch itself when it is plumbed, then
474 				 * send it out on the physical device and also
475 				 * up the stack if the logical interface is
476 				 * in promiscious mode.
477 				 *
478 				 * NOTE:  The assumption here is that if we
479 				 * cannot find the destination in our fdb, its
480 				 * a unicast address, and came from either a
481 				 * vnet or down the stack (when plumbed) it
482 				 * must be destinded for an ethernet device
483 				 * outside our ldoms.
484 				 */
485 				if (caller == VSW_VNETPORT) {
486 					/* promisc check copy etc */
487 					vsw_mac_rx(vswp, mrh, mp,
488 					    VSW_MACRX_PROMISC |
489 					    VSW_MACRX_COPYMSG);
490 
491 					if ((ret_m = vsw_tx_msg(vswp, mp))
492 					    != NULL) {
493 						DERR(vswp, "%s: drop mblks to "
494 						    "phys dev", __func__);
495 						freemsgchain(ret_m);
496 					}
497 
498 				} else if (caller == VSW_PHYSDEV) {
499 					/*
500 					 * Pkt seen because card in promisc
501 					 * mode. Send up stack if plumbed in
502 					 * promisc mode, else drop it.
503 					 */
504 					vsw_mac_rx(vswp, mrh, mp,
505 					    VSW_MACRX_PROMISC |
506 					    VSW_MACRX_FREEMSG);
507 
508 				} else if (caller == VSW_LOCALDEV) {
509 					/*
510 					 * Pkt came down the stack, send out
511 					 * over physical device.
512 					 */
513 					if ((ret_m = vsw_tx_msg(vswp, mp))
514 					    != NULL) {
515 						DERR(vswp, "%s: drop mblks to "
516 						    "phys dev", __func__);
517 						freemsgchain(ret_m);
518 					}
519 				}
520 			}
521 		}
522 	}
523 	D1(vswp, "%s: exit\n", __func__);
524 }
525 
526 /*
527  * Switch ethernet frame when in layer 3 mode (i.e. using IP
528  * layer to do the routing).
529  *
530  * There is a large amount of overlap between this function and
531  * vsw_switch_l2_frame. At some stage we need to revisit and refactor
532  * both these functions.
533  */
534 void
535 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
536 			vsw_port_t *arg, mac_resource_handle_t mrh)
537 {
538 	struct ether_header	*ehp;
539 	vsw_port_t		*port = NULL;
540 	mblk_t			*bp = NULL;
541 	mblk_t			*mpt;
542 	uint32_t		count;
543 	vsw_port_list_t		*plist = &vswp->plist;
544 
545 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
546 
547 	/*
548 	 * In layer 3 mode should only ever be switching packets
549 	 * between IP layer and vnet devices. So make sure thats
550 	 * who is invoking us.
551 	 */
552 	if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
553 		DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
554 		freemsgchain(mp);
555 		return;
556 	}
557 
558 	/* process the chain of packets */
559 	bp = mp;
560 	while (bp) {
561 		ehp = (struct ether_header *)bp->b_rptr;
562 		count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp);
563 		ASSERT(count != 0);
564 
565 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
566 		    __func__, MBLKSIZE(mp), MBLKL(mp));
567 
568 		READ_ENTER(&plist->lockrw);
569 		port = vsw_lookup_fdb(vswp, ehp);
570 		if (port) {
571 			/*
572 			 * Mark the port as in-use before releasing the lockrw.
573 			 */
574 			VSW_PORT_REFHOLD(port);
575 			RW_EXIT(&plist->lockrw);
576 
577 			D2(vswp, "%s: sending to target port", __func__);
578 			(void) vsw_portsend(port, mp, mpt, count);
579 
580 			/*
581 			 * Decrement ref count.
582 			 */
583 			VSW_PORT_REFRELE(port);
584 		} else {
585 			RW_EXIT(&plist->lockrw);
586 			/*
587 			 * Destination not in FDB
588 			 *
589 			 * If the destination is broadcast or
590 			 * multicast forward the packet to all
591 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
592 			 * except the caller.
593 			 */
594 			if (IS_BROADCAST(ehp)) {
595 				D2(vswp, "%s: BROADCAST pkt", __func__);
596 				(void) vsw_forward_all(vswp, mp, caller, arg);
597 			} else if (IS_MULTICAST(ehp)) {
598 				D2(vswp, "%s: MULTICAST pkt", __func__);
599 				(void) vsw_forward_grp(vswp, mp, caller, arg);
600 			} else {
601 				/*
602 				 * Unicast pkt from vnet that we don't have
603 				 * an FDB entry for, so must be destinded for
604 				 * the outside world. Attempt to send up to the
605 				 * IP layer to allow it to deal with it.
606 				 */
607 				if (caller == VSW_VNETPORT) {
608 					vsw_mac_rx(vswp, mrh,
609 					    mp, VSW_MACRX_FREEMSG);
610 				}
611 			}
612 		}
613 	}
614 
615 	D1(vswp, "%s: exit", __func__);
616 }
617 
618 /*
619  * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
620  * except the caller (port on which frame arrived).
621  */
622 static int
623 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
624 {
625 	vsw_port_list_t	*plist = &vswp->plist;
626 	vsw_port_t	*portp;
627 	mblk_t		*nmp = NULL;
628 	mblk_t		*ret_m = NULL;
629 	int		skip_port = 0;
630 
631 	D1(vswp, "vsw_forward_all: enter\n");
632 
633 	/*
634 	 * Broadcast message from inside ldoms so send to outside
635 	 * world if in either of layer 2 modes.
636 	 */
637 	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
638 	    (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
639 	    ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) {
640 
641 		nmp = vsw_dupmsgchain(mp);
642 		if (nmp) {
643 			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
644 				DERR(vswp, "%s: dropping pkt(s) "
645 				    "consisting of %ld bytes of data for"
646 				    " physical device", __func__, MBLKL(ret_m));
647 				freemsgchain(ret_m);
648 			}
649 		}
650 	}
651 
652 	if (caller == VSW_VNETPORT)
653 		skip_port = 1;
654 
655 	/*
656 	 * Broadcast message from other vnet (layer 2 or 3) or outside
657 	 * world (layer 2 only), send up stack if plumbed.
658 	 */
659 	if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) {
660 		vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG);
661 	}
662 
663 	/* send it to all VNETPORTs */
664 	READ_ENTER(&plist->lockrw);
665 	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
666 		D2(vswp, "vsw_forward_all: port %d", portp->p_instance);
667 		/*
668 		 * Caution ! - don't reorder these two checks as arg
669 		 * will be NULL if the caller is PHYSDEV. skip_port is
670 		 * only set if caller is VNETPORT.
671 		 */
672 		if ((skip_port) && (portp == arg)) {
673 			continue;
674 		} else {
675 			nmp = vsw_dupmsgchain(mp);
676 			if (nmp) {
677 				mblk_t	*mpt = nmp;
678 				uint32_t count = 1;
679 
680 				/* Find tail */
681 				while (mpt->b_next != NULL) {
682 					mpt = mpt->b_next;
683 					count++;
684 				}
685 				/*
686 				 * The plist->lockrw is protecting the
687 				 * portp from getting destroyed here.
688 				 * So, no ref_cnt is incremented here.
689 				 */
690 				(void) vsw_portsend(portp, nmp, mpt, count);
691 			} else {
692 				DERR(vswp, "vsw_forward_all: nmp NULL");
693 			}
694 		}
695 	}
696 	RW_EXIT(&plist->lockrw);
697 
698 	freemsgchain(mp);
699 
700 	D1(vswp, "vsw_forward_all: exit\n");
701 	return (0);
702 }
703 
704 /*
705  * Forward pkts to any devices or interfaces which have registered
706  * an interest in them (i.e. multicast groups).
707  */
708 static int
709 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
710 {
711 	struct ether_header	*ehp = (struct ether_header *)mp->b_rptr;
712 	mfdb_ent_t		*entp = NULL;
713 	mfdb_ent_t		*tpp = NULL;
714 	vsw_port_t 		*port;
715 	uint64_t		key = 0;
716 	mblk_t			*nmp = NULL;
717 	mblk_t			*ret_m = NULL;
718 	boolean_t		check_if = B_TRUE;
719 
720 	/*
721 	 * Convert address to hash table key
722 	 */
723 	KEY_HASH(key, ehp->ether_dhost);
724 
725 	D1(vswp, "%s: key 0x%llx", __func__, key);
726 
727 	/*
728 	 * If pkt came from either a vnet or down the stack (if we are
729 	 * plumbed) and we are in layer 2 mode, then we send the pkt out
730 	 * over the physical adapter, and then check to see if any other
731 	 * vnets are interested in it.
732 	 */
733 	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
734 	    (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
735 	    ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) {
736 		nmp = vsw_dupmsgchain(mp);
737 		if (nmp) {
738 			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
739 				DERR(vswp, "%s: dropping pkt(s) consisting of "
740 				    "%ld bytes of data for physical device",
741 				    __func__, MBLKL(ret_m));
742 				freemsgchain(ret_m);
743 			}
744 		}
745 	}
746 
747 	READ_ENTER(&vswp->mfdbrw);
748 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key,
749 	    (mod_hash_val_t *)&entp) != 0) {
750 		D3(vswp, "%s: no table entry found for addr 0x%llx",
751 		    __func__, key);
752 	} else {
753 		/*
754 		 * Send to list of devices associated with this address...
755 		 */
756 		for (tpp = entp; tpp != NULL; tpp = tpp->nextp) {
757 
758 			/* dont send to ourselves */
759 			if ((caller == VSW_VNETPORT) &&
760 			    (tpp->d_addr == (void *)arg)) {
761 				port = (vsw_port_t *)tpp->d_addr;
762 				D3(vswp, "%s: not sending to ourselves"
763 				    " : port %d", __func__, port->p_instance);
764 				continue;
765 
766 			} else if ((caller == VSW_LOCALDEV) &&
767 			    (tpp->d_type == VSW_LOCALDEV)) {
768 				D2(vswp, "%s: not sending back up stack",
769 				    __func__);
770 				continue;
771 			}
772 
773 			if (tpp->d_type == VSW_VNETPORT) {
774 				port = (vsw_port_t *)tpp->d_addr;
775 				D3(vswp, "%s: sending to port %ld for addr "
776 				    "0x%llx", __func__, port->p_instance, key);
777 
778 				nmp = vsw_dupmsgchain(mp);
779 				if (nmp) {
780 					mblk_t	*mpt = nmp;
781 					uint32_t count = 1;
782 
783 					/* Find tail */
784 					while (mpt->b_next != NULL) {
785 						mpt = mpt->b_next;
786 						count++;
787 					}
788 					/*
789 					 * The vswp->mfdbrw is protecting the
790 					 * portp from getting destroyed here.
791 					 * So, no ref_cnt is incremented here.
792 					 */
793 					(void) vsw_portsend(port, nmp, mpt,
794 					    count);
795 				}
796 			} else {
797 				vsw_mac_rx(vswp, NULL,
798 				    mp, VSW_MACRX_COPYMSG);
799 				D2(vswp, "%s: sending up stack"
800 				    " for addr 0x%llx", __func__, key);
801 				check_if = B_FALSE;
802 			}
803 		}
804 	}
805 
806 	RW_EXIT(&vswp->mfdbrw);
807 
808 	/*
809 	 * If the pkt came from either a vnet or from physical device,
810 	 * and if we havent already sent the pkt up the stack then we
811 	 * check now if we can/should (i.e. the interface is plumbed
812 	 * and in promisc mode).
813 	 */
814 	if ((check_if) &&
815 	    ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
816 		vsw_mac_rx(vswp, NULL, mp,
817 		    VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
818 	}
819 
820 	freemsgchain(mp);
821 
822 	D1(vswp, "%s: exit", __func__);
823 
824 	return (0);
825 }
826 
827 /*
828  * Add an entry into FDB, for the given mac address and port_id.
829  * Returns 0 on success, 1 on failure.
830  *
831  * Lock protecting FDB must be held by calling process.
832  */
833 int
834 vsw_add_fdb(vsw_t *vswp, vsw_port_t *port)
835 {
836 	uint64_t	addr = 0;
837 
838 	D1(vswp, "%s: enter", __func__);
839 
840 	KEY_HASH(addr, port->p_macaddr);
841 
842 	D2(vswp, "%s: key = 0x%llx", __func__, addr);
843 
844 	/*
845 	 * Note: duplicate keys will be rejected by mod_hash.
846 	 */
847 	if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr,
848 	    (mod_hash_val_t)port) != 0) {
849 		DERR(vswp, "%s: unable to add entry into fdb.", __func__);
850 		return (1);
851 	}
852 
853 	D1(vswp, "%s: exit", __func__);
854 	return (0);
855 }
856 
857 /*
858  * Remove an entry from FDB.
859  * Returns 0 on success, 1 on failure.
860  */
861 int
862 vsw_del_fdb(vsw_t *vswp, vsw_port_t *port)
863 {
864 	uint64_t	addr = 0;
865 
866 	D1(vswp, "%s: enter", __func__);
867 
868 	KEY_HASH(addr, port->p_macaddr);
869 
870 	D2(vswp, "%s: key = 0x%llx", __func__, addr);
871 
872 	(void) mod_hash_destroy(vswp->fdb, (mod_hash_val_t)addr);
873 
874 	D1(vswp, "%s: enter", __func__);
875 
876 	return (0);
877 }
878 
879 /*
880  * Search fdb for a given mac address.
881  * Returns pointer to the entry if found, else returns NULL.
882  */
883 static vsw_port_t *
884 vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp)
885 {
886 	uint64_t	key = 0;
887 	vsw_port_t	*port = NULL;
888 
889 	D1(vswp, "%s: enter", __func__);
890 
891 	KEY_HASH(key, ehp->ether_dhost);
892 
893 	D2(vswp, "%s: key = 0x%llx", __func__, key);
894 
895 	if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key,
896 	    (mod_hash_val_t *)&port) != 0) {
897 		D2(vswp, "%s: no port found", __func__);
898 		return (NULL);
899 	}
900 
901 	D1(vswp, "%s: exit", __func__);
902 
903 	return (port);
904 }
905 
906 /*
907  * Add or remove multicast address(es).
908  *
909  * Returns 0 on success, 1 on failure.
910  */
911 int
912 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
913 {
914 	mcst_addr_t		*mcst_p = NULL;
915 	vsw_t			*vswp = port->p_vswp;
916 	uint64_t		addr = 0x0;
917 	int			i;
918 
919 	D1(vswp, "%s: enter", __func__);
920 
921 	D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);
922 
923 	for (i = 0; i < mcst_pkt->count; i++) {
924 		/*
925 		 * Convert address into form that can be used
926 		 * as hash table key.
927 		 */
928 		KEY_HASH(addr, mcst_pkt->mca[i]);
929 
930 		/*
931 		 * Add or delete the specified address/port combination.
932 		 */
933 		if (mcst_pkt->set == 0x1) {
934 			D3(vswp, "%s: adding multicast address 0x%llx for "
935 			    "port %ld", __func__, addr, port->p_instance);
936 			if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
937 				/*
938 				 * Update the list of multicast
939 				 * addresses contained within the
940 				 * port structure to include this new
941 				 * one.
942 				 */
943 				mcst_p = kmem_zalloc(sizeof (mcst_addr_t),
944 				    KM_NOSLEEP);
945 				if (mcst_p == NULL) {
946 					DERR(vswp, "%s: unable to alloc mem",
947 					    __func__);
948 					(void) vsw_del_mcst(vswp,
949 					    VSW_VNETPORT, addr, port);
950 					return (1);
951 				}
952 
953 				mcst_p->nextp = NULL;
954 				mcst_p->addr = addr;
955 				ether_copy(&mcst_pkt->mca[i], &mcst_p->mca);
956 
957 				/*
958 				 * Program the address into HW. If the addr
959 				 * has already been programmed then the MAC
960 				 * just increments a ref counter (which is
961 				 * used when the address is being deleted)
962 				 */
963 				mutex_enter(&vswp->mac_lock);
964 				if (vswp->mh != NULL) {
965 					if (mac_multicst_add(vswp->mh,
966 					    (uchar_t *)&mcst_pkt->mca[i])) {
967 						mutex_exit(&vswp->mac_lock);
968 						cmn_err(CE_WARN, "!vsw%d: "
969 						    "unable to add multicast "
970 						    "address: %s\n",
971 						    vswp->instance,
972 						    ether_sprintf((void *)
973 						    &mcst_p->mca));
974 						(void) vsw_del_mcst(vswp,
975 						    VSW_VNETPORT, addr, port);
976 						kmem_free(mcst_p,
977 						    sizeof (*mcst_p));
978 						return (1);
979 					}
980 					mcst_p->mac_added = B_TRUE;
981 				}
982 				mutex_exit(&vswp->mac_lock);
983 
984 				mutex_enter(&port->mca_lock);
985 				mcst_p->nextp = port->mcap;
986 				port->mcap = mcst_p;
987 				mutex_exit(&port->mca_lock);
988 
989 			} else {
990 				DERR(vswp, "%s: error adding multicast "
991 				    "address 0x%llx for port %ld",
992 				    __func__, addr, port->p_instance);
993 				return (1);
994 			}
995 		} else {
996 			/*
997 			 * Delete an entry from the multicast hash
998 			 * table and update the address list
999 			 * appropriately.
1000 			 */
1001 			if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
1002 				D3(vswp, "%s: deleting multicast address "
1003 				    "0x%llx for port %ld", __func__, addr,
1004 				    port->p_instance);
1005 
1006 				mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr);
1007 				ASSERT(mcst_p != NULL);
1008 
1009 				/*
1010 				 * Remove the address from HW. The address
1011 				 * will actually only be removed once the ref
1012 				 * count within the MAC layer has dropped to
1013 				 * zero. I.e. we can safely call this fn even
1014 				 * if other ports are interested in this
1015 				 * address.
1016 				 */
1017 				mutex_enter(&vswp->mac_lock);
1018 				if (vswp->mh != NULL && mcst_p->mac_added) {
1019 					if (mac_multicst_remove(vswp->mh,
1020 					    (uchar_t *)&mcst_pkt->mca[i])) {
1021 						mutex_exit(&vswp->mac_lock);
1022 						cmn_err(CE_WARN, "!vsw%d: "
1023 						    "unable to remove mcast "
1024 						    "address: %s\n",
1025 						    vswp->instance,
1026 						    ether_sprintf((void *)
1027 						    &mcst_p->mca));
1028 						kmem_free(mcst_p,
1029 						    sizeof (*mcst_p));
1030 						return (1);
1031 					}
1032 					mcst_p->mac_added = B_FALSE;
1033 				}
1034 				mutex_exit(&vswp->mac_lock);
1035 				kmem_free(mcst_p, sizeof (*mcst_p));
1036 
1037 			} else {
1038 				DERR(vswp, "%s: error deleting multicast "
1039 				    "addr 0x%llx for port %ld",
1040 				    __func__, addr, port->p_instance);
1041 				return (1);
1042 			}
1043 		}
1044 	}
1045 	D1(vswp, "%s: exit", __func__);
1046 	return (0);
1047 }
1048 
1049 /*
1050  * Add a new multicast entry.
1051  *
1052  * Search hash table based on address. If match found then
1053  * update associated val (which is chain of ports), otherwise
1054  * create new key/val (addr/port) pair and insert into table.
1055  */
1056 int
1057 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
1058 {
1059 	int		dup = 0;
1060 	int		rv = 0;
1061 	mfdb_ent_t	*ment = NULL;
1062 	mfdb_ent_t	*tmp_ent = NULL;
1063 	mfdb_ent_t	*new_ent = NULL;
1064 	void		*tgt = NULL;
1065 
1066 	if (devtype == VSW_VNETPORT) {
1067 		/*
1068 		 * Being invoked from a vnet.
1069 		 */
1070 		ASSERT(arg != NULL);
1071 		tgt = arg;
1072 		D2(NULL, "%s: port %d : address 0x%llx", __func__,
1073 		    ((vsw_port_t *)arg)->p_instance, addr);
1074 	} else {
1075 		/*
1076 		 * We are being invoked via the m_multicst mac entry
1077 		 * point.
1078 		 */
1079 		D2(NULL, "%s: address 0x%llx", __func__, addr);
1080 		tgt = (void *)vswp;
1081 	}
1082 
1083 	WRITE_ENTER(&vswp->mfdbrw);
1084 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
1085 	    (mod_hash_val_t *)&ment) != 0) {
1086 
1087 		/* address not currently in table */
1088 		ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
1089 		ment->d_addr = (void *)tgt;
1090 		ment->d_type = devtype;
1091 		ment->nextp = NULL;
1092 
1093 		if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
1094 		    (mod_hash_val_t)ment) != 0) {
1095 			DERR(vswp, "%s: hash table insertion failed", __func__);
1096 			kmem_free(ment, sizeof (mfdb_ent_t));
1097 			rv = 1;
1098 		} else {
1099 			D2(vswp, "%s: added initial entry for 0x%llx to "
1100 			    "table", __func__, addr);
1101 		}
1102 	} else {
1103 		/*
1104 		 * Address in table. Check to see if specified port
1105 		 * is already associated with the address. If not add
1106 		 * it now.
1107 		 */
1108 		tmp_ent = ment;
1109 		while (tmp_ent != NULL) {
1110 			if (tmp_ent->d_addr == (void *)tgt) {
1111 				if (devtype == VSW_VNETPORT) {
1112 					DERR(vswp, "%s: duplicate port entry "
1113 					    "found for portid %ld and key "
1114 					    "0x%llx", __func__,
1115 					    ((vsw_port_t *)arg)->p_instance,
1116 					    addr);
1117 				} else {
1118 					DERR(vswp, "%s: duplicate entry found"
1119 					    "for key 0x%llx", __func__, addr);
1120 				}
1121 				rv = 1;
1122 				dup = 1;
1123 				break;
1124 			}
1125 			tmp_ent = tmp_ent->nextp;
1126 		}
1127 
1128 		/*
1129 		 * Port not on list so add it to end now.
1130 		 */
1131 		if (0 == dup) {
1132 			D2(vswp, "%s: added entry for 0x%llx to table",
1133 			    __func__, addr);
1134 			new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
1135 			new_ent->d_addr = (void *)tgt;
1136 			new_ent->d_type = devtype;
1137 			new_ent->nextp = NULL;
1138 
1139 			tmp_ent = ment;
1140 			while (tmp_ent->nextp != NULL)
1141 				tmp_ent = tmp_ent->nextp;
1142 
1143 			tmp_ent->nextp = new_ent;
1144 		}
1145 	}
1146 
1147 	RW_EXIT(&vswp->mfdbrw);
1148 	return (rv);
1149 }
1150 
1151 /*
1152  * Remove a multicast entry from the hashtable.
1153  *
1154  * Search hash table based on address. If match found, scan
1155  * list of ports associated with address. If specified port
1156  * found remove it from list.
1157  */
1158 int
1159 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
1160 {
1161 	mfdb_ent_t	*ment = NULL;
1162 	mfdb_ent_t	*curr_p, *prev_p;
1163 	void		*tgt = NULL;
1164 
1165 	D1(vswp, "%s: enter", __func__);
1166 
1167 	if (devtype == VSW_VNETPORT) {
1168 		tgt = (vsw_port_t *)arg;
1169 		D2(vswp, "%s: removing port %d from mFDB for address"
1170 		    " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr);
1171 	} else {
1172 		D2(vswp, "%s: removing entry", __func__);
1173 		tgt = (void *)vswp;
1174 	}
1175 
1176 	WRITE_ENTER(&vswp->mfdbrw);
1177 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
1178 	    (mod_hash_val_t *)&ment) != 0) {
1179 		D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
1180 		RW_EXIT(&vswp->mfdbrw);
1181 		return (1);
1182 	}
1183 
1184 	prev_p = curr_p = ment;
1185 
1186 	while (curr_p != NULL) {
1187 		if (curr_p->d_addr == (void *)tgt) {
1188 			if (devtype == VSW_VNETPORT) {
1189 				D2(vswp, "%s: port %d found", __func__,
1190 				    ((vsw_port_t *)tgt)->p_instance);
1191 			} else {
1192 				D2(vswp, "%s: instance found", __func__);
1193 			}
1194 
1195 			if (prev_p == curr_p) {
1196 				/*
1197 				 * head of list, if no other element is in
1198 				 * list then destroy this entry, otherwise
1199 				 * just replace it with updated value.
1200 				 */
1201 				ment = curr_p->nextp;
1202 				if (ment == NULL) {
1203 					(void) mod_hash_destroy(vswp->mfdb,
1204 					    (mod_hash_val_t)addr);
1205 				} else {
1206 					(void) mod_hash_replace(vswp->mfdb,
1207 					    (mod_hash_key_t)addr,
1208 					    (mod_hash_val_t)ment);
1209 				}
1210 			} else {
1211 				/*
1212 				 * Not head of list, no need to do
1213 				 * replacement, just adjust list pointers.
1214 				 */
1215 				prev_p->nextp = curr_p->nextp;
1216 			}
1217 			break;
1218 		}
1219 
1220 		prev_p = curr_p;
1221 		curr_p = curr_p->nextp;
1222 	}
1223 
1224 	RW_EXIT(&vswp->mfdbrw);
1225 
1226 	D1(vswp, "%s: exit", __func__);
1227 
1228 	if (curr_p == NULL)
1229 		return (1);
1230 	kmem_free(curr_p, sizeof (mfdb_ent_t));
1231 	return (0);
1232 }
1233 
1234 /*
1235  * Port is being deleted, but has registered an interest in one
1236  * or more multicast groups. Using the list of addresses maintained
1237  * within the port structure find the appropriate entry in the hash
1238  * table and remove this port from the list of interested ports.
1239  */
1240 void
1241 vsw_del_mcst_port(vsw_port_t *port)
1242 {
1243 	mcst_addr_t	*mcap = NULL;
1244 	vsw_t		*vswp = port->p_vswp;
1245 
1246 	D1(vswp, "%s: enter", __func__);
1247 
1248 	mutex_enter(&port->mca_lock);
1249 
1250 	while ((mcap = port->mcap) != NULL) {
1251 
1252 		port->mcap = mcap->nextp;
1253 
1254 		mutex_exit(&port->mca_lock);
1255 
1256 		(void) vsw_del_mcst(vswp, VSW_VNETPORT,
1257 		    mcap->addr, port);
1258 
1259 		/*
1260 		 * Remove the address from HW. The address
1261 		 * will actually only be removed once the ref
1262 		 * count within the MAC layer has dropped to
1263 		 * zero. I.e. we can safely call this fn even
1264 		 * if other ports are interested in this
1265 		 * address.
1266 		 */
1267 		mutex_enter(&vswp->mac_lock);
1268 		if (vswp->mh != NULL && mcap->mac_added) {
1269 			(void) mac_multicst_remove(vswp->mh,
1270 			    (uchar_t *)&mcap->mca);
1271 		}
1272 		mutex_exit(&vswp->mac_lock);
1273 
1274 		kmem_free(mcap, sizeof (*mcap));
1275 
1276 		mutex_enter(&port->mca_lock);
1277 
1278 	}
1279 
1280 	mutex_exit(&port->mca_lock);
1281 
1282 	D1(vswp, "%s: exit", __func__);
1283 }
1284 
1285 /*
1286  * This vsw instance is detaching, but has registered an interest in one
1287  * or more multicast groups. Using the list of addresses maintained
1288  * within the vsw structure find the appropriate entry in the hash
1289  * table and remove this instance from the list of interested ports.
1290  */
1291 void
1292 vsw_del_mcst_vsw(vsw_t *vswp)
1293 {
1294 	mcst_addr_t	*next_p = NULL;
1295 
1296 	D1(vswp, "%s: enter", __func__);
1297 
1298 	mutex_enter(&vswp->mca_lock);
1299 
1300 	while (vswp->mcap != NULL) {
1301 		DERR(vswp, "%s: deleting addr 0x%llx",
1302 		    __func__, vswp->mcap->addr);
1303 		(void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL);
1304 
1305 		next_p = vswp->mcap->nextp;
1306 		kmem_free(vswp->mcap, sizeof (mcst_addr_t));
1307 		vswp->mcap = next_p;
1308 	}
1309 
1310 	vswp->mcap = NULL;
1311 	mutex_exit(&vswp->mca_lock);
1312 
1313 	D1(vswp, "%s: exit", __func__);
1314 }
1315 
1316 static uint32_t
1317 vsw_get_same_dest_list(struct ether_header *ehp,
1318     mblk_t **rhead, mblk_t **rtail, mblk_t **mpp)
1319 {
1320 	uint32_t		count = 0;
1321 	mblk_t			*bp;
1322 	mblk_t			*nbp;
1323 	mblk_t			*head = NULL;
1324 	mblk_t			*tail = NULL;
1325 	mblk_t			*prev = NULL;
1326 	struct ether_header	*behp;
1327 
1328 	/* process the chain of packets */
1329 	bp = *mpp;
1330 	while (bp) {
1331 		nbp = bp->b_next;
1332 		behp = (struct ether_header *)bp->b_rptr;
1333 		bp->b_prev = NULL;
1334 		if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) {
1335 			if (prev == NULL) {
1336 				*mpp = nbp;
1337 			} else {
1338 				prev->b_next = nbp;
1339 			}
1340 			bp->b_next =  NULL;
1341 			if (head == NULL) {
1342 				head = tail = bp;
1343 			} else {
1344 				tail->b_next = bp;
1345 				tail = bp;
1346 			}
1347 			count++;
1348 		} else {
1349 			prev = bp;
1350 		}
1351 		bp = nbp;
1352 	}
1353 	*rhead = head;
1354 	*rtail = tail;
1355 	DTRACE_PROBE1(vsw_same_dest, int, count);
1356 	return (count);
1357 }
1358 
1359 static mblk_t *
1360 vsw_dupmsgchain(mblk_t *mp)
1361 {
1362 	mblk_t	*nmp = NULL;
1363 	mblk_t	**nmpp = &nmp;
1364 
1365 	for (; mp != NULL; mp = mp->b_next) {
1366 		if ((*nmpp = dupmsg(mp)) == NULL) {
1367 			freemsgchain(nmp);
1368 			return (NULL);
1369 		}
1370 
1371 		nmpp = &((*nmpp)->b_next);
1372 	}
1373 
1374 	return (nmp);
1375 }
1376