xref: /illumos-gate/usr/src/uts/intel/io/viona/viona_rx.c (revision badf94ff3599fab15963f6c532929e9bc411757a)
1 /*
2  * Copyright (c) 2013  Chris Torek <torek @ torek net>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 /*
27  * This file and its contents are supplied under the terms of the
28  * Common Development and Distribution License ("CDDL"), version 1.0.
29  * You may only use this file in accordance with the terms of version
30  * 1.0 of the CDDL.
31  *
32  * A full copy of the text of the CDDL should have accompanied this
33  * source.  A copy of the CDDL is also available via the Internet at
34  * http://www.illumos.org/license/CDDL.
35  *
36  * Copyright 2015 Pluribus Networks Inc.
37  * Copyright 2019 Joyent, Inc.
38  * Copyright 2021 Oxide Computer Company
39  */
40 
41 #include <sys/types.h>
42 #include <sys/strsubr.h>
43 
44 #include <sys/dlpi.h>
45 #include <sys/pattr.h>
46 #include <sys/vlan.h>
47 
48 #include "viona_impl.h"
49 
50 
51 
/*
 * Size of the on-stack iovec and used-element arrays in the receive paths,
 * and therefore the limit passed to vq_popchain() as well as the cap on the
 * number of buffers merged for a single frame.
 */
52 #define	VTNET_MAXSEGS		32
53 
54 /* Min. octets in an ethernet frame minus FCS */
55 #define	MIN_BUF_SIZE		60
/*
 * Frame length at which viona_rx_common() appends the shared VLAN padding
 * mblk: exactly VLAN_TAGSZ short of the minimum frame size.
 */
56 #define	NEED_VLAN_PAD_SIZE	(MIN_BUF_SIZE - VLAN_TAGSZ)
57 
/*
 * Shared mblk holding VLAN_TAGSZ zeroed bytes.  It is allocated in
 * viona_rx_init(), temporarily linked onto short frames during receive, and
 * freed in viona_rx_fini().
 */
58 static mblk_t *viona_vlan_pad_mp;
59 
60 void
61 viona_rx_init(void)
62 {
63 	mblk_t *mp;
64 
65 	ASSERT(viona_vlan_pad_mp == NULL);
66 
67 	/* Create mblk for padding when VLAN tags are stripped */
68 	mp = allocb_wait(VLAN_TAGSZ, BPRI_HI, STR_NOSIG, NULL);
69 	bzero(mp->b_rptr, VLAN_TAGSZ);
70 	mp->b_wptr += VLAN_TAGSZ;
71 	viona_vlan_pad_mp = mp;
72 }
73 
74 void
75 viona_rx_fini(void)
76 {
77 	mblk_t *mp;
78 
79 	/* Clean up the VLAN padding mblk */
80 	mp = viona_vlan_pad_mp;
81 	viona_vlan_pad_mp = NULL;
82 	VERIFY(mp != NULL && mp->b_cont == NULL);
83 	freemsg(mp);
84 }
85 
86 void
87 viona_worker_rx(viona_vring_t *ring, viona_link_t *link)
88 {
89 	proc_t *p = ttoproc(curthread);
90 
91 	(void) thread_vsetname(curthread, "viona_rx_%p", ring);
92 
93 	ASSERT(MUTEX_HELD(&ring->vr_lock));
94 	ASSERT3U(ring->vr_state, ==, VRS_RUN);
95 
96 	viona_ring_disable_notify(ring);
97 
98 	do {
99 		if (vmm_drv_lease_expired(ring->vr_lease)) {
100 			/*
101 			 * Set the renewal flag, causing incoming traffic to be
102 			 * dropped, and issue an RX barrier to ensure any
103 			 * threads in the RX callbacks will have finished.
104 			 * The vr_lock cannot be held across the barrier as it
105 			 * poses a deadlock risk.
106 			 */
107 			ring->vr_state_flags |= VRSF_RENEW;
108 			mutex_exit(&ring->vr_lock);
109 			mac_rx_barrier(link->l_mch);
110 			mutex_enter(&ring->vr_lock);
111 
112 			if (!viona_ring_lease_renew(ring)) {
113 				break;
114 			}
115 			ring->vr_state_flags &= ~VRSF_RENEW;
116 		}
117 
118 		/*
119 		 * For now, there is little to do in the RX worker as inbound
120 		 * data is delivered by MAC via the RX callbacks.  If tap-like
121 		 * functionality is added later, this would be a convenient
122 		 * place to inject frames into the guest.
123 		 */
124 		(void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock);
125 	} while (!VRING_NEED_BAIL(ring, p));
126 
127 	ring->vr_state = VRS_STOP;
128 
129 	/*
130 	 * The RX ring is stopping, before we start tearing it down it
131 	 * is imperative that we perform an RX barrier so that
132 	 * incoming packets are dropped at viona_rx_classified().
133 	 */
134 	mutex_exit(&ring->vr_lock);
135 	mac_rx_barrier(link->l_mch);
136 	mutex_enter(&ring->vr_lock);
137 
138 	viona_ring_enable_notify(ring);
139 }
140 
141 static size_t
142 viona_copy_mblk(const mblk_t *mp, size_t seek, caddr_t buf, size_t len,
143     boolean_t *end)
144 {
145 	size_t copied = 0;
146 	size_t off = 0;
147 
148 	/* Seek past already-consumed data */
149 	while (seek > 0 && mp != NULL) {
150 		const size_t chunk = MBLKL(mp);
151 
152 		if (chunk > seek) {
153 			off = seek;
154 			break;
155 		}
156 		mp = mp->b_cont;
157 		seek -= chunk;
158 	}
159 
160 	while (mp != NULL) {
161 		const size_t chunk = MBLKL(mp) - off;
162 		const size_t to_copy = MIN(chunk, len);
163 
164 		bcopy(mp->b_rptr + off, buf, to_copy);
165 		copied += to_copy;
166 		buf += to_copy;
167 		len -= to_copy;
168 
169 		/*
170 		 * If all the remaining data in the mblk_t was copied, move on
171 		 * to the next one in the chain.  Any seek offset applied to
172 		 * the first mblk copy is zeroed out for subsequent operations.
173 		 */
174 		if (chunk == to_copy) {
175 			mp = mp->b_cont;
176 			off = 0;
177 		}
178 #ifdef DEBUG
179 		else {
180 			/*
181 			 * The only valid reason for the copy to consume less
182 			 * than the entire contents of the mblk_t is because
183 			 * the output buffer has been filled.
184 			 */
185 			ASSERT0(len);
186 		}
187 #endif
188 
189 		/* Go no further if the buffer has been filled */
190 		if (len == 0) {
191 			break;
192 		}
193 
194 	}
195 	*end = (mp == NULL);
196 	return (copied);
197 }
198 
199 static int
200 viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
201 {
202 	struct iovec iov[VTNET_MAXSEGS];
203 	uint16_t cookie;
204 	int n;
205 	const size_t hdr_sz = sizeof (struct virtio_net_hdr);
206 	struct virtio_net_hdr *hdr;
207 	size_t len, copied = 0;
208 	caddr_t buf = NULL;
209 	boolean_t end = B_FALSE;
210 	const uint32_t features = ring->vr_link->l_features;
211 	vmm_page_t *pages = NULL;
212 
213 	ASSERT(msz >= MIN_BUF_SIZE);
214 
215 	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages);
216 	if (n <= 0) {
217 		/* Without available buffers, the frame must be dropped. */
218 		return (ENOSPC);
219 	}
220 	if (iov[0].iov_len < hdr_sz) {
221 		/*
222 		 * There is little to do if there is not even space available
223 		 * for the sole header.  Zero the buffer and bail out as a last
224 		 * act of desperation.
225 		 */
226 		bzero(iov[0].iov_base, iov[0].iov_len);
227 		goto bad_frame;
228 	}
229 
230 	/* Grab the address of the header before anything else */
231 	hdr = (struct virtio_net_hdr *)iov[0].iov_base;
232 
233 	/*
234 	 * If there is any space remaining in the first buffer after writing
235 	 * the header, fill it with frame data.
236 	 */
237 	if (iov[0].iov_len > hdr_sz) {
238 		buf = (caddr_t)iov[0].iov_base + hdr_sz;
239 		len = iov[0].iov_len - hdr_sz;
240 
241 		copied += viona_copy_mblk(mp, copied, buf, len, &end);
242 	}
243 
244 	/* Copy any remaining data into subsequent buffers, if present */
245 	for (int i = 1; i < n && !end; i++) {
246 		buf = (caddr_t)iov[i].iov_base;
247 		len = iov[i].iov_len;
248 
249 		copied += viona_copy_mblk(mp, copied, buf, len, &end);
250 	}
251 
252 	/* Was the expected amount of data copied? */
253 	if (copied != msz) {
254 		VIONA_PROBE5(too_short, viona_vring_t *, ring,
255 		    uint16_t, cookie, mblk_t *, mp, size_t, copied,
256 		    size_t, msz);
257 		VIONA_RING_STAT_INCR(ring, too_short);
258 		goto bad_frame;
259 	}
260 
261 	/* Populate (read: zero) the header and account for it in the size */
262 	bzero(hdr, hdr_sz);
263 	copied += hdr_sz;
264 
265 	/* Add chksum bits, if needed */
266 	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
267 		uint32_t cksum_flags;
268 
269 		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
270 		    ((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
271 			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
272 			hdr->vrh_gso_size = DB_LSOMSS(mp);
273 		}
274 
275 		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
276 		    &cksum_flags);
277 		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
278 			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
279 		}
280 	}
281 
282 	/* Release this chain */
283 	vmm_drv_page_release_chain(pages);
284 	vq_pushchain(ring, copied, cookie);
285 	return (0);
286 
287 bad_frame:
288 	VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring, uint16_t, cookie,
289 	    mblk_t *, mp);
290 	VIONA_RING_STAT_INCR(ring, bad_rx_frame);
291 
292 	vmm_drv_page_release_chain(pages);
293 	vq_pushchain(ring, MAX(copied, MIN_BUF_SIZE + hdr_sz), cookie);
294 	return (EINVAL);
295 }
296 
297 static int
298 viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
299 {
300 	struct iovec iov[VTNET_MAXSEGS];
301 	used_elem_t uelem[VTNET_MAXSEGS];
302 	vmm_page_t *pages = NULL, *hdr_pages = NULL;
303 	int n, i = 0, buf_idx = 0, err = 0;
304 	uint16_t cookie;
305 	caddr_t buf;
306 	size_t len, copied = 0, chunk = 0;
307 	struct virtio_net_mrgrxhdr *hdr = NULL;
308 	const size_t hdr_sz = sizeof (struct virtio_net_mrgrxhdr);
309 	boolean_t end = B_FALSE;
310 	const uint32_t features = ring->vr_link->l_features;
311 
312 	ASSERT(msz >= MIN_BUF_SIZE);
313 
314 	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &hdr_pages);
315 	if (n <= 0) {
316 		/* Without available buffers, the frame must be dropped. */
317 		VIONA_PROBE2(no_space, viona_vring_t *, ring, mblk_t *, mp);
318 		VIONA_RING_STAT_INCR(ring, no_space);
319 		return (ENOSPC);
320 	}
321 	if (iov[0].iov_len < hdr_sz) {
322 		/*
323 		 * There is little to do if there is not even space available
324 		 * for the sole header.  Zero the buffer and bail out as a last
325 		 * act of desperation.
326 		 */
327 		bzero(iov[0].iov_base, iov[0].iov_len);
328 		uelem[0].id = cookie;
329 		uelem[0].len = iov[0].iov_len;
330 		err = EINVAL;
331 		goto done;
332 	}
333 
334 	/* Grab the address of the header and do initial population */
335 	hdr = (struct virtio_net_mrgrxhdr *)iov[0].iov_base;
336 	bzero(hdr, hdr_sz);
337 	hdr->vrh_bufs = 1;
338 
339 	/*
340 	 * If there is any space remaining in the first buffer after writing
341 	 * the header, fill it with frame data.  The size of the header itself
342 	 * is accounted for later.
343 	 */
344 	if (iov[0].iov_len > hdr_sz) {
345 		buf = iov[0].iov_base + hdr_sz;
346 		len = iov[0].iov_len - hdr_sz;
347 
348 		size_t copy_len;
349 		copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
350 		chunk += copy_len;
351 		copied += copy_len;
352 	}
353 	i = 1;
354 
355 	do {
356 		while (i < n && !end) {
357 			buf = iov[i].iov_base;
358 			len = iov[i].iov_len;
359 
360 			size_t copy_len;
361 			copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
362 			chunk += copy_len;
363 			copied += copy_len;
364 			i++;
365 		}
366 
367 		uelem[buf_idx].id = cookie;
368 		uelem[buf_idx].len = chunk;
369 
370 		/*
371 		 * Try to grab another buffer from the ring if the mblk has not
372 		 * yet been entirely copied out.
373 		 */
374 		if (!end) {
375 			if (buf_idx == (VTNET_MAXSEGS - 1)) {
376 				/*
377 				 * Our arbitrary limit on the number of buffers
378 				 * to offer for merge has already been reached.
379 				 */
380 				err = EOVERFLOW;
381 				break;
382 			}
383 			if (pages != NULL) {
384 				vmm_drv_page_release_chain(pages);
385 				pages = NULL;
386 			}
387 			n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie,
388 			    &pages);
389 			if (n <= 0) {
390 				/*
391 				 * Without more immediate space to perform the
392 				 * copying, there is little choice left but to
393 				 * drop the packet.
394 				 */
395 				err = EMSGSIZE;
396 				break;
397 			}
398 			chunk = 0;
399 			i = 0;
400 			buf_idx++;
401 			/*
402 			 * Keep the header up-to-date with the number of
403 			 * buffers, but never reference its value since the
404 			 * guest could meddle with it.
405 			 */
406 			hdr->vrh_bufs++;
407 		}
408 	} while (!end && copied < msz);
409 
410 	/* Account for the header size in the first buffer */
411 	uelem[0].len += hdr_sz;
412 
413 	/*
414 	 * If no other errors were encounted during the copy, was the expected
415 	 * amount of data transfered?
416 	 */
417 	if (err == 0 && copied != msz) {
418 		VIONA_PROBE5(too_short, viona_vring_t *, ring,
419 		    uint16_t, cookie, mblk_t *, mp, size_t, copied,
420 		    size_t, msz);
421 		VIONA_RING_STAT_INCR(ring, too_short);
422 		err = EINVAL;
423 	}
424 
425 	/* Add chksum bits, if needed */
426 	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
427 		uint32_t cksum_flags;
428 
429 		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
430 		    ((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
431 			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
432 			hdr->vrh_gso_size = DB_LSOMSS(mp);
433 		}
434 
435 		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
436 		    &cksum_flags);
437 		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
438 			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
439 		}
440 	}
441 
442 done:
443 	switch (err) {
444 	case 0:
445 		/* Success can fall right through to ring delivery */
446 		break;
447 
448 	case EMSGSIZE:
449 		VIONA_PROBE3(rx_merge_underrun, viona_vring_t *, ring,
450 		    uint16_t, cookie, mblk_t *, mp);
451 		VIONA_RING_STAT_INCR(ring, rx_merge_underrun);
452 		break;
453 
454 	case EOVERFLOW:
455 		VIONA_PROBE3(rx_merge_overrun, viona_vring_t *, ring,
456 		    uint16_t, cookie, mblk_t *, mp);
457 		VIONA_RING_STAT_INCR(ring, rx_merge_overrun);
458 		break;
459 
460 	default:
461 		VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring,
462 		    uint16_t, cookie, mblk_t *, mp);
463 		VIONA_RING_STAT_INCR(ring, bad_rx_frame);
464 	}
465 
466 	if (hdr_pages != NULL) {
467 		vmm_drv_page_release_chain(hdr_pages);
468 	}
469 	if (pages != NULL) {
470 		vmm_drv_page_release_chain(pages);
471 	}
472 	vq_pushchain_many(ring, buf_idx + 1, uelem);
473 	return (err);
474 }
475 
476 static void
477 viona_rx_common(viona_vring_t *ring, mblk_t *mp, boolean_t is_loopback)
478 {
479 	viona_link_t *link = ring->vr_link;
480 	mblk_t *mprx = NULL, **mprx_prevp = &mprx;
481 	mblk_t *mpdrop = NULL, **mpdrop_prevp = &mpdrop;
482 	const boolean_t do_merge =
483 	    ((link->l_features & VIRTIO_NET_F_MRG_RXBUF) != 0);
484 
485 	size_t nrx = 0, ndrop = 0;
486 
487 	while (mp != NULL) {
488 		mblk_t *next = mp->b_next;
489 		mblk_t *pad = NULL;
490 		size_t size = msgsize(mp);
491 		int err = 0;
492 
493 		mp->b_next = NULL;
494 
495 		/*
496 		 * We treat both a 'drop' response and errors the same here
497 		 * and put the packet on the drop chain.  As packets may be
498 		 * subject to different actions in ipf (which do not all
499 		 * return the same set of error values), an error processing
500 		 * one packet doesn't mean the next packet will also generate
501 		 * an error.
502 		 */
503 		if (VNETHOOK_INTERESTED_IN(link->l_neti) &&
504 		    viona_hook(link, ring, &mp, B_FALSE) != 0) {
505 			if (mp != NULL) {
506 				*mpdrop_prevp = mp;
507 				mpdrop_prevp = &mp->b_next;
508 			} else {
509 				/*
510 				 * If the hook consumer (e.g. ipf) already
511 				 * freed the mblk_t, update the drop count now.
512 				 */
513 				ndrop++;
514 			}
515 			mp = next;
516 			continue;
517 		}
518 
519 		/*
520 		 * Ethernet frames are expected to be padded out in order to
521 		 * meet the minimum size.
522 		 *
523 		 * A special case is made for frames which are short by
524 		 * VLAN_TAGSZ, having been stripped of their VLAN tag while
525 		 * traversing MAC.  A preallocated (and recycled) mblk is used
526 		 * for that specific condition.
527 		 *
528 		 * All other frames that fall short on length will have custom
529 		 * zero-padding allocated appended to them.
530 		 */
531 		if (size == NEED_VLAN_PAD_SIZE) {
532 			ASSERT(MBLKL(viona_vlan_pad_mp) == VLAN_TAGSZ);
533 			ASSERT(viona_vlan_pad_mp->b_cont == NULL);
534 
535 			for (pad = mp; pad->b_cont != NULL; pad = pad->b_cont)
536 				;
537 
538 			pad->b_cont = viona_vlan_pad_mp;
539 			size += VLAN_TAGSZ;
540 		} else if (size < MIN_BUF_SIZE) {
541 			const size_t pad_size = MIN_BUF_SIZE - size;
542 			mblk_t *zero_mp;
543 
544 			zero_mp = allocb(pad_size, BPRI_MED);
545 			if (zero_mp == NULL) {
546 				err = ENOMEM;
547 				goto pad_drop;
548 			}
549 
550 			VIONA_PROBE3(rx_pad_short, viona_vring_t *, ring,
551 			    mblk_t *, mp, size_t, pad_size);
552 			VIONA_RING_STAT_INCR(ring, rx_pad_short);
553 			zero_mp->b_wptr += pad_size;
554 			bzero(zero_mp->b_rptr, pad_size);
555 			linkb(mp, zero_mp);
556 			size += pad_size;
557 		}
558 
559 		if (do_merge) {
560 			err = viona_recv_merged(ring, mp, size);
561 		} else {
562 			err = viona_recv_plain(ring, mp, size);
563 		}
564 
565 		/*
566 		 * The VLAN padding mblk is meant for continual reuse, so
567 		 * remove it from the chain to prevent it from being freed.
568 		 *
569 		 * Custom allocated padding does not require this treatment and
570 		 * is freed normally.
571 		 */
572 		if (pad != NULL) {
573 			pad->b_cont = NULL;
574 		}
575 
576 pad_drop:
577 		/*
578 		 * While an error during rx processing
579 		 * (viona_recv_{merged,plain}) does not free mp on error,
580 		 * hook processing might or might not free mp.  Handle either
581 		 * scenario -- if mp is not yet free, it is queued up and
582 		 * freed after the guest has been notified.  If mp is
583 		 * already NULL, just proceed on.
584 		 */
585 		if (err != 0) {
586 			*mpdrop_prevp = mp;
587 			mpdrop_prevp = &mp->b_next;
588 
589 			/*
590 			 * If the available ring is empty, do not bother
591 			 * attempting to deliver any more frames.  Count the
592 			 * rest as dropped too.
593 			 */
594 			if (err == ENOSPC) {
595 				mp->b_next = next;
596 				break;
597 			}
598 		} else {
599 			/* Chain successful mblks to be freed later */
600 			*mprx_prevp = mp;
601 			mprx_prevp = &mp->b_next;
602 			nrx++;
603 		}
604 		mp = next;
605 	}
606 
607 	membar_enter();
608 	viona_intr_ring(ring, B_FALSE);
609 
610 	/* Free successfully received frames */
611 	if (mprx != NULL) {
612 		freemsgchain(mprx);
613 	}
614 
615 	/* Free dropped frames, also tallying them */
616 	mp = mpdrop;
617 	while (mp != NULL) {
618 		mblk_t *next = mp->b_next;
619 
620 		mp->b_next = NULL;
621 		freemsg(mp);
622 		mp = next;
623 		ndrop++;
624 	}
625 	VIONA_PROBE3(rx, viona_link_t *, link, size_t, nrx, size_t, ndrop);
626 }
627 
628 static void
629 viona_rx_classified(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
630     boolean_t is_loopback)
631 {
632 	viona_vring_t *ring = (viona_vring_t *)arg;
633 
634 	/* Drop traffic if ring is inactive or renewing its lease */
635 	if (ring->vr_state != VRS_RUN ||
636 	    (ring->vr_state_flags & VRSF_RENEW) != 0) {
637 		freemsgchain(mp);
638 		return;
639 	}
640 
641 	viona_rx_common(ring, mp, is_loopback);
642 }
643 
644 static void
645 viona_rx_mcast(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
646     boolean_t is_loopback)
647 {
648 	viona_vring_t *ring = (viona_vring_t *)arg;
649 	mac_handle_t mh = ring->vr_link->l_mh;
650 	mblk_t *mp_mcast_only = NULL;
651 	mblk_t **mpp = &mp_mcast_only;
652 
653 	/* Drop traffic if ring is inactive or renewing its lease */
654 	if (ring->vr_state != VRS_RUN ||
655 	    (ring->vr_state_flags & VRSF_RENEW) != 0) {
656 		freemsgchain(mp);
657 		return;
658 	}
659 
660 	/*
661 	 * In addition to multicast traffic, broadcast packets will also arrive
662 	 * via the MAC_CLIENT_PROMISC_MULTI handler. The mac_rx_set() callback
663 	 * for fully-classified traffic has already delivered that broadcast
664 	 * traffic, so it should be suppressed here, rather than duplicating it
665 	 * to the guest.
666 	 */
667 	while (mp != NULL) {
668 		mblk_t *mp_next;
669 		mac_header_info_t mhi;
670 		int err;
671 
672 		mp_next = mp->b_next;
673 		mp->b_next = NULL;
674 
675 		/* Determine the packet type */
676 		err = mac_vlan_header_info(mh, mp, &mhi);
677 		if (err != 0) {
678 			mblk_t *pull;
679 
680 			/*
681 			 * It is possible that gathering of the header
682 			 * information was impeded by a leading mblk_t which
683 			 * was of inadequate length to reference the needed
684 			 * fields.  Try again, in case that could be solved
685 			 * with a pull-up.
686 			 */
687 			pull = msgpullup(mp, sizeof (struct ether_vlan_header));
688 			if (pull == NULL) {
689 				err = ENOMEM;
690 			} else {
691 				err = mac_vlan_header_info(mh, pull, &mhi);
692 				freemsg(pull);
693 			}
694 
695 			if (err != 0) {
696 				VIONA_RING_STAT_INCR(ring, rx_mcast_check);
697 			}
698 		}
699 
700 		/* Chain up matching packets while discarding others */
701 		if (err == 0 && mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) {
702 			*mpp = mp;
703 			mpp = &mp->b_next;
704 		} else {
705 			freemsg(mp);
706 		}
707 
708 		mp = mp_next;
709 	}
710 
711 	if (mp_mcast_only != NULL) {
712 		viona_rx_common(ring, mp_mcast_only, is_loopback);
713 	}
714 }
715 
716 int
717 viona_rx_set(viona_link_t *link)
718 {
719 	viona_vring_t *ring = &link->l_vrings[VIONA_VQ_RX];
720 	int err;
721 
722 	mac_rx_set(link->l_mch, viona_rx_classified, ring);
723 	err = mac_promisc_add(link->l_mch, MAC_CLIENT_PROMISC_MULTI,
724 	    viona_rx_mcast, ring, &link->l_mph,
725 	    MAC_PROMISC_FLAGS_NO_TX_LOOP | MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
726 	if (err != 0) {
727 		mac_rx_clear(link->l_mch);
728 	}
729 
730 	return (err);
731 }
732 
733 void
734 viona_rx_clear(viona_link_t *link)
735 {
736 	mac_promisc_remove(link->l_mph);
737 	mac_rx_clear(link->l_mch);
738 }
739