xref: /illumos-gate/usr/src/uts/intel/io/viona/viona_rx.c (revision 76c08ae9d10f4e0b653a6ea98c06a7868246164b)
1 /*
2  * Copyright (c) 2013  Chris Torek <torek @ torek net>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 /*
27  * This file and its contents are supplied under the terms of the
28  * Common Development and Distribution License ("CDDL"), version 1.0.
29  * You may only use this file in accordance with the terms of version
30  * 1.0 of the CDDL.
31  *
32  * A full copy of the text of the CDDL should have accompanied this
33  * source.  A copy of the CDDL is also available via the Internet at
34  * http://www.illumos.org/license/CDDL.
35  *
36  * Copyright 2015 Pluribus Networks Inc.
37  * Copyright 2019 Joyent, Inc.
38  * Copyright 2022 Oxide Computer Company
39  * Copyright 2022 Michael Zeller
40  */
41 
42 #include <sys/types.h>
43 #include <sys/strsubr.h>
44 
45 #include <sys/dlpi.h>
46 #include <sys/pattr.h>
47 #include <sys/vlan.h>
48 
49 #include "viona_impl.h"
50 
51 
52 
/*
 * Upper bound on the number of iovec entries requested from one descriptor
 * chain.  It also sizes the used-element array in viona_recv_merged(), and
 * so caps the number of chains which may be merged to carry a single frame.
 */
#define	VTNET_MAXSEGS		32

/* Min. octets in an ethernet frame minus FCS */
#define	MIN_BUF_SIZE		60
/* Frame length which falls short of the minimum by exactly one VLAN tag */
#define	NEED_VLAN_PAD_SIZE	(MIN_BUF_SIZE - VLAN_TAGSZ)

/*
 * Shared, zero-filled mblk of VLAN_TAGSZ octets.  It is allocated by
 * viona_rx_init(), freed by viona_rx_fini(), and temporarily linked onto the
 * tail of frames of NEED_VLAN_PAD_SIZE octets while they are being received.
 */
static mblk_t *viona_vlan_pad_mp;
60 
61 void
62 viona_rx_init(void)
63 {
64 	mblk_t *mp;
65 
66 	ASSERT(viona_vlan_pad_mp == NULL);
67 
68 	/* Create mblk for padding when VLAN tags are stripped */
69 	mp = allocb_wait(VLAN_TAGSZ, BPRI_HI, STR_NOSIG, NULL);
70 	bzero(mp->b_rptr, VLAN_TAGSZ);
71 	mp->b_wptr += VLAN_TAGSZ;
72 	viona_vlan_pad_mp = mp;
73 }
74 
75 void
76 viona_rx_fini(void)
77 {
78 	mblk_t *mp;
79 
80 	/* Clean up the VLAN padding mblk */
81 	mp = viona_vlan_pad_mp;
82 	viona_vlan_pad_mp = NULL;
83 	VERIFY(mp != NULL && mp->b_cont == NULL);
84 	freemsg(mp);
85 }
86 
87 void
88 viona_worker_rx(viona_vring_t *ring, viona_link_t *link)
89 {
90 	(void) thread_vsetname(curthread, "viona_rx_%p", ring);
91 
92 	ASSERT(MUTEX_HELD(&ring->vr_lock));
93 	ASSERT3U(ring->vr_state, ==, VRS_RUN);
94 
95 	viona_ring_disable_notify(ring);
96 
97 	do {
98 		if (vmm_drv_lease_expired(ring->vr_lease)) {
99 			/*
100 			 * Set the renewal flag, causing incoming traffic to be
101 			 * dropped, and issue an RX barrier to ensure any
102 			 * threads in the RX callbacks will have finished.
103 			 * The vr_lock cannot be held across the barrier as it
104 			 * poses a deadlock risk.
105 			 */
106 			ring->vr_state_flags |= VRSF_RENEW;
107 			mutex_exit(&ring->vr_lock);
108 			mac_rx_barrier(link->l_mch);
109 			mutex_enter(&ring->vr_lock);
110 
111 			if (!viona_ring_lease_renew(ring)) {
112 				break;
113 			}
114 			ring->vr_state_flags &= ~VRSF_RENEW;
115 		}
116 
117 		/*
118 		 * For now, there is little to do in the RX worker as inbound
119 		 * data is delivered by MAC via the RX callbacks.  If tap-like
120 		 * functionality is added later, this would be a convenient
121 		 * place to inject frames into the guest.
122 		 */
123 		(void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock);
124 	} while (!vring_need_bail(ring));
125 
126 	ring->vr_state = VRS_STOP;
127 
128 	/*
129 	 * The RX ring is stopping, before we start tearing it down it
130 	 * is imperative that we perform an RX barrier so that
131 	 * incoming packets are dropped at viona_rx_classified().
132 	 */
133 	mutex_exit(&ring->vr_lock);
134 	mac_rx_barrier(link->l_mch);
135 	mutex_enter(&ring->vr_lock);
136 
137 	/*
138 	 * If we bailed while renewing the ring lease, we cannot reset
139 	 * USED_NO_NOTIFY, since we lack a valid mapping to do so.
140 	 */
141 	if (ring->vr_lease != NULL) {
142 		viona_ring_enable_notify(ring);
143 	}
144 }
145 
146 static size_t
147 viona_copy_mblk(const mblk_t *mp, size_t seek, caddr_t buf, size_t len,
148     boolean_t *end)
149 {
150 	size_t copied = 0;
151 	size_t off = 0;
152 
153 	/* Seek past already-consumed data */
154 	while (seek > 0 && mp != NULL) {
155 		const size_t chunk = MBLKL(mp);
156 
157 		if (chunk > seek) {
158 			off = seek;
159 			break;
160 		}
161 		mp = mp->b_cont;
162 		seek -= chunk;
163 	}
164 
165 	while (mp != NULL) {
166 		const size_t chunk = MBLKL(mp) - off;
167 		const size_t to_copy = MIN(chunk, len);
168 
169 		bcopy(mp->b_rptr + off, buf, to_copy);
170 		copied += to_copy;
171 		buf += to_copy;
172 		len -= to_copy;
173 
174 		/*
175 		 * If all the remaining data in the mblk_t was copied, move on
176 		 * to the next one in the chain.  Any seek offset applied to
177 		 * the first mblk copy is zeroed out for subsequent operations.
178 		 */
179 		if (chunk == to_copy) {
180 			mp = mp->b_cont;
181 			off = 0;
182 		}
183 #ifdef DEBUG
184 		else {
185 			/*
186 			 * The only valid reason for the copy to consume less
187 			 * than the entire contents of the mblk_t is because
188 			 * the output buffer has been filled.
189 			 */
190 			ASSERT0(len);
191 		}
192 #endif
193 
194 		/* Go no further if the buffer has been filled */
195 		if (len == 0) {
196 			break;
197 		}
198 
199 	}
200 	*end = (mp == NULL);
201 	return (copied);
202 }
203 
204 static int
205 viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
206 {
207 	struct iovec iov[VTNET_MAXSEGS];
208 	uint16_t cookie;
209 	int n;
210 	const size_t hdr_sz = sizeof (struct virtio_net_hdr);
211 	struct virtio_net_hdr *hdr;
212 	size_t len, copied = 0;
213 	caddr_t buf = NULL;
214 	boolean_t end = B_FALSE;
215 	const uint32_t features = ring->vr_link->l_features;
216 	vmm_page_t *pages = NULL;
217 
218 	ASSERT(msz >= MIN_BUF_SIZE);
219 
220 	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages);
221 	if (n <= 0) {
222 		/* Without available buffers, the frame must be dropped. */
223 		return (ENOSPC);
224 	}
225 	if (iov[0].iov_len < hdr_sz) {
226 		/*
227 		 * There is little to do if there is not even space available
228 		 * for the sole header.  Zero the buffer and bail out as a last
229 		 * act of desperation.
230 		 */
231 		bzero(iov[0].iov_base, iov[0].iov_len);
232 		goto bad_frame;
233 	}
234 
235 	/* Grab the address of the header before anything else */
236 	hdr = (struct virtio_net_hdr *)iov[0].iov_base;
237 
238 	/*
239 	 * If there is any space remaining in the first buffer after writing
240 	 * the header, fill it with frame data.
241 	 */
242 	if (iov[0].iov_len > hdr_sz) {
243 		buf = (caddr_t)iov[0].iov_base + hdr_sz;
244 		len = iov[0].iov_len - hdr_sz;
245 
246 		copied += viona_copy_mblk(mp, copied, buf, len, &end);
247 	}
248 
249 	/* Copy any remaining data into subsequent buffers, if present */
250 	for (int i = 1; i < n && !end; i++) {
251 		buf = (caddr_t)iov[i].iov_base;
252 		len = iov[i].iov_len;
253 
254 		copied += viona_copy_mblk(mp, copied, buf, len, &end);
255 	}
256 
257 	/* Was the expected amount of data copied? */
258 	if (copied != msz) {
259 		VIONA_PROBE5(too_short, viona_vring_t *, ring,
260 		    uint16_t, cookie, mblk_t *, mp, size_t, copied,
261 		    size_t, msz);
262 		VIONA_RING_STAT_INCR(ring, too_short);
263 		goto bad_frame;
264 	}
265 
266 	/* Populate (read: zero) the header and account for it in the size */
267 	bzero(hdr, hdr_sz);
268 	copied += hdr_sz;
269 
270 	/* Add chksum bits, if needed */
271 	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
272 		uint32_t cksum_flags;
273 
274 		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
275 		    ((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
276 			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
277 			hdr->vrh_gso_size = DB_LSOMSS(mp);
278 		}
279 
280 		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
281 		    &cksum_flags);
282 		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
283 			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
284 		}
285 	}
286 
287 	/* Release this chain */
288 	vmm_drv_page_release_chain(pages);
289 	vq_pushchain(ring, copied, cookie);
290 	return (0);
291 
292 bad_frame:
293 	VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring, uint16_t, cookie,
294 	    mblk_t *, mp);
295 	VIONA_RING_STAT_INCR(ring, bad_rx_frame);
296 
297 	vmm_drv_page_release_chain(pages);
298 	vq_pushchain(ring, MAX(copied, MIN_BUF_SIZE + hdr_sz), cookie);
299 	return (EINVAL);
300 }
301 
302 static int
303 viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
304 {
305 	struct iovec iov[VTNET_MAXSEGS];
306 	used_elem_t uelem[VTNET_MAXSEGS];
307 	vmm_page_t *pages = NULL, *hdr_pages = NULL;
308 	int n, i = 0, buf_idx = 0, err = 0;
309 	uint16_t cookie;
310 	caddr_t buf;
311 	size_t len, copied = 0, chunk = 0;
312 	struct virtio_net_mrgrxhdr *hdr = NULL;
313 	const size_t hdr_sz = sizeof (struct virtio_net_mrgrxhdr);
314 	boolean_t end = B_FALSE;
315 	const uint32_t features = ring->vr_link->l_features;
316 
317 	ASSERT(msz >= MIN_BUF_SIZE);
318 
319 	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &hdr_pages);
320 	if (n <= 0) {
321 		/* Without available buffers, the frame must be dropped. */
322 		VIONA_PROBE2(no_space, viona_vring_t *, ring, mblk_t *, mp);
323 		VIONA_RING_STAT_INCR(ring, no_space);
324 		return (ENOSPC);
325 	}
326 	if (iov[0].iov_len < hdr_sz) {
327 		/*
328 		 * There is little to do if there is not even space available
329 		 * for the sole header.  Zero the buffer and bail out as a last
330 		 * act of desperation.
331 		 */
332 		bzero(iov[0].iov_base, iov[0].iov_len);
333 		uelem[0].id = cookie;
334 		uelem[0].len = iov[0].iov_len;
335 		err = EINVAL;
336 		goto done;
337 	}
338 
339 	/* Grab the address of the header and do initial population */
340 	hdr = (struct virtio_net_mrgrxhdr *)iov[0].iov_base;
341 	bzero(hdr, hdr_sz);
342 	hdr->vrh_bufs = 1;
343 
344 	/*
345 	 * If there is any space remaining in the first buffer after writing
346 	 * the header, fill it with frame data.  The size of the header itself
347 	 * is accounted for later.
348 	 */
349 	if (iov[0].iov_len > hdr_sz) {
350 		buf = iov[0].iov_base + hdr_sz;
351 		len = iov[0].iov_len - hdr_sz;
352 
353 		size_t copy_len;
354 		copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
355 		chunk += copy_len;
356 		copied += copy_len;
357 	}
358 	i = 1;
359 
360 	do {
361 		while (i < n && !end) {
362 			buf = iov[i].iov_base;
363 			len = iov[i].iov_len;
364 
365 			size_t copy_len;
366 			copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
367 			chunk += copy_len;
368 			copied += copy_len;
369 			i++;
370 		}
371 
372 		uelem[buf_idx].id = cookie;
373 		uelem[buf_idx].len = chunk;
374 
375 		/*
376 		 * Try to grab another buffer from the ring if the mblk has not
377 		 * yet been entirely copied out.
378 		 */
379 		if (!end) {
380 			if (buf_idx == (VTNET_MAXSEGS - 1)) {
381 				/*
382 				 * Our arbitrary limit on the number of buffers
383 				 * to offer for merge has already been reached.
384 				 */
385 				err = EOVERFLOW;
386 				break;
387 			}
388 			if (pages != NULL) {
389 				vmm_drv_page_release_chain(pages);
390 				pages = NULL;
391 			}
392 			n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie,
393 			    &pages);
394 			if (n <= 0) {
395 				/*
396 				 * Without more immediate space to perform the
397 				 * copying, there is little choice left but to
398 				 * drop the packet.
399 				 */
400 				err = EMSGSIZE;
401 				break;
402 			}
403 			chunk = 0;
404 			i = 0;
405 			buf_idx++;
406 			/*
407 			 * Keep the header up-to-date with the number of
408 			 * buffers, but never reference its value since the
409 			 * guest could meddle with it.
410 			 */
411 			hdr->vrh_bufs++;
412 		}
413 	} while (!end && copied < msz);
414 
415 	/* Account for the header size in the first buffer */
416 	uelem[0].len += hdr_sz;
417 
418 	/*
419 	 * If no other errors were encounted during the copy, was the expected
420 	 * amount of data transfered?
421 	 */
422 	if (err == 0 && copied != msz) {
423 		VIONA_PROBE5(too_short, viona_vring_t *, ring,
424 		    uint16_t, cookie, mblk_t *, mp, size_t, copied,
425 		    size_t, msz);
426 		VIONA_RING_STAT_INCR(ring, too_short);
427 		err = EINVAL;
428 	}
429 
430 	/* Add chksum bits, if needed */
431 	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
432 		uint32_t cksum_flags;
433 
434 		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
435 		    ((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
436 			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
437 			hdr->vrh_gso_size = DB_LSOMSS(mp);
438 		}
439 
440 		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
441 		    &cksum_flags);
442 		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
443 			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
444 		}
445 	}
446 
447 done:
448 	switch (err) {
449 	case 0:
450 		/* Success can fall right through to ring delivery */
451 		break;
452 
453 	case EMSGSIZE:
454 		VIONA_PROBE3(rx_merge_underrun, viona_vring_t *, ring,
455 		    uint16_t, cookie, mblk_t *, mp);
456 		VIONA_RING_STAT_INCR(ring, rx_merge_underrun);
457 		break;
458 
459 	case EOVERFLOW:
460 		VIONA_PROBE3(rx_merge_overrun, viona_vring_t *, ring,
461 		    uint16_t, cookie, mblk_t *, mp);
462 		VIONA_RING_STAT_INCR(ring, rx_merge_overrun);
463 		break;
464 
465 	default:
466 		VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring,
467 		    uint16_t, cookie, mblk_t *, mp);
468 		VIONA_RING_STAT_INCR(ring, bad_rx_frame);
469 	}
470 
471 	if (hdr_pages != NULL) {
472 		vmm_drv_page_release_chain(hdr_pages);
473 	}
474 	if (pages != NULL) {
475 		vmm_drv_page_release_chain(pages);
476 	}
477 	vq_pushchain_many(ring, buf_idx + 1, uelem);
478 	return (err);
479 }
480 
481 static void
482 viona_rx_common(viona_vring_t *ring, mblk_t *mp, boolean_t is_loopback)
483 {
484 	viona_link_t *link = ring->vr_link;
485 	mblk_t *mprx = NULL, **mprx_prevp = &mprx;
486 	mblk_t *mpdrop = NULL, **mpdrop_prevp = &mpdrop;
487 	const boolean_t do_merge =
488 	    ((link->l_features & VIRTIO_NET_F_MRG_RXBUF) != 0);
489 
490 	size_t nrx = 0, ndrop = 0;
491 
492 	while (mp != NULL) {
493 		mblk_t *next = mp->b_next;
494 		mblk_t *pad = NULL;
495 		size_t size = msgsize(mp);
496 		int err = 0;
497 
498 		mp->b_next = NULL;
499 
500 		/*
501 		 * We treat both a 'drop' response and errors the same here
502 		 * and put the packet on the drop chain.  As packets may be
503 		 * subject to different actions in ipf (which do not all
504 		 * return the same set of error values), an error processing
505 		 * one packet doesn't mean the next packet will also generate
506 		 * an error.
507 		 */
508 		if (VNETHOOK_INTERESTED_IN(link->l_neti) &&
509 		    viona_hook(link, ring, &mp, B_FALSE) != 0) {
510 			if (mp != NULL) {
511 				*mpdrop_prevp = mp;
512 				mpdrop_prevp = &mp->b_next;
513 			} else {
514 				/*
515 				 * If the hook consumer (e.g. ipf) already
516 				 * freed the mblk_t, update the drop count now.
517 				 */
518 				ndrop++;
519 			}
520 			mp = next;
521 			continue;
522 		}
523 
524 		/*
525 		 * Ethernet frames are expected to be padded out in order to
526 		 * meet the minimum size.
527 		 *
528 		 * A special case is made for frames which are short by
529 		 * VLAN_TAGSZ, having been stripped of their VLAN tag while
530 		 * traversing MAC.  A preallocated (and recycled) mblk is used
531 		 * for that specific condition.
532 		 *
533 		 * All other frames that fall short on length will have custom
534 		 * zero-padding allocated appended to them.
535 		 */
536 		if (size == NEED_VLAN_PAD_SIZE) {
537 			ASSERT(MBLKL(viona_vlan_pad_mp) == VLAN_TAGSZ);
538 			ASSERT(viona_vlan_pad_mp->b_cont == NULL);
539 
540 			for (pad = mp; pad->b_cont != NULL; pad = pad->b_cont)
541 				;
542 
543 			pad->b_cont = viona_vlan_pad_mp;
544 			size += VLAN_TAGSZ;
545 		} else if (size < MIN_BUF_SIZE) {
546 			const size_t pad_size = MIN_BUF_SIZE - size;
547 			mblk_t *zero_mp;
548 
549 			zero_mp = allocb(pad_size, BPRI_MED);
550 			if (zero_mp == NULL) {
551 				err = ENOMEM;
552 				goto pad_drop;
553 			}
554 
555 			VIONA_PROBE3(rx_pad_short, viona_vring_t *, ring,
556 			    mblk_t *, mp, size_t, pad_size);
557 			VIONA_RING_STAT_INCR(ring, rx_pad_short);
558 			zero_mp->b_wptr += pad_size;
559 			bzero(zero_mp->b_rptr, pad_size);
560 			linkb(mp, zero_mp);
561 			size += pad_size;
562 		}
563 
564 		if (do_merge) {
565 			err = viona_recv_merged(ring, mp, size);
566 		} else {
567 			err = viona_recv_plain(ring, mp, size);
568 		}
569 
570 		/*
571 		 * The VLAN padding mblk is meant for continual reuse, so
572 		 * remove it from the chain to prevent it from being freed.
573 		 *
574 		 * Custom allocated padding does not require this treatment and
575 		 * is freed normally.
576 		 */
577 		if (pad != NULL) {
578 			pad->b_cont = NULL;
579 		}
580 
581 pad_drop:
582 		/*
583 		 * While an error during rx processing
584 		 * (viona_recv_{merged,plain}) does not free mp on error,
585 		 * hook processing might or might not free mp.  Handle either
586 		 * scenario -- if mp is not yet free, it is queued up and
587 		 * freed after the guest has been notified.  If mp is
588 		 * already NULL, just proceed on.
589 		 */
590 		if (err != 0) {
591 			*mpdrop_prevp = mp;
592 			mpdrop_prevp = &mp->b_next;
593 
594 			/*
595 			 * If the available ring is empty, do not bother
596 			 * attempting to deliver any more frames.  Count the
597 			 * rest as dropped too.
598 			 */
599 			if (err == ENOSPC) {
600 				mp->b_next = next;
601 				break;
602 			}
603 		} else {
604 			/* Chain successful mblks to be freed later */
605 			*mprx_prevp = mp;
606 			mprx_prevp = &mp->b_next;
607 			nrx++;
608 		}
609 		mp = next;
610 	}
611 
612 	membar_enter();
613 	viona_intr_ring(ring, B_FALSE);
614 
615 	/* Free successfully received frames */
616 	if (mprx != NULL) {
617 		freemsgchain(mprx);
618 	}
619 
620 	/* Free dropped frames, also tallying them */
621 	mp = mpdrop;
622 	while (mp != NULL) {
623 		mblk_t *next = mp->b_next;
624 
625 		mp->b_next = NULL;
626 		freemsg(mp);
627 		mp = next;
628 		ndrop++;
629 	}
630 	VIONA_PROBE3(rx, viona_link_t *, link, size_t, nrx, size_t, ndrop);
631 }
632 
633 static void
634 viona_rx_classified(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
635     boolean_t is_loopback)
636 {
637 	viona_vring_t *ring = (viona_vring_t *)arg;
638 
639 	/* Drop traffic if ring is inactive or renewing its lease */
640 	if (ring->vr_state != VRS_RUN ||
641 	    (ring->vr_state_flags & VRSF_RENEW) != 0) {
642 		freemsgchain(mp);
643 		return;
644 	}
645 
646 	viona_rx_common(ring, mp, is_loopback);
647 }
648 
649 static void
650 viona_rx_mcast(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
651     boolean_t is_loopback)
652 {
653 	viona_vring_t *ring = (viona_vring_t *)arg;
654 	mac_handle_t mh = ring->vr_link->l_mh;
655 	mblk_t *mp_mcast_only = NULL;
656 	mblk_t **mpp = &mp_mcast_only;
657 
658 	/* Drop traffic if ring is inactive or renewing its lease */
659 	if (ring->vr_state != VRS_RUN ||
660 	    (ring->vr_state_flags & VRSF_RENEW) != 0) {
661 		freemsgchain(mp);
662 		return;
663 	}
664 
665 	/*
666 	 * In addition to multicast traffic, broadcast packets will also arrive
667 	 * via the MAC_CLIENT_PROMISC_MULTI handler. The mac_rx_set() callback
668 	 * for fully-classified traffic has already delivered that broadcast
669 	 * traffic, so it should be suppressed here, rather than duplicating it
670 	 * to the guest.
671 	 */
672 	while (mp != NULL) {
673 		mblk_t *mp_next;
674 		mac_header_info_t mhi;
675 		int err;
676 
677 		mp_next = mp->b_next;
678 		mp->b_next = NULL;
679 
680 		/* Determine the packet type */
681 		err = mac_vlan_header_info(mh, mp, &mhi);
682 		if (err != 0) {
683 			mblk_t *pull;
684 
685 			/*
686 			 * It is possible that gathering of the header
687 			 * information was impeded by a leading mblk_t which
688 			 * was of inadequate length to reference the needed
689 			 * fields.  Try again, in case that could be solved
690 			 * with a pull-up.
691 			 */
692 			pull = msgpullup(mp, sizeof (struct ether_vlan_header));
693 			if (pull == NULL) {
694 				err = ENOMEM;
695 			} else {
696 				err = mac_vlan_header_info(mh, pull, &mhi);
697 				freemsg(pull);
698 			}
699 
700 			if (err != 0) {
701 				VIONA_RING_STAT_INCR(ring, rx_mcast_check);
702 			}
703 		}
704 
705 		/* Chain up matching packets while discarding others */
706 		if (err == 0 && mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) {
707 			*mpp = mp;
708 			mpp = &mp->b_next;
709 		} else {
710 			freemsg(mp);
711 		}
712 
713 		mp = mp_next;
714 	}
715 
716 	if (mp_mcast_only != NULL) {
717 		viona_rx_common(ring, mp_mcast_only, is_loopback);
718 	}
719 }
720 
721 int
722 viona_rx_set(viona_link_t *link)
723 {
724 	viona_vring_t *ring = &link->l_vrings[VIONA_VQ_RX];
725 	int err;
726 
727 	mac_rx_set(link->l_mch, viona_rx_classified, ring);
728 	err = mac_promisc_add(link->l_mch, MAC_CLIENT_PROMISC_MULTI,
729 	    viona_rx_mcast, ring, &link->l_mph,
730 	    MAC_PROMISC_FLAGS_NO_TX_LOOP | MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
731 	if (err != 0) {
732 		mac_rx_clear(link->l_mch);
733 	}
734 
735 	return (err);
736 }
737 
/*
 * Undo the work of viona_rx_set(): remove the promiscuous callback held in
 * link->l_mph and then clear the RX callback on the MAC client handle.
 */
void
viona_rx_clear(viona_link_t *link)
{
	mac_promisc_remove(link->l_mph);
	mac_rx_clear(link->l_mch);
}
744