/*
 * Copyright (c) 2013 Chris Torek <torek @ torek net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2025 Oxide Computer Company
 * Copyright 2022 Michael Zeller
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

#include <sys/types.h>
#include <sys/strsubr.h>

#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/vlan.h>

#include "viona_impl.h"

#define	VTNET_MAXSEGS		32

/* Min. octets in an ethernet frame minus FCS */
#define	MIN_BUF_SIZE		60
#define	NEED_VLAN_PAD_SIZE	(MIN_BUF_SIZE - VLAN_TAGSZ)

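/*
 * Shared, zero-filled mblk used to pad frames which come up exactly
 * VLAN_TAGSZ (4 octets) short of the 60-octet minimum after MAC has
 * stripped their VLAN tag; see viona_rx_common().
 */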
static mblk_t *viona_vlan_pad_mp;

void
viona_rx_init(void)
{
	mblk_t *mp;

	ASSERT(viona_vlan_pad_mp == NULL);

	/* Create mblk for padding when VLAN tags are stripped */
	mp = allocb_wait(VLAN_TAGSZ, BPRI_HI, STR_NOSIG, NULL);
	bzero(mp->b_rptr, VLAN_TAGSZ);
	mp->b_wptr += VLAN_TAGSZ;
	viona_vlan_pad_mp = mp;
}

void
viona_rx_fini(void)
{
	mblk_t *mp;

	/* Clean up the VLAN padding mblk */
	mp = viona_vlan_pad_mp;
	viona_vlan_pad_mp = NULL;
	VERIFY(mp != NULL && mp->b_cont == NULL);
	freemsg(mp);
}

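/*
 * Body of the RX worker thread for a ring. Inbound frames are delivered by
 * MAC via the RX callbacks, so this loop mostly sleeps on the ring cv,
 * renewing the vmm lease when it expires and exiting once the ring is asked
 * to stop or reset.
 */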
void
viona_worker_rx(viona_vring_t *ring, viona_link_t *link)
{
	(void) thread_vsetname(curthread, "viona_rx_%p", ring);

	ASSERT(MUTEX_HELD(&ring->vr_lock));
	ASSERT3U(ring->vr_state, ==, VRS_RUN);

	viona_ring_disable_notify(ring);

	do {
		if (vmm_drv_lease_expired(ring->vr_lease)) {
			/*
			 * Set the renewal flag, causing incoming traffic to be
			 * dropped, and issue an RX barrier to ensure any
			 * threads in the RX callbacks will have finished.
			 * The vr_lock cannot be held across the barrier as it
			 * poses a deadlock risk.
			 */
			ring->vr_state_flags |= VRSF_RENEW;
			mutex_exit(&ring->vr_lock);
			mac_rx_barrier(link->l_mch);
			mutex_enter(&ring->vr_lock);

			if (!viona_ring_lease_renew(ring)) {
				break;
			}
			ring->vr_state_flags &= ~VRSF_RENEW;
		}

		/*
		 * For now, there is little to do in the RX worker as inbound
		 * data is delivered by MAC via the RX callbacks. If tap-like
		 * functionality is added later, this would be a convenient
		 * place to inject frames into the guest.
		 */
		(void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock);
	} while (!vring_need_bail(ring));

	ring->vr_state = VRS_STOP;

	/*
	 * The RX ring is stopping; before we start tearing it down, it is
	 * imperative that we perform an RX barrier so that incoming packets
	 * are dropped at viona_rx_classified().
	 */
	mutex_exit(&ring->vr_lock);
	mac_rx_barrier(link->l_mch);
	mutex_enter(&ring->vr_lock);

	/*
	 * If we bailed while renewing the ring lease, we cannot reset
	 * USED_NO_NOTIFY, since we lack a valid mapping to do so.
	 */
	if (ring->vr_lease != NULL) {
		viona_ring_enable_notify(ring);
	}
}

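/*
 * Copy up to `len` bytes from the mblk chain `mp` into `buf`, skipping the
 * first `seek` bytes of the chain (data consumed by previous copies).
 * Returns the number of bytes copied and sets `*end` once the entire chain
 * has been consumed.
 */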
static size_t
viona_copy_mblk(const mblk_t *mp, size_t seek, caddr_t buf, size_t len,
    boolean_t *end)
{
	size_t copied = 0;
	size_t off = 0;

	/* Seek past already-consumed data */
	while (seek > 0 && mp != NULL) {
		const size_t chunk = MBLKL(mp);

		if (chunk > seek) {
			off = seek;
			break;
		}
		mp = mp->b_cont;
		seek -= chunk;
	}

	while (mp != NULL) {
		const size_t chunk = MBLKL(mp) - off;
		const size_t to_copy = MIN(chunk, len);

		bcopy(mp->b_rptr + off, buf, to_copy);
		copied += to_copy;
		buf += to_copy;
		len -= to_copy;

		/*
		 * If all the remaining data in the mblk_t was copied, move on
		 * to the next one in the chain. Any seek offset applied to
		 * the first mblk copy is zeroed out for subsequent operations.
		 */
		if (chunk == to_copy) {
			mp = mp->b_cont;
			off = 0;
		}
#ifdef DEBUG
		else {
			/*
			 * The only valid reason for the copy to consume less
			 * than the entire contents of the mblk_t is because
			 * the output buffer has been filled.
			 */
			ASSERT0(len);
		}
#endif

		/* Go no further if the buffer has been filled */
		if (len == 0) {
			break;
		}
	}
	*end = (mp == NULL);
	return (copied);
}

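/*
 * Deliver a received frame to the guest using a single descriptor chain
 * (mergeable RX buffers not negotiated). Returns 0 on success, ENOSPC if no
 * chain is available, or EINVAL if the frame could not be placed.
 */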
static int
viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
{
	struct iovec iov[VTNET_MAXSEGS];
	uint16_t cookie;
	int n;
	const size_t hdr_sz = sizeof (struct virtio_net_hdr);
	struct virtio_net_hdr *hdr;
	size_t len, copied = 0;
	caddr_t buf = NULL;
	boolean_t end = B_FALSE;
	const uint32_t features = ring->vr_link->l_features;
	vmm_page_t *pages = NULL;

	ASSERT(msz >= MIN_BUF_SIZE);

	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages, NULL);
	if (n <= 0) {
		/* Without available buffers, the frame must be dropped. */
		return (ENOSPC);
	}
	if (iov[0].iov_len < hdr_sz) {
		/*
		 * There is little to do if there is not even space available
		 * for the sole header. Zero the buffer and bail out as a last
		 * act of desperation.
		 */
		bzero(iov[0].iov_base, iov[0].iov_len);
		goto bad_frame;
	}

	/* Grab the address of the header before anything else */
	hdr = (struct virtio_net_hdr *)iov[0].iov_base;

	/*
	 * If there is any space remaining in the first buffer after writing
	 * the header, fill it with frame data.
	 */
	if (iov[0].iov_len > hdr_sz) {
		buf = (caddr_t)iov[0].iov_base + hdr_sz;
		len = iov[0].iov_len - hdr_sz;

		copied += viona_copy_mblk(mp, copied, buf, len, &end);
	}

	/* Copy any remaining data into subsequent buffers, if present */
	for (int i = 1; i < n && !end; i++) {
		buf = (caddr_t)iov[i].iov_base;
		len = iov[i].iov_len;

		copied += viona_copy_mblk(mp, copied, buf, len, &end);
	}

	/* Was the expected amount of data copied? */
	if (copied != msz) {
		VIONA_PROBE5(too_short, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp, size_t, copied,
		    size_t, msz);
		VIONA_RING_STAT_INCR(ring, too_short);
		goto bad_frame;
	}

	/* Populate (read: zero) the header and account for it in the size */
	bzero(hdr, hdr_sz);
	copied += hdr_sz;

	/* Add chksum bits, if needed */
	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
		uint32_t cksum_flags;

		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
		    ((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->vrh_gso_size = DB_LSOMSS(mp);
		}

		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
		    &cksum_flags);
		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
		}
	}

	/* Release this chain */
	vmm_drv_page_release_chain(pages);
	vq_pushchain(ring, copied, cookie);
	return (0);

bad_frame:
	VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring, uint16_t, cookie,
	    mblk_t *, mp);
	VIONA_RING_STAT_INCR(ring, bad_rx_frame);

	vmm_drv_page_release_chain(pages);
	vq_pushchain(ring, MAX(copied, MIN_BUF_SIZE + hdr_sz), cookie);
	return (EINVAL);
}

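/*
 * Deliver a received frame to the guest using mergeable RX buffers
 * (VIRTIO_NET_F_MRG_RXBUF), popping additional descriptor chains as needed,
 * up to VTNET_MAXSEGS of them. Returns 0 on success or an errno describing
 * why delivery fell short (ENOSPC, EMSGSIZE, EOVERFLOW, EINVAL).
 */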
static int
viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
{
	struct iovec iov[VTNET_MAXSEGS];
	used_elem_t uelem[VTNET_MAXSEGS];
	vmm_page_t *pages = NULL, *hdr_pages = NULL;
	int n, i = 0, buf_idx = 0, err = 0;
	uint16_t cookie;
	caddr_t buf;
	size_t len, copied = 0, chunk = 0;
	struct virtio_net_mrgrxhdr *hdr = NULL;
	const size_t hdr_sz = sizeof (struct virtio_net_mrgrxhdr);
	boolean_t end = B_FALSE;
	const uint32_t features = ring->vr_link->l_features;

	ASSERT(msz >= MIN_BUF_SIZE);

	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &hdr_pages, NULL);
	if (n <= 0) {
		/* Without available buffers, the frame must be dropped. */
		VIONA_PROBE2(no_space, viona_vring_t *, ring, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, no_space);
		return (ENOSPC);
	}
	if (iov[0].iov_len < hdr_sz) {
		/*
		 * There is little to do if there is not even space available
		 * for the sole header. Zero the buffer and bail out as a last
		 * act of desperation.
		 */
		bzero(iov[0].iov_base, iov[0].iov_len);
		uelem[0].id = cookie;
		uelem[0].len = iov[0].iov_len;
		err = EINVAL;
		goto done;
	}

	/* Grab the address of the header and do initial population */
	hdr = (struct virtio_net_mrgrxhdr *)iov[0].iov_base;
	bzero(hdr, hdr_sz);
	hdr->vrh_bufs = 1;

	/*
	 * If there is any space remaining in the first buffer after writing
	 * the header, fill it with frame data. The size of the header itself
	 * is accounted for later.
	 */
	if (iov[0].iov_len > hdr_sz) {
		buf = iov[0].iov_base + hdr_sz;
		len = iov[0].iov_len - hdr_sz;

		size_t copy_len;
		copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
		chunk += copy_len;
		copied += copy_len;
	}
	i = 1;

	do {
		while (i < n && !end) {
			buf = iov[i].iov_base;
			len = iov[i].iov_len;

			size_t copy_len;
			copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
			chunk += copy_len;
			copied += copy_len;
			i++;
		}

		uelem[buf_idx].id = cookie;
		uelem[buf_idx].len = chunk;

		/*
		 * Try to grab another buffer from the ring if the mblk has not
		 * yet been entirely copied out.
		 */
		if (!end) {
			if (buf_idx == (VTNET_MAXSEGS - 1)) {
				/*
				 * Our arbitrary limit on the number of buffers
				 * to offer for merge has already been reached.
				 */
				err = EOVERFLOW;
				break;
			}
			if (pages != NULL) {
				vmm_drv_page_release_chain(pages);
				pages = NULL;
			}
			n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie,
			    &pages, NULL);
			if (n <= 0) {
				/*
				 * Without more immediate space to perform the
				 * copying, there is little choice left but to
				 * drop the packet.
				 */
				err = EMSGSIZE;
				break;
			}
			chunk = 0;
			i = 0;
			buf_idx++;
			/*
			 * Keep the header up-to-date with the number of
			 * buffers, but never reference its value since the
			 * guest could meddle with it.
			 */
			hdr->vrh_bufs++;
		}
	} while (!end && copied < msz);

	/* Account for the header size in the first buffer */
	uelem[0].len += hdr_sz;

	/*
	 * If no other errors were encountered during the copy, was the
	 * expected amount of data transferred?
	 */
	if (err == 0 && copied != msz) {
		VIONA_PROBE5(too_short, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp, size_t, copied,
		    size_t, msz);
		VIONA_RING_STAT_INCR(ring, too_short);
		err = EINVAL;
	}

	/* Add chksum bits, if needed */
	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
		uint32_t cksum_flags;

		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
		    ((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->vrh_gso_size = DB_LSOMSS(mp);
		}

		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
		    &cksum_flags);
		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
		}
	}

done:
	switch (err) {
	case 0:
		/* Success can fall right through to ring delivery */
		break;

	case EMSGSIZE:
		VIONA_PROBE3(rx_merge_underrun, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, rx_merge_underrun);
		break;

	case EOVERFLOW:
		VIONA_PROBE3(rx_merge_overrun, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, rx_merge_overrun);
		break;

	default:
		VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, bad_rx_frame);
	}

	if (hdr_pages != NULL) {
		vmm_drv_page_release_chain(hdr_pages);
	}
	if (pages != NULL) {
		vmm_drv_page_release_chain(pages);
	}
	vq_pushchain_many(ring, buf_idx + 1, uelem);
	return (err);
}

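/*
 * Common RX delivery path: walk the inbound mblk chain, run the packet hooks,
 * enforce the MTU/LSO constraints (emulating LSO in software when required),
 * pad short frames to the Ethernet minimum, and hand each frame to the plain
 * or merged receive routine before notifying the guest and freeing/tallying
 * the processed mblks.
 */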
static void
viona_rx_common(viona_vring_t *ring, mblk_t *mp, boolean_t is_loopback)
{
	viona_link_t *link = ring->vr_link;
	mblk_t *mprx = NULL, **mprx_prevp = &mprx;
	mblk_t *mpdrop = NULL, **mpdrop_prevp = &mpdrop;
	const boolean_t do_merge =
	    (link->l_features & VIRTIO_NET_F_MRG_RXBUF) != 0;
	const boolean_t allow_gro =
	    (link->l_features & VIRTIO_NET_F_GUEST_TSO4) != 0;

	size_t cnt_accept = 0, size_accept = 0, cnt_drop = 0;

	while (mp != NULL) {
		mblk_t *next = mp->b_next;
		mblk_t *pad = NULL;
		size_t size = msgsize(mp);
		int err = 0;

		mp->b_next = NULL;

		/*
		 * We treat both a 'drop' response and errors the same here
		 * and put the packet on the drop chain. As packets may be
		 * subject to different actions in ipf (which do not all
		 * return the same set of error values), an error processing
		 * one packet doesn't mean the next packet will also generate
		 * an error.
		 */
		if (VNETHOOK_INTERESTED_IN(link->l_neti) &&
		    viona_hook(link, ring, &mp, B_FALSE) != 0) {
			if (mp != NULL) {
				*mpdrop_prevp = mp;
				mpdrop_prevp = &mp->b_next;
			} else {
				/*
				 * If the hook consumer (e.g. ipf) already
				 * freed the mblk_t, update the drop count now.
				 */
				cnt_drop++;
			}
			mp = next;
			continue;
		}

		/*
		 * Virtio devices are prohibited from passing on packets
		 * larger than the MTU plus the Ethernet header if the guest
		 * has not negotiated GRO flags (e.g., GUEST_TSO*). This
		 * applies irrespective of `do_merge`.
		 */
		if (size > sizeof (struct ether_header) + link->l_mtu) {
			const boolean_t can_emu_lso = DB_LSOMSS(mp) != 0;
			const boolean_t attempt_emu =
			    !allow_gro || size > VIONA_GRO_MAX_PACKET_SIZE;

			if ((DB_CKSUMFLAGS(mp) & HW_LSO) == 0 ||
			    (attempt_emu && !can_emu_lso)) {
				VIONA_PROBE3(rx_drop_over_mtu, viona_vring_t *,
				    ring, mblk_t *, mp, size_t, size);
				VIONA_RING_STAT_INCR(ring, rx_drop_over_mtu);
				err = E2BIG;
				goto pad_drop;
			}

			/*
			 * If the packet has come from another device or viona
			 * which expected to make use of LSO, we can split the
			 * packet on its behalf.
			 */
			if (attempt_emu) {
				mblk_t *tail = NULL;
				uint_t n_pkts = 0;

				/*
				 * Emulation of LSO requires that cksum offload
				 * be enabled on the mblk. Since only IPv4 is
				 * supported by the LSO emulation, the v4 cksum
				 * is enabled unconditionally.
				 */
				if ((DB_CKSUMFLAGS(mp) &
				    (HCK_FULLCKSUM | HCK_PARTIALCKSUM)) == 0) {
					DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM;
				}
				DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM;

				mac_hw_emul(&mp, &tail, &n_pkts, MAC_ALL_EMULS);
				if (mp == NULL) {
					VIONA_RING_STAT_INCR(ring,
					    rx_gro_fallback_fail);
					viona_ring_stat_error(ring);
					mp = next;
					continue;
				}
				VIONA_PROBE4(rx_gro_fallback, viona_vring_t *,
				    ring, mblk_t *, mp, size_t, size,
				    uint_t, n_pkts);
				VIONA_RING_STAT_INCR(ring, rx_gro_fallback);
				ASSERT3P(tail, !=, NULL);
				if (tail != mp) {
					tail->b_next = next;
					next = mp->b_next;
					mp->b_next = NULL;
				}
				size = msgsize(mp);
			}
		}

		/*
		 * Ethernet frames are expected to be padded out in order to
		 * meet the minimum size.
		 *
		 * A special case is made for frames which are short by
		 * VLAN_TAGSZ, having been stripped of their VLAN tag while
		 * traversing MAC. A preallocated (and recycled) mblk is used
		 * for that specific condition.
		 *
		 * All other frames that fall short on length will have custom
		 * zero-padding allocated appended to them.
		 */
		if (size == NEED_VLAN_PAD_SIZE) {
			ASSERT(MBLKL(viona_vlan_pad_mp) == VLAN_TAGSZ);
			ASSERT(viona_vlan_pad_mp->b_cont == NULL);

			for (pad = mp; pad->b_cont != NULL; pad = pad->b_cont)
				;

			pad->b_cont = viona_vlan_pad_mp;
			size += VLAN_TAGSZ;
		} else if (size < MIN_BUF_SIZE) {
			const size_t pad_size = MIN_BUF_SIZE - size;
			mblk_t *zero_mp;

			zero_mp = allocb(pad_size, BPRI_MED);
			if (zero_mp == NULL) {
				err = ENOMEM;
				goto pad_drop;
			}

			VIONA_PROBE3(rx_pad_short, viona_vring_t *, ring,
			    mblk_t *, mp, size_t, pad_size);
			VIONA_RING_STAT_INCR(ring, rx_pad_short);
			zero_mp->b_wptr += pad_size;
			bzero(zero_mp->b_rptr, pad_size);
			linkb(mp, zero_mp);
			size += pad_size;
		}

		if (do_merge) {
			err = viona_recv_merged(ring, mp, size);
		} else {
			err = viona_recv_plain(ring, mp, size);
		}

		/*
		 * The VLAN padding mblk is meant for continual reuse, so
		 * remove it from the chain to prevent it from being freed.
		 *
		 * Custom allocated padding does not require this treatment and
		 * is freed normally.
		 */
		if (pad != NULL) {
			pad->b_cont = NULL;
		}

pad_drop:
		/*
		 * While rx processing (viona_recv_{merged,plain}) does not
		 * free mp on error, hook processing might or might not free
		 * mp. Handle either scenario -- if mp is not yet free, it is
		 * queued up and freed after the guest has been notified. If
		 * mp is already NULL, just proceed on.
		 */
		if (err != 0) {
			*mpdrop_prevp = mp;
			mpdrop_prevp = &mp->b_next;

			/*
			 * If the available ring is empty, do not bother
			 * attempting to deliver any more frames. Count the
			 * rest as dropped too.
			 */
			if (err == ENOSPC) {
				mp->b_next = next;
				break;
			} else {
				/*
				 * Cases other than the ring being empty of
				 * available descriptors count as errors for
				 * the ring/link stats.
				 */
				viona_ring_stat_error(ring);
			}
		} else {
			/* Chain successful mblks to be freed later */
			*mprx_prevp = mp;
			mprx_prevp = &mp->b_next;
			cnt_accept++;
			size_accept += size;

			VIONA_PROBE3(pkt__rx, viona_vring_t *, ring,
			    mblk_t *, mp, size_t, size);
		}
		mp = next;
	}

	membar_enter();
	viona_intr_ring(ring, B_FALSE);

	/* Free successfully received frames */
	if (mprx != NULL) {
		freemsgchain(mprx);
	}

	/* Free dropped frames, also tallying them */
	mp = mpdrop;
	while (mp != NULL) {
		mblk_t *next = mp->b_next;

		mp->b_next = NULL;
		freemsg(mp);
		mp = next;
		cnt_drop++;
	}

	if (cnt_accept != 0) {
		viona_ring_stat_accept(ring, cnt_accept, size_accept);
	}
	if (cnt_drop != 0) {
		viona_ring_stat_drop(ring, cnt_drop);
	}
	VIONA_PROBE3(rx, viona_link_t *, link, size_t, cnt_accept,
	    size_t, cnt_drop);
}

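/*
 * RX callback for fully-classified traffic destined for this link. Frames
 * are dropped outright if the ring is not running or is renewing its lease.
 */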
static void
viona_rx_classified(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t is_loopback)
{
	viona_vring_t *ring = (viona_vring_t *)arg;

	/* Drop traffic if ring is inactive or renewing its lease */
	if (ring->vr_state != VRS_RUN ||
	    (ring->vr_state_flags & VRSF_RENEW) != 0) {
		freemsgchain(mp);
		return;
	}

	viona_rx_common(ring, mp, is_loopback);
}

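/*
 * Promiscuous (MAC_CLIENT_PROMISC_MULTI) RX callback. Only genuinely
 * multicast frames are passed along; broadcast frames are discarded here
 * since the classified callback has already delivered them.
 */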
static void
viona_rx_mcast(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t is_loopback)
{
	viona_vring_t *ring = (viona_vring_t *)arg;
	mac_handle_t mh = ring->vr_link->l_mh;
	mblk_t *mp_mcast_only = NULL;
	mblk_t **mpp = &mp_mcast_only;

	/* Drop traffic if ring is inactive or renewing its lease */
	if (ring->vr_state != VRS_RUN ||
	    (ring->vr_state_flags & VRSF_RENEW) != 0) {
		freemsgchain(mp);
		return;
	}

	/*
	 * In addition to multicast traffic, broadcast packets will also arrive
	 * via the MAC_CLIENT_PROMISC_MULTI handler. The mac_rx_set() callback
	 * for fully-classified traffic has already delivered that broadcast
	 * traffic, so it should be suppressed here, rather than duplicating it
	 * to the guest.
	 */
	while (mp != NULL) {
		mblk_t *mp_next;
		mac_header_info_t mhi;
		int err;

		mp_next = mp->b_next;
		mp->b_next = NULL;

		/* Determine the packet type */
		err = mac_vlan_header_info(mh, mp, &mhi);
		if (err != 0) {
			mblk_t *pull;

			/*
			 * It is possible that gathering of the header
			 * information was impeded by a leading mblk_t which
			 * was of inadequate length to reference the needed
			 * fields. Try again, in case that could be solved
			 * with a pull-up.
			 */
			pull = msgpullup(mp, sizeof (struct ether_vlan_header));
			if (pull == NULL) {
				err = ENOMEM;
			} else {
				err = mac_vlan_header_info(mh, pull, &mhi);
				freemsg(pull);
			}

			if (err != 0) {
				VIONA_RING_STAT_INCR(ring, rx_mcast_check);
			}
		}

		/* Chain up matching packets while discarding others */
		if (err == 0 && mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) {
			*mpp = mp;
			mpp = &mp->b_next;
		} else {
			freemsg(mp);
		}

		mp = mp_next;
	}

	if (mp_mcast_only != NULL) {
		viona_rx_common(ring, mp_mcast_only, is_loopback);
	}
}

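/*
 * Install the RX callbacks on the MAC client according to the requested
 * promiscuity mode, removing any previously registered promiscuous handler
 * first.
 */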
int
viona_rx_set(viona_link_t *link, viona_promisc_t mode)
{
	viona_vring_t *ring = &link->l_vrings[VIONA_VQ_RX];
	int err = 0;

	if (link->l_mph != NULL) {
		mac_promisc_remove(link->l_mph);
		link->l_mph = NULL;
	}

	switch (mode) {
	case VIONA_PROMISC_MULTI:
		mac_rx_set(link->l_mch, viona_rx_classified, ring);
		err = mac_promisc_add(link->l_mch, MAC_CLIENT_PROMISC_MULTI,
		    viona_rx_mcast, ring, &link->l_mph,
		    MAC_PROMISC_FLAGS_NO_TX_LOOP |
		    MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
		break;
	case VIONA_PROMISC_ALL:
		mac_rx_clear(link->l_mch);
		err = mac_promisc_add(link->l_mch, MAC_CLIENT_PROMISC_ALL,
		    viona_rx_classified, ring, &link->l_mph,
		    MAC_PROMISC_FLAGS_NO_TX_LOOP |
		    MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
		/*
		 * In case adding the promisc handler failed, restore the
		 * generic classified callback so that packets continue to
		 * flow to the guest.
		 */
		if (err != 0) {
			mac_rx_set(link->l_mch, viona_rx_classified, ring);
		}
		break;
	case VIONA_PROMISC_NONE:
	default:
		mac_rx_set(link->l_mch, viona_rx_classified, ring);
		break;
	}

	return (err);
}

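/*
 * Tear down the RX callbacks, removing any promiscuous handler installed by
 * viona_rx_set() and clearing the classified callback.
 */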
void
viona_rx_clear(viona_link_t *link)
{
	if (link->l_mph != NULL) {
		mac_promisc_remove(link->l_mph);
		link->l_mph = NULL;
	}
	mac_rx_clear(link->l_mch);
}