/*
 * Copyright (c) 2013 Chris Torek <torek @ torek net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2025 Oxide Computer Company
 * Copyright 2022 Michael Zeller
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

#include <sys/types.h>
#include <sys/strsubr.h>

#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/vlan.h>

#include "viona_impl.h"


#define	VTNET_MAXSEGS		32

/* Min. octets in an ethernet frame minus FCS */
#define	MIN_BUF_SIZE		60
#define	NEED_VLAN_PAD_SIZE	(MIN_BUF_SIZE - VLAN_TAGSZ)

static mblk_t *viona_vlan_pad_mp;

void
viona_rx_init(void)
{
	mblk_t *mp;

	ASSERT(viona_vlan_pad_mp == NULL);

	/* Create mblk for padding when VLAN tags are stripped */
	mp = allocb_wait(VLAN_TAGSZ, BPRI_HI, STR_NOSIG, NULL);
	bzero(mp->b_rptr, VLAN_TAGSZ);
	mp->b_wptr += VLAN_TAGSZ;
	viona_vlan_pad_mp = mp;
}

void
viona_rx_fini(void)
{
	mblk_t *mp;

	/* Clean up the VLAN padding mblk */
	mp = viona_vlan_pad_mp;
	viona_vlan_pad_mp = NULL;
	VERIFY(mp != NULL && mp->b_cont == NULL);
	freemsg(mp);
}
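
/*
 * Note: the shared pad mblk above covers one common case: MAC strips the
 * 802.1Q tag from a minimally-sized tagged frame, leaving it VLAN_TAGSZ (4)
 * bytes short of the 60-byte minimum (NEED_VLAN_PAD_SIZE).  Rather than
 * allocating fresh zero padding for every such frame, viona_rx_common()
 * temporarily links viona_vlan_pad_mp at the tail of the chain and detaches
 * it again before the chain is freed, recycling the same mblk between
 * viona_rx_init() and viona_rx_fini().
 */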

void
viona_worker_rx(viona_vring_t *ring, viona_link_t *link)
{
	(void) thread_vsetname(curthread, "viona_rx_%p", ring);

	ASSERT(MUTEX_HELD(&ring->vr_lock));
	ASSERT3U(ring->vr_state, ==, VRS_RUN);

	viona_ring_disable_notify(ring);

	do {
		if (vmm_drv_lease_expired(ring->vr_lease)) {
			/*
			 * Set the renewal flag, causing incoming traffic to be
			 * dropped, and issue an RX barrier to ensure any
			 * threads in the RX callbacks will have finished.
			 * The vr_lock cannot be held across the barrier as it
			 * poses a deadlock risk.
			 */
			ring->vr_state_flags |= VRSF_RENEW;
			mutex_exit(&ring->vr_lock);
			mac_rx_barrier(link->l_mch);
			mutex_enter(&ring->vr_lock);

			if (!viona_ring_lease_renew(ring)) {
				break;
			}
			ring->vr_state_flags &= ~VRSF_RENEW;
		}

		/*
		 * For now, there is little to do in the RX worker as inbound
		 * data is delivered by MAC via the RX callbacks.  If tap-like
		 * functionality is added later, this would be a convenient
		 * place to inject frames into the guest.
		 */
		(void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock);
	} while (!vring_need_bail(ring));

	ring->vr_state = VRS_STOP;

	/*
	 * The RX ring is stopping, so before we begin tearing it down it is
	 * imperative that we perform an RX barrier so that incoming packets
	 * are dropped at viona_rx_classified().
	 */
	mutex_exit(&ring->vr_lock);
	mac_rx_barrier(link->l_mch);
	mutex_enter(&ring->vr_lock);

	/*
	 * If we bailed while renewing the ring lease, we cannot reset
	 * USED_NO_NOTIFY, since we lack a valid mapping to do so.
	 */
	if (ring->vr_lease != NULL) {
		viona_ring_enable_notify(ring);
	}
}

static size_t
viona_copy_mblk(const mblk_t *mp, size_t seek, caddr_t buf, size_t len,
    boolean_t *end)
{
	size_t copied = 0;
	size_t off = 0;

	/* Seek past already-consumed data */
	while (seek > 0 && mp != NULL) {
		const size_t chunk = MBLKL(mp);

		if (chunk > seek) {
			off = seek;
			break;
		}
		mp = mp->b_cont;
		seek -= chunk;
	}

	while (mp != NULL) {
		const size_t chunk = MBLKL(mp) - off;
		const size_t to_copy = MIN(chunk, len);

		bcopy(mp->b_rptr + off, buf, to_copy);
		copied += to_copy;
		buf += to_copy;
		len -= to_copy;

		/*
		 * If all the remaining data in the mblk_t was copied, move on
		 * to the next one in the chain.  Any seek offset applied to
		 * the first mblk copy is zeroed out for subsequent operations.
		 */
		if (chunk == to_copy) {
			mp = mp->b_cont;
			off = 0;
		}
#ifdef DEBUG
		else {
			/*
			 * The only valid reason for the copy to consume less
			 * than the entire contents of the mblk_t is that the
			 * output buffer has been filled.
			 */
			ASSERT0(len);
		}
#endif

		/* Go no further if the buffer has been filled */
		if (len == 0) {
			break;
		}
	}
	*end = (mp == NULL);
	return (copied);
}
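
/*
 * Illustrative sketch (not part of the driver proper): the receive paths
 * below copy a single frame into several guest buffers by feeding the running
 * byte total back in as the seek offset on each call:
 *
 *	boolean_t end = B_FALSE;
 *	size_t copied = 0;
 *
 *	copied += viona_copy_mblk(mp, copied, buf0, len0, &end);
 *	if (!end)
 *		copied += viona_copy_mblk(mp, copied, buf1, len1, &end);
 *
 * `end' is set once the entire mblk chain has been consumed; until then,
 * `copied' records how far into the frame the next call should resume.
 */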

static int
viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
{
	struct iovec iov[VTNET_MAXSEGS];
	uint16_t cookie;
	int n;
	const size_t hdr_sz = sizeof (struct virtio_net_hdr);
	struct virtio_net_hdr *hdr;
	size_t len, copied = 0;
	caddr_t buf = NULL;
	boolean_t end = B_FALSE;
	const uint32_t features = ring->vr_link->l_features;
	vmm_page_t *pages = NULL;

	ASSERT(msz >= MIN_BUF_SIZE);

	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages, NULL);
	if (n <= 0) {
		/* Without available buffers, the frame must be dropped. */
		return (ENOSPC);
	}
	if (iov[0].iov_len < hdr_sz) {
		/*
		 * There is little to do if there is not even space available
		 * for the sole header.  Zero the buffer and bail out as a last
		 * act of desperation.
		 */
		bzero(iov[0].iov_base, iov[0].iov_len);
		goto bad_frame;
	}

	/* Grab the address of the header before anything else */
	hdr = (struct virtio_net_hdr *)iov[0].iov_base;

	/*
	 * If there is any space remaining in the first buffer after writing
	 * the header, fill it with frame data.
	 */
	if (iov[0].iov_len > hdr_sz) {
		buf = (caddr_t)iov[0].iov_base + hdr_sz;
		len = iov[0].iov_len - hdr_sz;

		copied += viona_copy_mblk(mp, copied, buf, len, &end);
	}

	/* Copy any remaining data into subsequent buffers, if present */
	for (int i = 1; i < n && !end; i++) {
		buf = (caddr_t)iov[i].iov_base;
		len = iov[i].iov_len;

		copied += viona_copy_mblk(mp, copied, buf, len, &end);
	}

	/* Was the expected amount of data copied? */
	if (copied != msz) {
		VIONA_PROBE5(too_short, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp, size_t, copied,
		    size_t, msz);
		VIONA_RING_STAT_INCR(ring, too_short);
		goto bad_frame;
	}

	/* Populate (read: zero) the header and account for it in the size */
	bzero(hdr, hdr_sz);
	copied += hdr_sz;

	/* Add chksum bits, if needed */
	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
		uint32_t cksum_flags;

		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
		    ((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->vrh_gso_size = DB_LSOMSS(mp);
		}

		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
		    &cksum_flags);
		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
		}
	}

	/* Release this chain */
	vmm_drv_page_release_chain(pages);
	vq_pushchain(ring, copied, cookie);
	return (0);

bad_frame:
	VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring, uint16_t, cookie,
	    mblk_t *, mp);
	VIONA_RING_STAT_INCR(ring, bad_rx_frame);

	vmm_drv_page_release_chain(pages);
	vq_pushchain(ring, MAX(copied, MIN_BUF_SIZE + hdr_sz), cookie);
	return (EINVAL);
}
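
/*
 * Note on header sizes (informational; the authoritative definitions live in
 * viona_impl.h): per the virtio specification, the basic virtio-net header
 * written by viona_recv_plain() consists of flags, gso_type, hdr_len,
 * gso_size, csum_start, and csum_offset fields (10 bytes), while the variant
 * used once VIRTIO_NET_F_MRG_RXBUF has been negotiated appends a 16-bit
 * num_buffers count (12 bytes), which viona_recv_merged() below maintains as
 * it consumes additional descriptor chains.
 */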

static int
viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
{
	struct iovec iov[VTNET_MAXSEGS];
	used_elem_t uelem[VTNET_MAXSEGS];
	vmm_page_t *pages = NULL, *hdr_pages = NULL;
	int n, i = 0, buf_idx = 0, err = 0;
	uint16_t cookie;
	caddr_t buf;
	size_t len, copied = 0, chunk = 0;
	struct virtio_net_mrgrxhdr *hdr = NULL;
	const size_t hdr_sz = sizeof (struct virtio_net_mrgrxhdr);
	boolean_t end = B_FALSE;
	const uint32_t features = ring->vr_link->l_features;

	ASSERT(msz >= MIN_BUF_SIZE);

	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &hdr_pages, NULL);
	if (n <= 0) {
		/* Without available buffers, the frame must be dropped. */
		VIONA_PROBE2(no_space, viona_vring_t *, ring, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, no_space);
		return (ENOSPC);
	}
	if (iov[0].iov_len < hdr_sz) {
		/*
		 * There is little to do if there is not even space available
		 * for the sole header.  Zero the buffer and bail out as a last
		 * act of desperation.
		 */
		bzero(iov[0].iov_base, iov[0].iov_len);
		uelem[0].id = cookie;
		uelem[0].len = iov[0].iov_len;
		err = EINVAL;
		goto done;
	}

	/* Grab the address of the header and do initial population */
	hdr = (struct virtio_net_mrgrxhdr *)iov[0].iov_base;
	bzero(hdr, hdr_sz);
	hdr->vrh_bufs = 1;

	/*
	 * If there is any space remaining in the first buffer after writing
	 * the header, fill it with frame data.  The size of the header itself
	 * is accounted for later.
	 */
	if (iov[0].iov_len > hdr_sz) {
		buf = iov[0].iov_base + hdr_sz;
		len = iov[0].iov_len - hdr_sz;

		size_t copy_len;
		copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
		chunk += copy_len;
		copied += copy_len;
	}
	i = 1;

	do {
		while (i < n && !end) {
			buf = iov[i].iov_base;
			len = iov[i].iov_len;

			size_t copy_len;
			copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
			chunk += copy_len;
			copied += copy_len;
			i++;
		}

		uelem[buf_idx].id = cookie;
		uelem[buf_idx].len = chunk;

		/*
		 * Try to grab another buffer from the ring if the mblk has not
		 * yet been entirely copied out.
		 */
		if (!end) {
			if (buf_idx == (VTNET_MAXSEGS - 1)) {
				/*
				 * Our arbitrary limit on the number of buffers
				 * to offer for merge has already been reached.
				 */
				err = EOVERFLOW;
				break;
			}
			if (pages != NULL) {
				vmm_drv_page_release_chain(pages);
				pages = NULL;
			}
			n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie,
			    &pages, NULL);
			if (n <= 0) {
				/*
				 * Without more immediate space to perform the
				 * copying, there is little choice left but to
				 * drop the packet.
				 */
				err = EMSGSIZE;
				break;
			}
			chunk = 0;
			i = 0;
			buf_idx++;
			/*
			 * Keep the header up-to-date with the number of
			 * buffers, but never reference its value since the
			 * guest could meddle with it.
			 */
			hdr->vrh_bufs++;
		}
	} while (!end && copied < msz);

	/* Account for the header size in the first buffer */
	uelem[0].len += hdr_sz;

	/*
	 * If no other errors were encountered during the copy, was the
	 * expected amount of data transferred?
	 */
	if (err == 0 && copied != msz) {
		VIONA_PROBE5(too_short, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp, size_t, copied,
		    size_t, msz);
		VIONA_RING_STAT_INCR(ring, too_short);
		err = EINVAL;
	}

	/* Add chksum bits, if needed */
	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
		uint32_t cksum_flags;

		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
		    ((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->vrh_gso_size = DB_LSOMSS(mp);
		}

		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
		    &cksum_flags);
		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
		}
	}

done:
	switch (err) {
	case 0:
		/* Success can fall right through to ring delivery */
		break;

	case EMSGSIZE:
		VIONA_PROBE3(rx_merge_underrun, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, rx_merge_underrun);
		break;

	case EOVERFLOW:
		VIONA_PROBE3(rx_merge_overrun, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, rx_merge_overrun);
		break;

	default:
		VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, bad_rx_frame);
	}

	if (hdr_pages != NULL) {
		vmm_drv_page_release_chain(hdr_pages);
	}
	if (pages != NULL) {
		vmm_drv_page_release_chain(pages);
	}
	vq_pushchain_many(ring, buf_idx + 1, uelem);
	return (err);
}
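
/*
 * Worked example for the merged path (assuming a 3000-byte frame, guest RX
 * buffers of 1536 bytes each posted as single-descriptor chains, and the
 * 12-byte merged header defined by the virtio spec):
 *
 *	buffer 0: 12-byte header + 1524 bytes of data  -> uelem[0].len = 1536
 *	buffer 1: 1476 bytes of data                   -> uelem[1].len = 1476
 *
 * viona_recv_merged() reports vrh_bufs = 2 in the header and returns both
 * used elements via vq_pushchain_many(), with the header size accounted to
 * the first element only.
 */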

static void
viona_rx_common(viona_vring_t *ring, mblk_t *mp, boolean_t is_loopback)
{
	viona_link_t *link = ring->vr_link;
	mblk_t *mprx = NULL, **mprx_prevp = &mprx;
	mblk_t *mpdrop = NULL, **mpdrop_prevp = &mpdrop;
	const boolean_t do_merge =
	    (link->l_features & VIRTIO_NET_F_MRG_RXBUF) != 0;
	const boolean_t allow_gro =
	    (link->l_features & VIRTIO_NET_F_GUEST_TSO4) != 0;

	size_t nrx = 0, ndrop = 0;

	while (mp != NULL) {
		mblk_t *next = mp->b_next;
		mblk_t *pad = NULL;
		size_t size = msgsize(mp);
		int err = 0;

		mp->b_next = NULL;

		/*
		 * We treat both a 'drop' response and errors the same here
		 * and put the packet on the drop chain.  As packets may be
		 * subject to different actions in ipf (which do not all
		 * return the same set of error values), an error processing
		 * one packet doesn't mean the next packet will also generate
		 * an error.
		 */
		if (VNETHOOK_INTERESTED_IN(link->l_neti) &&
		    viona_hook(link, ring, &mp, B_FALSE) != 0) {
			if (mp != NULL) {
				*mpdrop_prevp = mp;
				mpdrop_prevp = &mp->b_next;
			} else {
				/*
				 * If the hook consumer (e.g. ipf) already
				 * freed the mblk_t, update the drop count now.
				 */
				ndrop++;
			}
			mp = next;
			continue;
		}

		/*
		 * Virtio devices are prohibited from passing on packets
		 * larger than the MTU plus the Ethernet header if the guest
		 * has not negotiated GRO flags (e.g. GUEST_TSO*).  This
		 * applies irrespective of `do_merge`.
		 */
		if (size > sizeof (struct ether_header) + link->l_mtu) {
			const boolean_t can_emu_lso = DB_LSOMSS(mp) != 0;
			const boolean_t attempt_emu =
			    !allow_gro || size > VIONA_GRO_MAX_PACKET_SIZE;

			if ((DB_CKSUMFLAGS(mp) & HW_LSO) == 0 ||
			    (attempt_emu && !can_emu_lso)) {
				VIONA_PROBE3(rx_drop_over_mtu, viona_vring_t *,
				    ring, mblk_t *, mp, size_t, size);
				VIONA_RING_STAT_INCR(ring, rx_drop_over_mtu);
				err = E2BIG;
				goto pad_drop;
			}

			/*
			 * If the packet has come from another device or viona
			 * which expected to make use of LSO, we can split the
			 * packet on its behalf.
			 */
			if (attempt_emu) {
				mblk_t *tail = NULL;
				uint_t n_pkts = 0;

				/*
				 * Emulation of LSO requires that cksum offload
				 * be enabled on the mblk.  Since only IPv4 is
				 * supported by the LSO emulation, the v4 cksum
				 * is enabled unconditionally.
				 */
				if ((DB_CKSUMFLAGS(mp) &
				    (HCK_FULLCKSUM | HCK_PARTIALCKSUM)) == 0) {
					DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM;
				}
				DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM;

				mac_hw_emul(&mp, &tail, &n_pkts, MAC_ALL_EMULS);
				if (mp == NULL) {
					VIONA_RING_STAT_INCR(ring,
					    rx_gro_fallback_fail);
					viona_ring_stat_error(ring);
					mp = next;
					continue;
				}
				VIONA_PROBE4(rx_gro_fallback, viona_vring_t *,
				    ring, mblk_t *, mp, size_t, size,
				    uint_t, n_pkts);
				VIONA_RING_STAT_INCR(ring, rx_gro_fallback);
				ASSERT3P(tail, !=, NULL);
				if (tail != mp) {
					tail->b_next = next;
					next = mp->b_next;
					mp->b_next = NULL;
				}
				size = msgsize(mp);
			}
		}

		/*
		 * Ethernet frames are expected to be padded out in order to
		 * meet the minimum size.
		 *
		 * A special case is made for frames which are short by
		 * VLAN_TAGSZ, having been stripped of their VLAN tag while
		 * traversing MAC.  A preallocated (and recycled) mblk is used
		 * for that specific condition.
		 *
		 * All other frames that fall short on length will have custom
		 * zero-padding allocated and appended to them.
		 */
		if (size == NEED_VLAN_PAD_SIZE) {
			ASSERT(MBLKL(viona_vlan_pad_mp) == VLAN_TAGSZ);
			ASSERT(viona_vlan_pad_mp->b_cont == NULL);

			for (pad = mp; pad->b_cont != NULL; pad = pad->b_cont)
				;

			pad->b_cont = viona_vlan_pad_mp;
			size += VLAN_TAGSZ;
		} else if (size < MIN_BUF_SIZE) {
			const size_t pad_size = MIN_BUF_SIZE - size;
			mblk_t *zero_mp;

			zero_mp = allocb(pad_size, BPRI_MED);
			if (zero_mp == NULL) {
				err = ENOMEM;
				goto pad_drop;
			}

			VIONA_PROBE3(rx_pad_short, viona_vring_t *, ring,
			    mblk_t *, mp, size_t, pad_size);
			VIONA_RING_STAT_INCR(ring, rx_pad_short);
			zero_mp->b_wptr += pad_size;
			bzero(zero_mp->b_rptr, pad_size);
			linkb(mp, zero_mp);
			size += pad_size;
		}

		if (do_merge) {
			err = viona_recv_merged(ring, mp, size);
		} else {
			err = viona_recv_plain(ring, mp, size);
		}

		/*
		 * The VLAN padding mblk is meant for continual reuse, so
		 * remove it from the chain to prevent it from being freed.
		 *
		 * Custom allocated padding does not require this treatment
		 * and is freed normally.
		 */
		if (pad != NULL) {
			pad->b_cont = NULL;
		}

pad_drop:
		/*
		 * While rx processing (viona_recv_{merged,plain}) does not
		 * free mp on error, hook processing might or might not free
		 * it.  Handle either scenario -- if mp is not yet freed, it
		 * is queued up and freed after the guest has been notified.
		 * If mp is already NULL, just proceed on.
		 */
		if (err != 0) {
			*mpdrop_prevp = mp;
			mpdrop_prevp = &mp->b_next;

			/*
			 * If the available ring is empty, do not bother
			 * attempting to deliver any more frames.  Count the
			 * rest as dropped too.
			 */
			if (err == ENOSPC) {
				mp->b_next = next;
				break;
			} else {
				/*
				 * Cases other than the ring being empty of
				 * available descriptors count as errors for
				 * the ring/link stats.
				 */
				viona_ring_stat_error(ring);
			}
		} else {
			/* Chain successful mblks to be freed later */
			*mprx_prevp = mp;
			mprx_prevp = &mp->b_next;
			nrx++;
			viona_ring_stat_accept(ring, size);
		}
		mp = next;
	}

	membar_enter();
	viona_intr_ring(ring, B_FALSE);

	/* Free successfully received frames */
	if (mprx != NULL) {
		freemsgchain(mprx);
	}

	/* Free dropped frames, also tallying them */
	mp = mpdrop;
	while (mp != NULL) {
		mblk_t *next = mp->b_next;

		mp->b_next = NULL;
		freemsg(mp);
		mp = next;
		ndrop++;
		viona_ring_stat_drop(ring);
	}
	VIONA_PROBE3(rx, viona_link_t *, link, size_t, nrx, size_t, ndrop);
}

static void
viona_rx_classified(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t is_loopback)
{
	viona_vring_t *ring = (viona_vring_t *)arg;

	/* Drop traffic if ring is inactive or renewing its lease */
	if (ring->vr_state != VRS_RUN ||
	    (ring->vr_state_flags & VRSF_RENEW) != 0) {
		freemsgchain(mp);
		return;
	}

	viona_rx_common(ring, mp, is_loopback);
}

static void
viona_rx_mcast(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t is_loopback)
{
	viona_vring_t *ring = (viona_vring_t *)arg;
	mac_handle_t mh = ring->vr_link->l_mh;
	mblk_t *mp_mcast_only = NULL;
	mblk_t **mpp = &mp_mcast_only;

	/* Drop traffic if ring is inactive or renewing its lease */
	if (ring->vr_state != VRS_RUN ||
	    (ring->vr_state_flags & VRSF_RENEW) != 0) {
		freemsgchain(mp);
		return;
	}

	/*
	 * In addition to multicast traffic, broadcast packets will also arrive
	 * via the MAC_CLIENT_PROMISC_MULTI handler.  The mac_rx_set() callback
	 * for fully-classified traffic has already delivered that broadcast
	 * traffic, so it should be suppressed here, rather than duplicating it
	 * to the guest.
	 */
	while (mp != NULL) {
		mblk_t *mp_next;
		mac_header_info_t mhi;
		int err;

		mp_next = mp->b_next;
		mp->b_next = NULL;

		/* Determine the packet type */
		err = mac_vlan_header_info(mh, mp, &mhi);
		if (err != 0) {
			mblk_t *pull;

			/*
			 * It is possible that gathering of the header
			 * information was impeded by a leading mblk_t which
			 * was of inadequate length to reference the needed
			 * fields.  Try again, in case that could be solved
			 * with a pull-up.
			 */
			pull = msgpullup(mp, sizeof (struct ether_vlan_header));
			if (pull == NULL) {
				err = ENOMEM;
			} else {
				err = mac_vlan_header_info(mh, pull, &mhi);
				freemsg(pull);
			}

			if (err != 0) {
				VIONA_RING_STAT_INCR(ring, rx_mcast_check);
			}
		}

		/* Chain up matching packets while discarding others */
		if (err == 0 && mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) {
			*mpp = mp;
			mpp = &mp->b_next;
		} else {
			freemsg(mp);
		}

		mp = mp_next;
	}

	if (mp_mcast_only != NULL) {
		viona_rx_common(ring, mp_mcast_only, is_loopback);
	}
}

int
viona_rx_set(viona_link_t *link, viona_promisc_t mode)
{
	viona_vring_t *ring = &link->l_vrings[VIONA_VQ_RX];
	int err = 0;

	if (link->l_mph != NULL) {
		mac_promisc_remove(link->l_mph);
		link->l_mph = NULL;
	}

	switch (mode) {
	case VIONA_PROMISC_MULTI:
		mac_rx_set(link->l_mch, viona_rx_classified, ring);
		err = mac_promisc_add(link->l_mch, MAC_CLIENT_PROMISC_MULTI,
		    viona_rx_mcast, ring, &link->l_mph,
		    MAC_PROMISC_FLAGS_NO_TX_LOOP |
		    MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
		break;
	case VIONA_PROMISC_ALL:
		mac_rx_clear(link->l_mch);
		err = mac_promisc_add(link->l_mch, MAC_CLIENT_PROMISC_ALL,
		    viona_rx_classified, ring, &link->l_mph,
		    MAC_PROMISC_FLAGS_NO_TX_LOOP |
		    MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
		/*
		 * In case adding the promisc handler failed, restore the
		 * generic classified callback so that packets continue to
		 * flow to the guest.
		 */
		if (err != 0) {
			mac_rx_set(link->l_mch, viona_rx_classified, ring);
		}
		break;
	case VIONA_PROMISC_NONE:
	default:
		mac_rx_set(link->l_mch, viona_rx_classified, ring);
		break;
	}

	return (err);
}

void
viona_rx_clear(viona_link_t *link)
{
	if (link->l_mph != NULL) {
		mac_promisc_remove(link->l_mph);
		link->l_mph = NULL;
	}
	mac_rx_clear(link->l_mch);
}