/*
 * Copyright (c) 2013 Chris Torek <torek @ torek net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2025 Oxide Computer Company
 * Copyright 2022 Michael Zeller
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

#include <sys/types.h>
#include <sys/strsubr.h>

#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/vlan.h>

#include "viona_impl.h"



#define	VTNET_MAXSEGS		32

/* Min. octets in an ethernet frame minus FCS */
#define	MIN_BUF_SIZE		60
#define	NEED_VLAN_PAD_SIZE	(MIN_BUF_SIZE - VLAN_TAGSZ)

static mblk_t *viona_vlan_pad_mp;

void
viona_rx_init(void)
{
	mblk_t *mp;

	ASSERT(viona_vlan_pad_mp == NULL);

	/* Create mblk for padding when VLAN tags are stripped */
	mp = allocb_wait(VLAN_TAGSZ, BPRI_HI, STR_NOSIG, NULL);
	bzero(mp->b_rptr, VLAN_TAGSZ);
	mp->b_wptr += VLAN_TAGSZ;
	viona_vlan_pad_mp = mp;
}

void
viona_rx_fini(void)
{
	mblk_t *mp;

	/* Clean up the VLAN padding mblk */
	mp = viona_vlan_pad_mp;
	viona_vlan_pad_mp = NULL;
	VERIFY(mp != NULL && mp->b_cont == NULL);
	freemsg(mp);
}

void
viona_worker_rx(viona_vring_t *ring, viona_link_t *link)
{
	(void) thread_vsetname(curthread, "viona_rx_%p", ring);

	ASSERT(MUTEX_HELD(&ring->vr_lock));
	ASSERT3U(ring->vr_state, ==, VRS_RUN);

	viona_ring_disable_notify(ring);

	do {
		if (vmm_drv_lease_expired(ring->vr_lease)) {
			/*
			 * Set the renewal flag, causing incoming traffic to be
			 * dropped, and issue an RX barrier to ensure any
			 * threads in the RX callbacks will have finished.
			 * The vr_lock cannot be held across the barrier as it
			 * poses a deadlock risk.
			 */
			ring->vr_state_flags |= VRSF_RENEW;
			mutex_exit(&ring->vr_lock);
			mac_rx_barrier(link->l_mch);
			mutex_enter(&ring->vr_lock);

			if (!viona_ring_lease_renew(ring)) {
				break;
			}
			ring->vr_state_flags &= ~VRSF_RENEW;
		}

		/*
		 * For now, there is little to do in the RX worker as inbound
		 * data is delivered by MAC via the RX callbacks.  If tap-like
		 * functionality is added later, this would be a convenient
		 * place to inject frames into the guest.
		 */
		(void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock);
	} while (!vring_need_bail(ring));

	ring->vr_state = VRS_STOP;

	/*
	 * The RX ring is stopping, before we start tearing it down it
	 * is imperative that we perform an RX barrier so that
	 * incoming packets are dropped at viona_rx_classified().
	 */
	mutex_exit(&ring->vr_lock);
	mac_rx_barrier(link->l_mch);
	mutex_enter(&ring->vr_lock);

	/*
	 * If we bailed while renewing the ring lease, we cannot reset
	 * USED_NO_NOTIFY, since we lack a valid mapping to do so.
	 */
	if (ring->vr_lease != NULL) {
		viona_ring_enable_notify(ring);
	}
}

static size_t
viona_copy_mblk(const mblk_t *mp, size_t seek, caddr_t buf, size_t len,
    boolean_t *end)
{
	size_t copied = 0;
	size_t off = 0;

	/* Seek past already-consumed data */
	while (seek > 0 && mp != NULL) {
		const size_t chunk = MBLKL(mp);

		if (chunk > seek) {
			off = seek;
			break;
		}
		mp = mp->b_cont;
		seek -= chunk;
	}

	while (mp != NULL) {
		const size_t chunk = MBLKL(mp) - off;
		const size_t to_copy = MIN(chunk, len);

		bcopy(mp->b_rptr + off, buf, to_copy);
		copied += to_copy;
		buf += to_copy;
		len -= to_copy;

		/*
		 * If all the remaining data in the mblk_t was copied, move on
		 * to the next one in the chain.  Any seek offset applied to
		 * the first mblk copy is zeroed out for subsequent operations.
		 */
		if (chunk == to_copy) {
			mp = mp->b_cont;
			off = 0;
		}
#ifdef DEBUG
		else {
			/*
			 * The only valid reason for the copy to consume less
			 * than the entire contents of the mblk_t is because
			 * the output buffer has been filled.
			 */
			ASSERT0(len);
		}
#endif

		/* Go no further if the buffer has been filled */
		if (len == 0) {
			break;
		}
	}
	*end = (mp == NULL);
	return (copied);
}

static int
viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
{
	struct iovec iov[VTNET_MAXSEGS];
	uint16_t cookie;
	int n;
	const size_t hdr_sz = sizeof (struct virtio_net_hdr);
	struct virtio_net_hdr *hdr;
	size_t len, copied = 0;
	caddr_t buf = NULL;
	boolean_t end = B_FALSE;
	const uint32_t features = ring->vr_link->l_features;
	vmm_page_t *pages = NULL;

	ASSERT(msz >= MIN_BUF_SIZE);

	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages, NULL);
	if (n <= 0) {
		/* Without available buffers, the frame must be dropped. */
		return (ENOSPC);
	}
	if (iov[0].iov_len < hdr_sz) {
		/*
		 * There is little to do if there is not even space available
		 * for the sole header.  Zero the buffer and bail out as a last
		 * act of desperation.
		 */
		bzero(iov[0].iov_base, iov[0].iov_len);
		goto bad_frame;
	}

	/* Grab the address of the header before anything else */
	hdr = (struct virtio_net_hdr *)iov[0].iov_base;

	/*
	 * If there is any space remaining in the first buffer after writing
	 * the header, fill it with frame data.
	 */
	if (iov[0].iov_len > hdr_sz) {
		buf = (caddr_t)iov[0].iov_base + hdr_sz;
		len = iov[0].iov_len - hdr_sz;

		copied += viona_copy_mblk(mp, copied, buf, len, &end);
	}

	/* Copy any remaining data into subsequent buffers, if present */
	for (int i = 1; i < n && !end; i++) {
		buf = (caddr_t)iov[i].iov_base;
		len = iov[i].iov_len;

		copied += viona_copy_mblk(mp, copied, buf, len, &end);
	}

	/* Was the expected amount of data copied? */
	if (copied != msz) {
		VIONA_PROBE5(too_short, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp, size_t, copied,
		    size_t, msz);
		VIONA_RING_STAT_INCR(ring, too_short);
		goto bad_frame;
	}

	/* Populate (read: zero) the header and account for it in the size */
	bzero(hdr, hdr_sz);
	copied += hdr_sz;

	/* Add chksum bits, if needed */
	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
		uint32_t cksum_flags;

		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
		    ((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->vrh_gso_size = DB_LSOMSS(mp);
		}

		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
		    &cksum_flags);
		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
		}
	}

	/* Release this chain */
	vmm_drv_page_release_chain(pages);
	vq_pushchain(ring, copied, cookie);
	return (0);

bad_frame:
	VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring, uint16_t, cookie,
	    mblk_t *, mp);
	VIONA_RING_STAT_INCR(ring, bad_rx_frame);

	vmm_drv_page_release_chain(pages);
	vq_pushchain(ring, MAX(copied, MIN_BUF_SIZE + hdr_sz), cookie);
	return (EINVAL);
}

static int
viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
{
	struct iovec iov[VTNET_MAXSEGS];
	used_elem_t uelem[VTNET_MAXSEGS];
	vmm_page_t *pages = NULL, *hdr_pages = NULL;
	int n, i = 0, buf_idx = 0, err = 0;
	uint16_t cookie;
	caddr_t buf;
	size_t len, copied = 0, chunk = 0;
	struct virtio_net_mrgrxhdr *hdr = NULL;
	const size_t hdr_sz = sizeof (struct virtio_net_mrgrxhdr);
	boolean_t end = B_FALSE;
	const uint32_t features = ring->vr_link->l_features;

	ASSERT(msz >= MIN_BUF_SIZE);

	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &hdr_pages, NULL);
	if (n <= 0) {
		/* Without available buffers, the frame must be dropped. */
		VIONA_PROBE2(no_space, viona_vring_t *, ring, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, no_space);
		return (ENOSPC);
	}
	if (iov[0].iov_len < hdr_sz) {
		/*
		 * There is little to do if there is not even space available
		 * for the sole header.  Zero the buffer and bail out as a last
		 * act of desperation.
		 */
		bzero(iov[0].iov_base, iov[0].iov_len);
		uelem[0].id = cookie;
		uelem[0].len = iov[0].iov_len;
		err = EINVAL;
		goto done;
	}

	/* Grab the address of the header and do initial population */
	hdr = (struct virtio_net_mrgrxhdr *)iov[0].iov_base;
	bzero(hdr, hdr_sz);
	hdr->vrh_bufs = 1;

	/*
	 * If there is any space remaining in the first buffer after writing
	 * the header, fill it with frame data.  The size of the header itself
	 * is accounted for later.
	 */
	if (iov[0].iov_len > hdr_sz) {
		buf = iov[0].iov_base + hdr_sz;
		len = iov[0].iov_len - hdr_sz;

		size_t copy_len;
		copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
		chunk += copy_len;
		copied += copy_len;
	}
	i = 1;

	do {
		while (i < n && !end) {
			buf = iov[i].iov_base;
			len = iov[i].iov_len;

			size_t copy_len;
			copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
			chunk += copy_len;
			copied += copy_len;
			i++;
		}

		uelem[buf_idx].id = cookie;
		uelem[buf_idx].len = chunk;

		/*
		 * Try to grab another buffer from the ring if the mblk has not
		 * yet been entirely copied out.
		 */
		if (!end) {
			if (buf_idx == (VTNET_MAXSEGS - 1)) {
				/*
				 * Our arbitrary limit on the number of buffers
				 * to offer for merge has already been reached.
				 */
				err = EOVERFLOW;
				break;
			}
			if (pages != NULL) {
				vmm_drv_page_release_chain(pages);
				pages = NULL;
			}
			n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie,
			    &pages, NULL);
			if (n <= 0) {
				/*
				 * Without more immediate space to perform the
				 * copying, there is little choice left but to
				 * drop the packet.
				 */
				err = EMSGSIZE;
				break;
			}
			chunk = 0;
			i = 0;
			buf_idx++;
			/*
			 * Keep the header up-to-date with the number of
			 * buffers, but never reference its value since the
			 * guest could meddle with it.
			 */
			hdr->vrh_bufs++;
		}
	} while (!end && copied < msz);

	/* Account for the header size in the first buffer */
	uelem[0].len += hdr_sz;

	/*
	 * If no other errors were encountered during the copy, was the
	 * expected amount of data transferred?
	 */
	if (err == 0 && copied != msz) {
		VIONA_PROBE5(too_short, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp, size_t, copied,
		    size_t, msz);
		VIONA_RING_STAT_INCR(ring, too_short);
		err = EINVAL;
	}

	/* Add chksum bits, if needed */
	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
		uint32_t cksum_flags;

		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
		    ((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->vrh_gso_size = DB_LSOMSS(mp);
		}

		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
		    &cksum_flags);
		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
		}
	}

done:
	switch (err) {
	case 0:
		/* Success can fall right through to ring delivery */
		break;

	case EMSGSIZE:
		VIONA_PROBE3(rx_merge_underrun, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, rx_merge_underrun);
		break;

	case EOVERFLOW:
		VIONA_PROBE3(rx_merge_overrun, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, rx_merge_overrun);
		break;

	default:
		VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring,
		    uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, bad_rx_frame);
	}

	if (hdr_pages != NULL) {
		vmm_drv_page_release_chain(hdr_pages);
	}
	if (pages != NULL) {
		vmm_drv_page_release_chain(pages);
	}
	vq_pushchain_many(ring, buf_idx + 1, uelem);
	return (err);
}

static void
viona_rx_common(viona_vring_t *ring, mblk_t *mp, boolean_t is_loopback)
{
	viona_link_t *link = ring->vr_link;
	mblk_t *mprx = NULL, **mprx_prevp = &mprx;
	mblk_t *mpdrop = NULL, **mpdrop_prevp = &mpdrop;
	const boolean_t do_merge =
	    (link->l_features & VIRTIO_NET_F_MRG_RXBUF) != 0;
	const boolean_t allow_gro =
	    (link->l_features & VIRTIO_NET_F_GUEST_TSO4) != 0;

	size_t nrx = 0, ndrop = 0;

	while (mp != NULL) {
		mblk_t *next = mp->b_next;
		mblk_t *pad = NULL;
		size_t size = msgsize(mp);
		int err = 0;

		mp->b_next = NULL;

		/*
		 * We treat both a 'drop' response and errors the same here
		 * and put the packet on the drop chain.  As packets may be
		 * subject to different actions in ipf (which do not all
		 * return the same set of error values), an error processing
		 * one packet doesn't mean the next packet will also generate
		 * an error.
		 */
		if (VNETHOOK_INTERESTED_IN(link->l_neti) &&
		    viona_hook(link, ring, &mp, B_FALSE) != 0) {
			if (mp != NULL) {
				*mpdrop_prevp = mp;
				mpdrop_prevp = &mp->b_next;
			} else {
				/*
				 * If the hook consumer (e.g. ipf) already
				 * freed the mblk_t, update the drop count now.
				 */
				ndrop++;
			}
			mp = next;
			continue;
		}

		/*
		 * Virtio devices are prohibited from passing on packets larger
		 * than the MTU + Eth if the guest has not negotiated GRO flags
		 * (e.g., GUEST_TSO*). This occurs irrespective of `do_merge`.
		 */
		if (size > sizeof (struct ether_header) + link->l_mtu) {
			const boolean_t can_emu_lso = DB_LSOMSS(mp) != 0;
			const boolean_t attempt_emu =
			    !allow_gro || size > VIONA_GRO_MAX_PACKET_SIZE;

			if ((DB_CKSUMFLAGS(mp) & HW_LSO) == 0 ||
			    (attempt_emu && !can_emu_lso)) {
				VIONA_PROBE3(rx_drop_over_mtu, viona_vring_t *,
				    ring, mblk_t *, mp, size_t, size);
				VIONA_RING_STAT_INCR(ring, rx_drop_over_mtu);
				err = E2BIG;
				goto pad_drop;
			}

			/*
			 * If the packet has come from another device or viona
			 * which expected to make use of LSO, we can split the
			 * packet on its behalf.
			 */
			if (attempt_emu) {
				mblk_t *tail = NULL;
				uint_t n_pkts = 0;

				DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM |
				    HCK_FULLCKSUM;
				mac_hw_emul(&mp, &tail, &n_pkts, MAC_ALL_EMULS);
				if (mp == NULL) {
					VIONA_RING_STAT_INCR(ring,
					    rx_gro_fallback_fail);
					viona_ring_stat_error(ring);
					mp = next;
					continue;
				}
				VIONA_PROBE4(rx_gro_fallback, viona_vring_t *,
				    ring, mblk_t *, mp, size_t, size,
				    uint_t, n_pkts);
				VIONA_RING_STAT_INCR(ring, rx_gro_fallback);
				ASSERT3P(tail, !=, NULL);
				if (tail != mp) {
					tail->b_next = next;
					next = mp->b_next;
					mp->b_next = NULL;
				}
				size = msgsize(mp);
			}
		}

		/*
		 * Ethernet frames are expected to be padded out in order to
		 * meet the minimum size.
		 *
		 * A special case is made for frames which are short by
		 * VLAN_TAGSZ, having been stripped of their VLAN tag while
		 * traversing MAC.  A preallocated (and recycled) mblk is used
		 * for that specific condition.
		 *
		 * All other frames that fall short on length will have custom
		 * zero-padding allocated and appended to them.
		 */
		if (size == NEED_VLAN_PAD_SIZE) {
			ASSERT(MBLKL(viona_vlan_pad_mp) == VLAN_TAGSZ);
			ASSERT(viona_vlan_pad_mp->b_cont == NULL);

			for (pad = mp; pad->b_cont != NULL; pad = pad->b_cont)
				;

			pad->b_cont = viona_vlan_pad_mp;
			size += VLAN_TAGSZ;
		} else if (size < MIN_BUF_SIZE) {
			const size_t pad_size = MIN_BUF_SIZE - size;
			mblk_t *zero_mp;

			zero_mp = allocb(pad_size, BPRI_MED);
			if (zero_mp == NULL) {
				err = ENOMEM;
				goto pad_drop;
			}

			VIONA_PROBE3(rx_pad_short, viona_vring_t *, ring,
			    mblk_t *, mp, size_t, pad_size);
			VIONA_RING_STAT_INCR(ring, rx_pad_short);
			zero_mp->b_wptr += pad_size;
			bzero(zero_mp->b_rptr, pad_size);
			linkb(mp, zero_mp);
			size += pad_size;
		}

		if (do_merge) {
			err = viona_recv_merged(ring, mp, size);
		} else {
			err = viona_recv_plain(ring, mp, size);
		}

		/*
		 * The VLAN padding mblk is meant for continual reuse, so
		 * remove it from the chain to prevent it from being freed.
		 *
		 * Custom allocated padding does not require this treatment and
		 * is freed normally.
		 */
		if (pad != NULL) {
			pad->b_cont = NULL;
		}

pad_drop:
		/*
		 * While an error during rx processing
		 * (viona_recv_{merged,plain}) does not free mp, hook
		 * processing might or might not free mp.  Handle either
		 * scenario -- if mp is not yet free, it is queued up and
		 * freed after the guest has been notified.  If mp is
		 * already NULL, just proceed on.
		 */
		if (err != 0) {
			*mpdrop_prevp = mp;
			mpdrop_prevp = &mp->b_next;

			/*
			 * If the available ring is empty, do not bother
			 * attempting to deliver any more frames.  Count the
			 * rest as dropped too.
			 */
			if (err == ENOSPC) {
				mp->b_next = next;
				break;
			} else {
				/*
				 * Cases other than the ring being empty of
				 * available descriptors count as errors for the
				 * ring/link stats.
				 */
				viona_ring_stat_error(ring);
			}
		} else {
			/* Chain successful mblks to be freed later */
			*mprx_prevp = mp;
			mprx_prevp = &mp->b_next;
			nrx++;
			viona_ring_stat_accept(ring, size);
		}
		mp = next;
	}

	membar_enter();
	viona_intr_ring(ring, B_FALSE);

	/* Free successfully received frames */
	if (mprx != NULL) {
		freemsgchain(mprx);
	}

	/* Free dropped frames, also tallying them */
	mp = mpdrop;
	while (mp != NULL) {
		mblk_t *next = mp->b_next;

		mp->b_next = NULL;
		freemsg(mp);
		mp = next;
		ndrop++;
		viona_ring_stat_drop(ring);
	}
	VIONA_PROBE3(rx, viona_link_t *, link, size_t, nrx, size_t, ndrop);
}

static void
viona_rx_classified(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t is_loopback)
{
	viona_vring_t *ring = (viona_vring_t *)arg;

	/* Drop traffic if ring is inactive or renewing its lease */
	if (ring->vr_state != VRS_RUN ||
	    (ring->vr_state_flags & VRSF_RENEW) != 0) {
		freemsgchain(mp);
		return;
	}

	viona_rx_common(ring, mp, is_loopback);
}

static void
viona_rx_mcast(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t is_loopback)
{
	viona_vring_t *ring = (viona_vring_t *)arg;
	mac_handle_t mh = ring->vr_link->l_mh;
	mblk_t *mp_mcast_only = NULL;
	mblk_t **mpp = &mp_mcast_only;

	/* Drop traffic if ring is inactive or renewing its lease */
	if (ring->vr_state != VRS_RUN ||
	    (ring->vr_state_flags & VRSF_RENEW) != 0) {
		freemsgchain(mp);
		return;
	}

	/*
	 * In addition to multicast traffic, broadcast packets will also arrive
	 * via the MAC_CLIENT_PROMISC_MULTI handler.  The mac_rx_set() callback
	 * for fully-classified traffic has already delivered that broadcast
	 * traffic, so it should be suppressed here, rather than duplicating it
	 * to the guest.
	 */
	while (mp != NULL) {
		mblk_t *mp_next;
		mac_header_info_t mhi;
		int err;

		mp_next = mp->b_next;
		mp->b_next = NULL;

		/* Determine the packet type */
		err = mac_vlan_header_info(mh, mp, &mhi);
		if (err != 0) {
			mblk_t *pull;

			/*
			 * It is possible that gathering of the header
			 * information was impeded by a leading mblk_t which
			 * was of inadequate length to reference the needed
			 * fields.  Try again, in case that could be solved
			 * with a pull-up.
			 */
			pull = msgpullup(mp,
			    sizeof (struct ether_vlan_header));
			if (pull == NULL) {
				err = ENOMEM;
			} else {
				err = mac_vlan_header_info(mh, pull, &mhi);
				freemsg(pull);
			}

			if (err != 0) {
				VIONA_RING_STAT_INCR(ring, rx_mcast_check);
			}
		}

		/* Chain up matching packets while discarding others */
		if (err == 0 && mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) {
			*mpp = mp;
			mpp = &mp->b_next;
		} else {
			freemsg(mp);
		}

		mp = mp_next;
	}

	if (mp_mcast_only != NULL) {
		viona_rx_common(ring, mp_mcast_only, is_loopback);
	}
}

int
viona_rx_set(viona_link_t *link, viona_promisc_t mode)
{
	viona_vring_t *ring = &link->l_vrings[VIONA_VQ_RX];
	int err = 0;

	if (link->l_mph != NULL) {
		mac_promisc_remove(link->l_mph);
		link->l_mph = NULL;
	}

	switch (mode) {
	case VIONA_PROMISC_MULTI:
		mac_rx_set(link->l_mch, viona_rx_classified, ring);
		err = mac_promisc_add(link->l_mch, MAC_CLIENT_PROMISC_MULTI,
		    viona_rx_mcast, ring, &link->l_mph,
		    MAC_PROMISC_FLAGS_NO_TX_LOOP |
		    MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
		break;
	case VIONA_PROMISC_ALL:
		mac_rx_clear(link->l_mch);
		err = mac_promisc_add(link->l_mch, MAC_CLIENT_PROMISC_ALL,
		    viona_rx_classified, ring, &link->l_mph,
		    MAC_PROMISC_FLAGS_NO_TX_LOOP |
		    MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
		/*
		 * In case adding the promisc handler failed, restore the
		 * generic classified callback so that packets continue to
		 * flow to the guest.
		 */
		if (err != 0) {
			mac_rx_set(link->l_mch, viona_rx_classified, ring);
		}
		break;
	case VIONA_PROMISC_NONE:
	default:
		mac_rx_set(link->l_mch, viona_rx_classified, ring);
		break;
	}

	return (err);
}

void
viona_rx_clear(viona_link_t *link)
{
	if (link->l_mph != NULL) {
		mac_promisc_remove(link->l_mph);
		link->l_mph = NULL;
	}
	mac_rx_clear(link->l_mch);
}