/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2013 Nexenta Inc.  All rights reserved.
 * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
 * Copyright 2021 Joyent, Inc.
 * Copyright 2019 Joshua M. Clulow <josh@sysmgr.org>
 */

/* Based on the NetBSD virtio driver by Minoura Makoto. */
/*
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * VIRTIO NETWORK DRIVER
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/debug.h>
#include <sys/pci.h>
#include <sys/ethernet.h>
#include <sys/vlan.h>
#include <sys/sysmacros.h>
#include <sys/smbios.h>

#include <sys/dlpi.h>
#include <sys/taskq.h>

#include <sys/pattr.h>
#include <sys/strsun.h>

#include <sys/random.h>
#include <sys/containerof.h>
#include <sys/stream.h>
#include <inet/tcp.h>

#include <sys/mac.h>
#include <sys/mac_provider.h>
#include <sys/mac_ether.h>

#include "virtio.h"
#include "vioif.h"

/*
 * While most hypervisors support the control queue, older versions of bhyve
 * on illumos did not.  To allow the historic behaviour of the illumos vioif
 * driver, the following tuneable causes us to pretend that the request always
 * succeeds if the underlying virtual device does not have support.
 */
int vioif_fake_promisc_success = 1;

static int vioif_quiesce(dev_info_t *);
static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
static boolean_t vioif_has_feature(vioif_t *, uint32_t);
static void vioif_reclaim_restart(vioif_t *);
static int vioif_m_stat(void *, uint_t, uint64_t *);
static void vioif_m_stop(void *);
static int vioif_m_start(void *);
static int vioif_m_multicst(void *, boolean_t, const uint8_t *);
static int vioif_m_setpromisc(void *, boolean_t);
static int vioif_m_unicst(void *, const uint8_t *);
static mblk_t *vioif_m_tx(void *, mblk_t *);
static int vioif_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
    const void *);
static int vioif_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
static void vioif_m_propinfo(void *, const char *, mac_prop_id_t,
    mac_prop_info_handle_t);
static boolean_t vioif_m_getcapab(void *, mac_capab_t, void *);
static uint_t vioif_add_rx(vioif_t *);


static struct cb_ops vioif_cb_ops = {
	.cb_rev = CB_REV,
	.cb_flag = D_MP | D_NEW,

	.cb_open = nulldev,
	.cb_close = nulldev,
	.cb_strategy = nodev,
	.cb_print = nodev,
	.cb_dump = nodev,
	.cb_read = nodev,
	.cb_write = nodev,
	.cb_ioctl = nodev,
	.cb_devmap = nodev,
	.cb_mmap = nodev,
	.cb_segmap = nodev,
	.cb_chpoll = nochpoll,
	.cb_prop_op = ddi_prop_op,
	.cb_str = NULL,
	.cb_aread = nodev,
	.cb_awrite = nodev,
};

static struct dev_ops vioif_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,

	.devo_attach = vioif_attach,
	.devo_detach = vioif_detach,
	.devo_quiesce = vioif_quiesce,

	.devo_cb_ops = &vioif_cb_ops,

	.devo_getinfo = NULL,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_reset = nodev,
	.devo_bus_ops = NULL,
	.devo_power = NULL,
};

static struct modldrv vioif_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "VIRTIO network driver",
	.drv_dev_ops = &vioif_dev_ops
};

static struct modlinkage vioif_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &vioif_modldrv, NULL }
};

static mac_callbacks_t vioif_mac_callbacks = {
	.mc_getstat = vioif_m_stat,
	.mc_start = vioif_m_start,
	.mc_stop = vioif_m_stop,
	.mc_setpromisc = vioif_m_setpromisc,
	.mc_multicst = vioif_m_multicst,
	.mc_unicst = vioif_m_unicst,
	.mc_tx = vioif_m_tx,

	.mc_callbacks = (MC_GETCAPAB | MC_SETPROP |
	    MC_GETPROP | MC_PROPINFO),
	.mc_getcapab = vioif_m_getcapab,
	.mc_setprop = vioif_m_setprop,
	.mc_getprop = vioif_m_getprop,
	.mc_propinfo = vioif_m_propinfo,
};

static const uchar_t vioif_broadcast[ETHERADDRL] = {
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};

/*
 * Interval for the periodic TX reclaim.
 */
uint_t vioif_reclaim_ms = 200;

/*
 * Allow the operator to override the kinds of interrupts we'll use for
 * vioif.  This value defaults to -1 so that it can be overridden to 0 in
 * /etc/system.
 */
int vioif_allowed_int_types = -1;

/*
 * DMA attribute template for transmit and receive buffers.  The SGL entry
 * count will be modified before using the template.  Note that these
 * allocations are aligned so that VIOIF_HEADER_SKIP places the IP header in
 * received frames at the correct offset for the networking stack.
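 * (Concretely, vioif_alloc_bufs() verifies below that the frame data starts
 * at an address that is 2 modulo 4, so the 14-byte Ethernet header ends on a
 * 4-byte boundary and the IP header that follows it is 4-byte aligned.)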
 */
ddi_dma_attr_t vioif_dma_attr_bufs = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	.dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = VIOIF_HEADER_ALIGN,
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 0,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};

/*
 * DMA attributes for mapping larger transmit buffers from the networking
 * stack.  The requirements are quite loose, but note that the SGL entry
 * length field is 32-bit.
 */
ddi_dma_attr_t vioif_dma_attr_external = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	.dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = 1,
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = VIOIF_MAX_SEGS - 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};


/*
 * VIRTIO NET MAC PROPERTIES
 */
#define	VIOIF_MACPROP_TXCOPY_THRESH	"_txcopy_thresh"
#define	VIOIF_MACPROP_TXCOPY_THRESH_DEF	300
#define	VIOIF_MACPROP_TXCOPY_THRESH_MAX	640

#define	VIOIF_MACPROP_RXCOPY_THRESH	"_rxcopy_thresh"
#define	VIOIF_MACPROP_RXCOPY_THRESH_DEF	300
#define	VIOIF_MACPROP_RXCOPY_THRESH_MAX	640

static char *vioif_priv_props[] = {
	VIOIF_MACPROP_TXCOPY_THRESH,
	VIOIF_MACPROP_RXCOPY_THRESH,
	NULL
};


static vioif_txbuf_t *
vioif_txbuf_alloc(vioif_t *vif)
{
	vioif_txbuf_t *tb;

	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	if ((tb = list_remove_head(&vif->vif_txbufs)) != NULL) {
		vif->vif_ntxbufs_alloc++;
	}

	return (tb);
}

static void
vioif_txbuf_free(vioif_t *vif, vioif_txbuf_t *tb)
{
	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	VERIFY3U(vif->vif_ntxbufs_alloc, >, 0);
	vif->vif_ntxbufs_alloc--;

	virtio_chain_clear(tb->tb_chain);
	list_insert_head(&vif->vif_txbufs, tb);
}

static vioif_rxbuf_t *
vioif_rxbuf_alloc(vioif_t *vif)
{
	vioif_rxbuf_t *rb;

	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	if ((rb = list_remove_head(&vif->vif_rxbufs)) != NULL) {
		vif->vif_nrxbufs_alloc++;
	}

	return (rb);
}

static void
vioif_rxbuf_free(vioif_t *vif, vioif_rxbuf_t *rb)
{
	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	VERIFY3U(vif->vif_nrxbufs_alloc, >, 0);
	vif->vif_nrxbufs_alloc--;

	virtio_chain_clear(rb->rb_chain);
	list_insert_head(&vif->vif_rxbufs, rb);
}

static void
vioif_rx_free_callback(caddr_t free_arg)
{
	vioif_rxbuf_t *rb = (vioif_rxbuf_t *)free_arg;
	vioif_t *vif = rb->rb_vioif;

	mutex_enter(&vif->vif_mutex);

	/*
	 * Return this receive buffer to the free list.
	 */
	vioif_rxbuf_free(vif, rb);

	VERIFY3U(vif->vif_nrxbufs_onloan, >, 0);
	vif->vif_nrxbufs_onloan--;

	/*
	 * Attempt to replenish the receive queue with at least the buffer we
	 * just freed.  There isn't a great way to deal with failure here,
	 * though: because we'll only loan at most half of the buffers, there
	 * should always be at least some available even if this fails.
	 */
	(void) vioif_add_rx(vif);

	mutex_exit(&vif->vif_mutex);
}

static vioif_ctrlbuf_t *
vioif_ctrlbuf_alloc(vioif_t *vif)
{
	vioif_ctrlbuf_t *cb;

	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	if ((cb = list_remove_head(&vif->vif_ctrlbufs)) != NULL) {
		vif->vif_nctrlbufs_alloc++;
	}

	return (cb);
}

static void
vioif_ctrlbuf_free(vioif_t *vif, vioif_ctrlbuf_t *cb)
{
	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	VERIFY3U(vif->vif_nctrlbufs_alloc, >, 0);
	vif->vif_nctrlbufs_alloc--;

	virtio_chain_clear(cb->cb_chain);
	list_insert_head(&vif->vif_ctrlbufs, cb);
}

static void
vioif_free_bufs(vioif_t *vif)
{
	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	VERIFY3U(vif->vif_ntxbufs_alloc, ==, 0);
	for (uint_t i = 0; i < vif->vif_txbufs_capacity; i++) {
		vioif_txbuf_t *tb = &vif->vif_txbufs_mem[i];

		/*
		 * Ensure that this txbuf is now in the free list:
		 */
		VERIFY(list_link_active(&tb->tb_link));
		list_remove(&vif->vif_txbufs, tb);

		/*
		 * We should not have an mblk chain at this point.
		 */
		VERIFY3P(tb->tb_mp, ==, NULL);

		if (tb->tb_dma != NULL) {
			virtio_dma_free(tb->tb_dma);
			tb->tb_dma = NULL;
		}

		if (tb->tb_chain != NULL) {
			virtio_chain_free(tb->tb_chain);
			tb->tb_chain = NULL;
		}

		if (tb->tb_dmaext != NULL) {
			for (uint_t j = 0; j < tb->tb_dmaext_capacity; j++) {
				if (tb->tb_dmaext[j] != NULL) {
					virtio_dma_free(
					    tb->tb_dmaext[j]);
					tb->tb_dmaext[j] = NULL;
				}
			}

			kmem_free(tb->tb_dmaext,
			    sizeof (virtio_dma_t *) * tb->tb_dmaext_capacity);
			tb->tb_dmaext = NULL;
			tb->tb_dmaext_capacity = 0;
		}
	}
	VERIFY(list_is_empty(&vif->vif_txbufs));
	if (vif->vif_txbufs_mem != NULL) {
		kmem_free(vif->vif_txbufs_mem,
		    sizeof (vioif_txbuf_t) * vif->vif_txbufs_capacity);
		vif->vif_txbufs_mem = NULL;
		vif->vif_txbufs_capacity = 0;
	}

	VERIFY3U(vif->vif_nrxbufs_alloc, ==, 0);
	for (uint_t i = 0; i < vif->vif_rxbufs_capacity; i++) {
		vioif_rxbuf_t *rb = &vif->vif_rxbufs_mem[i];

		/*
		 * Ensure that this rxbuf is now in the free list:
		 */
		VERIFY(list_link_active(&rb->rb_link));
		list_remove(&vif->vif_rxbufs, rb);

		if (rb->rb_dma != NULL) {
			virtio_dma_free(rb->rb_dma);
			rb->rb_dma = NULL;
		}

		if (rb->rb_chain != NULL) {
			virtio_chain_free(rb->rb_chain);
			rb->rb_chain = NULL;
		}
	}
	VERIFY(list_is_empty(&vif->vif_rxbufs));
	if (vif->vif_rxbufs_mem != NULL) {
		kmem_free(vif->vif_rxbufs_mem,
		    sizeof (vioif_rxbuf_t) * vif->vif_rxbufs_capacity);
		vif->vif_rxbufs_mem = NULL;
		vif->vif_rxbufs_capacity = 0;
	}

	if (vif->vif_has_ctrlq) {
		VERIFY3U(vif->vif_nctrlbufs_alloc, ==, 0);
		for (uint_t i = 0; i < vif->vif_ctrlbufs_capacity; i++) {
			vioif_ctrlbuf_t *cb = &vif->vif_ctrlbufs_mem[i];

			/*
			 * Ensure that this ctrlbuf is now in the free list
			 */
			VERIFY(list_link_active(&cb->cb_link));
			list_remove(&vif->vif_ctrlbufs, cb);

			if (cb->cb_dma != NULL) {
				virtio_dma_free(cb->cb_dma);
				cb->cb_dma = NULL;
			}

			if (cb->cb_chain != NULL) {
				virtio_chain_free(cb->cb_chain);
				cb->cb_chain = NULL;
			}
		}
		VERIFY(list_is_empty(&vif->vif_ctrlbufs));
		if (vif->vif_ctrlbufs_mem != NULL) {
			kmem_free(vif->vif_ctrlbufs_mem,
			    sizeof (vioif_ctrlbuf_t) *
			    vif->vif_ctrlbufs_capacity);
			vif->vif_ctrlbufs_mem = NULL;
			vif->vif_ctrlbufs_capacity = 0;
		}
	}
}

static int
vioif_alloc_bufs(vioif_t *vif)
{
	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	/*
	 * Allocate one contiguous chunk of memory for the transmit and receive
	 * buffer tracking objects.  If the ring is unusually small, we'll
	 * reduce our target buffer count accordingly.
	 */
	vif->vif_txbufs_capacity = MIN(VIRTIO_NET_TX_BUFS,
	    virtio_queue_size(vif->vif_tx_vq));
	vif->vif_txbufs_mem = kmem_zalloc(
	    sizeof (vioif_txbuf_t) * vif->vif_txbufs_capacity, KM_SLEEP);
	list_create(&vif->vif_txbufs, sizeof (vioif_txbuf_t),
	    offsetof(vioif_txbuf_t, tb_link));

	vif->vif_rxbufs_capacity = MIN(VIRTIO_NET_RX_BUFS,
	    virtio_queue_size(vif->vif_rx_vq));
	vif->vif_rxbufs_mem = kmem_zalloc(
	    sizeof (vioif_rxbuf_t) * vif->vif_rxbufs_capacity, KM_SLEEP);
	list_create(&vif->vif_rxbufs, sizeof (vioif_rxbuf_t),
	    offsetof(vioif_rxbuf_t, rb_link));

	if (vif->vif_has_ctrlq) {
		vif->vif_ctrlbufs_capacity = MIN(VIRTIO_NET_CTRL_BUFS,
		    virtio_queue_size(vif->vif_ctrl_vq));
		vif->vif_ctrlbufs_mem = kmem_zalloc(
		    sizeof (vioif_ctrlbuf_t) * vif->vif_ctrlbufs_capacity,
		    KM_SLEEP);
	}
	list_create(&vif->vif_ctrlbufs, sizeof (vioif_ctrlbuf_t),
	    offsetof(vioif_ctrlbuf_t, cb_link));

	/*
	 * Do not loan more than half of our allocated receive buffers into
	 * the networking stack.
	 */
	vif->vif_nrxbufs_onloan_max = vif->vif_rxbufs_capacity / 2;

	/*
	 * Put everything in the free list straight away in order to simplify
	 * the use of vioif_free_bufs() for cleanup on allocation failure.
	 */
	for (uint_t i = 0; i < vif->vif_txbufs_capacity; i++) {
		list_insert_tail(&vif->vif_txbufs, &vif->vif_txbufs_mem[i]);
	}
	for (uint_t i = 0; i < vif->vif_rxbufs_capacity; i++) {
		list_insert_tail(&vif->vif_rxbufs, &vif->vif_rxbufs_mem[i]);
	}
	for (uint_t i = 0; i < vif->vif_ctrlbufs_capacity; i++) {
		list_insert_tail(&vif->vif_ctrlbufs, &vif->vif_ctrlbufs_mem[i]);
	}

	/*
	 * Start from the DMA attribute template common to both transmit and
	 * receive buffers.  The SGL entry count will be modified for each
	 * buffer type.
	 */
	ddi_dma_attr_t attr = vioif_dma_attr_bufs;

	/*
	 * The transmit inline buffer is small (less than a page), so it's
	 * reasonable to request a single cookie.
	 */
	attr.dma_attr_sgllen = 1;

	for (vioif_txbuf_t *tb = list_head(&vif->vif_txbufs); tb != NULL;
	    tb = list_next(&vif->vif_txbufs, tb)) {
		if ((tb->tb_dma = virtio_dma_alloc(vif->vif_virtio,
		    VIOIF_TX_INLINE_SIZE, &attr,
		    DDI_DMA_STREAMING | DDI_DMA_WRITE, KM_SLEEP)) == NULL) {
			goto fail;
		}
		VERIFY3U(virtio_dma_ncookies(tb->tb_dma), ==, 1);

		if ((tb->tb_chain = virtio_chain_alloc(vif->vif_tx_vq,
		    KM_SLEEP)) == NULL) {
			goto fail;
		}
		virtio_chain_data_set(tb->tb_chain, tb);

		tb->tb_dmaext_capacity = VIOIF_MAX_SEGS - 1;
		tb->tb_dmaext = kmem_zalloc(
		    sizeof (virtio_dma_t *) * tb->tb_dmaext_capacity,
		    KM_SLEEP);
	}

	/*
	 * Control queue buffers are also small (less than a page), so we'll
	 * also request a single cookie for them.
	 */
	for (vioif_ctrlbuf_t *cb = list_head(&vif->vif_ctrlbufs); cb != NULL;
	    cb = list_next(&vif->vif_ctrlbufs, cb)) {
		if ((cb->cb_dma = virtio_dma_alloc(vif->vif_virtio,
		    VIOIF_CTRL_SIZE, &attr,
		    DDI_DMA_STREAMING | DDI_DMA_RDWR, KM_SLEEP)) == NULL) {
			goto fail;
		}
		VERIFY3U(virtio_dma_ncookies(cb->cb_dma), ==, 1);

		if ((cb->cb_chain = virtio_chain_alloc(vif->vif_ctrl_vq,
		    KM_SLEEP)) == NULL) {
			goto fail;
		}
		virtio_chain_data_set(cb->cb_chain, cb);
	}

	/*
	 * The receive buffers are larger, and we can tolerate a large number
	 * of segments.  Adjust the SGL entry count, setting aside one segment
	 * for the virtio net header.
	 */
	attr.dma_attr_sgllen = VIOIF_MAX_SEGS - 1;

	for (vioif_rxbuf_t *rb = list_head(&vif->vif_rxbufs); rb != NULL;
	    rb = list_next(&vif->vif_rxbufs, rb)) {
		if ((rb->rb_dma = virtio_dma_alloc(vif->vif_virtio,
		    VIOIF_RX_BUF_SIZE, &attr, DDI_DMA_STREAMING | DDI_DMA_READ,
		    KM_SLEEP)) == NULL) {
			goto fail;
		}

		if ((rb->rb_chain = virtio_chain_alloc(vif->vif_rx_vq,
		    KM_SLEEP)) == NULL) {
			goto fail;
		}
		virtio_chain_data_set(rb->rb_chain, rb);

		/*
		 * Ensure that the first cookie is sufficient to cover the
		 * header skip region plus one byte.
		 */
		VERIFY3U(virtio_dma_cookie_size(rb->rb_dma, 0), >=,
		    VIOIF_HEADER_SKIP + 1);

		/*
		 * Ensure that the frame data begins at a location with a
		 * correctly aligned IP header.
		 */
		VERIFY3U((uintptr_t)virtio_dma_va(rb->rb_dma,
		    VIOIF_HEADER_SKIP) % 4, ==, 2);

		rb->rb_vioif = vif;
		rb->rb_frtn.free_func = vioif_rx_free_callback;
		rb->rb_frtn.free_arg = (caddr_t)rb;
	}

	return (0);

fail:
	vioif_free_bufs(vif);
	return (ENOMEM);
}

static int
vioif_ctrlq_req(vioif_t *vif, uint8_t class, uint8_t cmd, void *data,
    size_t datalen)
{
	vioif_ctrlbuf_t *cb = NULL;
	virtio_chain_t *vic = NULL;
	uint8_t *p = NULL;
	uint64_t pa = 0;
	uint8_t *ackp = NULL;
	struct virtio_net_ctrlq_hdr hdr = {
		.vnch_class = class,
		.vnch_command = cmd,
	};
	const size_t hdrlen = sizeof (hdr);
	const size_t acklen = 1;	/* the ack is always 1 byte */
	size_t totlen = hdrlen + datalen + acklen;
	int r = DDI_SUCCESS;

	/*
	 * We shouldn't be called unless the ctrlq feature has been
	 * negotiated with the host
	 */
	VERIFY(vif->vif_has_ctrlq);

	mutex_enter(&vif->vif_mutex);
	cb = vioif_ctrlbuf_alloc(vif);
	if (cb == NULL) {
		vif->vif_noctrlbuf++;
		mutex_exit(&vif->vif_mutex);
		r = DDI_FAILURE;
		goto done;
	}
	mutex_exit(&vif->vif_mutex);

	if (totlen > virtio_dma_size(cb->cb_dma)) {
		vif->vif_ctrlbuf_toosmall++;
		r = DDI_FAILURE;
		goto done;
	}

	/*
	 * Clear the entire buffer.  Technically not necessary, but useful
	 * if trying to troubleshoot an issue, and probably not a bad idea
	 * to not let any old data linger.
	 */
	p = virtio_dma_va(cb->cb_dma, 0);
	bzero(p, virtio_dma_size(cb->cb_dma));

	/*
	 * We currently do not support VIRTIO_F_ANY_LAYOUT.  That means that
	 * we must put the header, the data, and the ack in their
	 * own respective descriptors.
	 * Since all the currently supported control queue commands take
	 * _very_ small amounts of data, we use a single DMA buffer for all
	 * of it, but use 3 descriptors to reference (respectively) the
	 * header, the data, and the ack byte within that memory to adhere
	 * to the virtio spec.
	 *
	 * If we add support for control queue features such as custom
	 * MAC filtering tables, which might require larger amounts of
	 * memory, we likely will want to add more sophistication here
	 * and optionally use additional allocated memory to hold that
	 * data instead of a fixed size buffer.
	 *
	 * Copy the header.
	 */
	bcopy(&hdr, p, sizeof (hdr));
	pa = virtio_dma_cookie_pa(cb->cb_dma, 0);
	if ((r = virtio_chain_append(cb->cb_chain,
	    pa, hdrlen, VIRTIO_DIR_DEVICE_READS)) != DDI_SUCCESS) {
		goto done;
	}

	/*
	 * Copy the request data
	 */
	p = virtio_dma_va(cb->cb_dma, hdrlen);
	bcopy(data, p, datalen);
	if ((r = virtio_chain_append(cb->cb_chain,
	    pa + hdrlen, datalen, VIRTIO_DIR_DEVICE_READS)) != DDI_SUCCESS) {
		goto done;
	}

	/*
	 * We already cleared the buffer, so don't need to copy out a 0 for
	 * the ack byte.  Just add a descriptor for that spot.
	 */
	ackp = virtio_dma_va(cb->cb_dma, hdrlen + datalen);
	if ((r = virtio_chain_append(cb->cb_chain,
	    pa + hdrlen + datalen, acklen,
	    VIRTIO_DIR_DEVICE_WRITES)) != DDI_SUCCESS) {
		goto done;
	}

	virtio_dma_sync(cb->cb_dma, DDI_DMA_SYNC_FORDEV);
	virtio_chain_submit(cb->cb_chain, B_TRUE);

	/*
	 * Spin waiting for response.
	 */
	mutex_enter(&vif->vif_mutex);
	while ((vic = virtio_queue_poll(vif->vif_ctrl_vq)) == NULL) {
		mutex_exit(&vif->vif_mutex);
		delay(drv_usectohz(1000));
		mutex_enter(&vif->vif_mutex);
	}

	virtio_dma_sync(cb->cb_dma, DDI_DMA_SYNC_FORCPU);
	VERIFY3P(virtio_chain_data(vic), ==, cb);
	mutex_exit(&vif->vif_mutex);

	if (*ackp != VIRTIO_NET_CQ_OK) {
		r = DDI_FAILURE;
	}

done:
	mutex_enter(&vif->vif_mutex);
	vioif_ctrlbuf_free(vif, cb);
	mutex_exit(&vif->vif_mutex);

	return (r);
}

static int
vioif_m_multicst(void *arg, boolean_t add, const uint8_t *mcst_addr)
{
	/*
	 * Even though we currently do not have support for programming
	 * multicast filters, or even enabling promiscuous mode, we return
	 * success here to avoid the networking stack falling back to link
	 * layer broadcast for multicast traffic.  Some hypervisors already
	 * pass received multicast frames onto the guest, so at least on those
	 * systems multicast will work as expected anyway.
	 */
	return (0);
}

static int
vioif_m_setpromisc(void *arg, boolean_t on)
{
	vioif_t *vif = arg;
	uint8_t val = on ? 1 : 0;

	if (!vif->vif_has_ctrlq_rx) {
		if (vioif_fake_promisc_success)
			return (0);

		return (ENOTSUP);
	}

	return (vioif_ctrlq_req(vif, VIRTIO_NET_CTRL_RX,
	    VIRTIO_NET_CTRL_RX_PROMISC, &val, sizeof (val)));
}

static int
vioif_m_unicst(void *arg, const uint8_t *mac)
{
	return (ENOTSUP);
}

static uint_t
vioif_add_rx(vioif_t *vif)
{
	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	if (vif->vif_runstate != VIOIF_RUNSTATE_RUNNING) {
		/*
		 * If the NIC is not running, do not give the device any
		 * receive buffers.
		 */
		return (0);
	}

	uint_t num_added = 0;

	vioif_rxbuf_t *rb;
	while ((rb = vioif_rxbuf_alloc(vif)) != NULL) {
		/*
		 * For legacy devices, and those that have not negotiated
		 * VIRTIO_F_ANY_LAYOUT, the virtio net header must appear in a
		 * separate descriptor entry to the rest of the buffer.
		 */
		if (virtio_chain_append(rb->rb_chain,
		    virtio_dma_cookie_pa(rb->rb_dma, 0),
		    sizeof (struct virtio_net_hdr),
		    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
			goto fail;
		}

		for (uint_t n = 0; n < virtio_dma_ncookies(rb->rb_dma); n++) {
			uint64_t pa = virtio_dma_cookie_pa(rb->rb_dma, n);
			size_t sz = virtio_dma_cookie_size(rb->rb_dma, n);

			if (n == 0) {
				pa += VIOIF_HEADER_SKIP;
				VERIFY3U(sz, >, VIOIF_HEADER_SKIP);
				sz -= VIOIF_HEADER_SKIP;
			}

			if (virtio_chain_append(rb->rb_chain, pa, sz,
			    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
				goto fail;
			}
		}

		virtio_chain_submit(rb->rb_chain, B_FALSE);
		num_added++;
		continue;

fail:
		vioif_rxbuf_free(vif, rb);
		vif->vif_norecvbuf++;
		break;
	}

	if (num_added > 0) {
		virtio_queue_flush(vif->vif_rx_vq);
	}

	return (num_added);
}

static uint_t
vioif_process_rx(vioif_t *vif)
{
	virtio_chain_t *vic;
	mblk_t *mphead = NULL, *lastmp = NULL, *mp;
	uint_t num_processed = 0;

	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	while ((vic = virtio_queue_poll(vif->vif_rx_vq)) != NULL) {
		/*
		 * We have to use the chain received length here, as the device
		 * does not tell us the received frame length any other way.
		 * In a limited survey of hypervisors, virtio network devices
		 * appear to provide the right value here.
		 */
		size_t len = virtio_chain_received_length(vic);
		vioif_rxbuf_t *rb = virtio_chain_data(vic);

		virtio_dma_sync(rb->rb_dma, DDI_DMA_SYNC_FORCPU);

		/*
		 * If the NIC is not running, discard any received frames.
		 */
		if (vif->vif_runstate != VIOIF_RUNSTATE_RUNNING) {
			vioif_rxbuf_free(vif, rb);
			continue;
		}

		if (len < sizeof (struct virtio_net_hdr)) {
			vif->vif_rxfail_chain_undersize++;
			vif->vif_ierrors++;
			vioif_rxbuf_free(vif, rb);
			continue;
		}
		len -= sizeof (struct virtio_net_hdr);

		/*
		 * We copy small packets that happen to fit into a single
		 * cookie and reuse the buffers.  For bigger ones, we loan
		 * the buffers upstream.
		 */
		if (len < vif->vif_rxcopy_thresh ||
		    vif->vif_nrxbufs_onloan >= vif->vif_nrxbufs_onloan_max) {
			mutex_exit(&vif->vif_mutex);
			if ((mp = allocb(len, 0)) == NULL) {
				mutex_enter(&vif->vif_mutex);
				vif->vif_norecvbuf++;
				vif->vif_ierrors++;

				vioif_rxbuf_free(vif, rb);
				continue;
			}

			bcopy(virtio_dma_va(rb->rb_dma, VIOIF_HEADER_SKIP),
			    mp->b_rptr, len);
			mp->b_wptr = mp->b_rptr + len;

			/*
			 * As the packet contents were copied rather than
			 * loaned, we can return the receive buffer resources
			 * to the free list.
			 */
			mutex_enter(&vif->vif_mutex);
			vioif_rxbuf_free(vif, rb);

		} else {
			mutex_exit(&vif->vif_mutex);
			if ((mp = desballoc(virtio_dma_va(rb->rb_dma,
			    VIOIF_HEADER_SKIP), len, 0,
			    &rb->rb_frtn)) == NULL) {
				mutex_enter(&vif->vif_mutex);
				vif->vif_norecvbuf++;
				vif->vif_ierrors++;

				vioif_rxbuf_free(vif, rb);
				continue;
			}
			mp->b_wptr = mp->b_rptr + len;

			mutex_enter(&vif->vif_mutex);
			vif->vif_nrxbufs_onloan++;
		}

		/*
		 * virtio-net does not tell us if this packet is multicast
		 * or broadcast, so we have to check it.
		 */
		if (mp->b_rptr[0] & 0x1) {
			if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
				vif->vif_multircv++;
			else
				vif->vif_brdcstrcv++;
		}

		vif->vif_rbytes += len;
		vif->vif_ipackets++;

		if (lastmp == NULL) {
			mphead = mp;
		} else {
			lastmp->b_next = mp;
		}
		lastmp = mp;
		num_processed++;
	}

	if (mphead != NULL) {
		if (vif->vif_runstate == VIOIF_RUNSTATE_RUNNING) {
			mutex_exit(&vif->vif_mutex);
			mac_rx(vif->vif_mac_handle, NULL, mphead);
			mutex_enter(&vif->vif_mutex);
		} else {
			/*
			 * The NIC was disabled part way through our execution,
			 * so free the messages we allocated.
			 */
			freemsgchain(mphead);
		}
	}

	return (num_processed);
}

static uint_t
vioif_reclaim_used_tx(vioif_t *vif)
{
	virtio_chain_t *vic;
	uint_t num_reclaimed = 0;

	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));

	while ((vic = virtio_queue_poll(vif->vif_tx_vq)) != NULL) {
		vioif_txbuf_t *tb = virtio_chain_data(vic);

		if (tb->tb_mp != NULL) {
			/*
			 * Unbind the external mapping.
			 */
			for (uint_t i = 0; i < tb->tb_dmaext_capacity; i++) {
				if (tb->tb_dmaext[i] == NULL) {
					continue;
				}

				virtio_dma_unbind(tb->tb_dmaext[i]);
			}

			freemsg(tb->tb_mp);
			tb->tb_mp = NULL;
		}

		/*
		 * Return this transmit buffer to the free list for reuse.
		 */
		mutex_enter(&vif->vif_mutex);
		vioif_txbuf_free(vif, tb);
		mutex_exit(&vif->vif_mutex);

		num_reclaimed++;
	}

	/* Return ring to transmitting state if descriptors were reclaimed. */
	if (num_reclaimed > 0) {
		boolean_t do_update = B_FALSE;

		mutex_enter(&vif->vif_mutex);
		vif->vif_stat_tx_reclaim += num_reclaimed;
		if (vif->vif_tx_corked) {
			/*
			 * TX was corked on a lack of available descriptors.
			 * That dire state has passed so the TX interrupt can
			 * be disabled and MAC can be notified that
			 * transmission is possible again.
			 */
			vif->vif_tx_corked = B_FALSE;
			virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);
			do_update = B_TRUE;
		}

		if (do_update) {
			mac_tx_update(vif->vif_mac_handle);
		}
		mutex_exit(&vif->vif_mutex);
	}

	return (num_reclaimed);
}

static void
vioif_reclaim_periodic(void *arg)
{
	vioif_t *vif = arg;
	uint_t num_reclaimed;

	num_reclaimed = vioif_reclaim_used_tx(vif);

	mutex_enter(&vif->vif_mutex);
	vif->vif_tx_reclaim_tid = 0;
	/*
	 * If used descriptors were reclaimed or TX descriptors appear to be
	 * outstanding, the ring is considered active and periodic reclamation
	 * is necessary for now.
	 */
	if (num_reclaimed != 0 || virtio_queue_nactive(vif->vif_tx_vq) != 0) {
		/* Do not reschedule if the ring is being drained. */
		if (!vif->vif_tx_drain) {
			vioif_reclaim_restart(vif);
		}
	}
	mutex_exit(&vif->vif_mutex);
}

static void
vioif_reclaim_restart(vioif_t *vif)
{
	VERIFY(MUTEX_HELD(&vif->vif_mutex));
	VERIFY(!vif->vif_tx_drain);

	if (vif->vif_tx_reclaim_tid == 0) {
		vif->vif_tx_reclaim_tid = timeout(vioif_reclaim_periodic, vif,
		    MSEC_TO_TICK_ROUNDUP(vioif_reclaim_ms));
	}
}

static void
vioif_tx_drain(vioif_t *vif)
{
	VERIFY(MUTEX_HELD(&vif->vif_mutex));
	VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_STOPPING);

	vif->vif_tx_drain = B_TRUE;
	/* Put a stop to the periodic reclaim if it is running */
	if (vif->vif_tx_reclaim_tid != 0) {
		timeout_id_t tid = vif->vif_tx_reclaim_tid;

		/*
		 * With vif_tx_drain set, there is no risk that a racing
		 * vioif_reclaim_periodic() call will reschedule itself.
		 *
		 * Being part of the mc_stop hook also guarantees that
		 * vioif_m_tx() will not be called to restart it.
		 */
		vif->vif_tx_reclaim_tid = 0;
		mutex_exit(&vif->vif_mutex);
		(void) untimeout(tid);
		mutex_enter(&vif->vif_mutex);
	}
	virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);

	/*
	 * Wait for all of the TX descriptors to be processed by the host so
	 * they can be reclaimed.
	 */
	while (vif->vif_ntxbufs_alloc > 0) {
		mutex_exit(&vif->vif_mutex);
		(void) vioif_reclaim_used_tx(vif);
		delay(5);
		mutex_enter(&vif->vif_mutex);
	}
	VERIFY(!vif->vif_tx_corked);
	VERIFY3U(vif->vif_tx_reclaim_tid, ==, 0);
	VERIFY3U(virtio_queue_nactive(vif->vif_tx_vq), ==, 0);
}

static int
vioif_tx_inline(vioif_t *vif, vioif_txbuf_t *tb, mblk_t *mp, size_t msg_size)
{
	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));

	VERIFY3U(msg_size, <=, virtio_dma_size(tb->tb_dma) - VIOIF_HEADER_SKIP);

	/*
	 * Copy the message into the inline buffer and then free the message.
	 */
	mcopymsg(mp, virtio_dma_va(tb->tb_dma, VIOIF_HEADER_SKIP));

	if (virtio_chain_append(tb->tb_chain,
	    virtio_dma_cookie_pa(tb->tb_dma, 0) + VIOIF_HEADER_SKIP,
	    msg_size, VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

static int
vioif_tx_external(vioif_t *vif, vioif_txbuf_t *tb, mblk_t *mp, size_t msg_size)
{
	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));

	mblk_t *nmp = mp;
	tb->tb_ndmaext = 0;

	while (nmp != NULL) {
		size_t len;

		if ((len = MBLKL(nmp)) == 0) {
			/*
			 * Skip any zero-length entries in the chain.
			 */
			nmp = nmp->b_cont;
			continue;
		}

		if (tb->tb_ndmaext >= tb->tb_dmaext_capacity) {
			mutex_enter(&vif->vif_mutex);
			vif->vif_txfail_indirect_limit++;
			vif->vif_notxbuf++;
			mutex_exit(&vif->vif_mutex);
			goto fail;
		}

		if (tb->tb_dmaext[tb->tb_ndmaext] == NULL) {
			/*
			 * Allocate a DMA handle for this slot.
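			 * Handles allocated here are kept in tb_dmaext[] for
			 * reuse by later transmits (the reclaim path only
			 * unbinds them); they are not freed until
			 * vioif_free_bufs().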
			 */
			if ((tb->tb_dmaext[tb->tb_ndmaext] =
			    virtio_dma_alloc_nomem(vif->vif_virtio,
			    &vioif_dma_attr_external, KM_SLEEP)) == NULL) {
				mutex_enter(&vif->vif_mutex);
				vif->vif_notxbuf++;
				mutex_exit(&vif->vif_mutex);
				goto fail;
			}
		}
		virtio_dma_t *extdma = tb->tb_dmaext[tb->tb_ndmaext++];

		if (virtio_dma_bind(extdma, nmp->b_rptr, len,
		    DDI_DMA_WRITE | DDI_DMA_STREAMING, KM_SLEEP) !=
		    DDI_SUCCESS) {
			mutex_enter(&vif->vif_mutex);
			vif->vif_txfail_dma_bind++;
			mutex_exit(&vif->vif_mutex);
			goto fail;
		}

		for (uint_t n = 0; n < virtio_dma_ncookies(extdma); n++) {
			uint64_t pa = virtio_dma_cookie_pa(extdma, n);
			size_t sz = virtio_dma_cookie_size(extdma, n);

			if (virtio_chain_append(tb->tb_chain, pa, sz,
			    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
				mutex_enter(&vif->vif_mutex);
				vif->vif_txfail_indirect_limit++;
				vif->vif_notxbuf++;
				mutex_exit(&vif->vif_mutex);
				goto fail;
			}
		}

		nmp = nmp->b_cont;
	}

	/*
	 * We need to keep the message around until we reclaim the buffer from
	 * the device before freeing it.
	 */
	tb->tb_mp = mp;

	return (DDI_SUCCESS);

fail:
	for (uint_t n = 0; n < tb->tb_ndmaext; n++) {
		if (tb->tb_dmaext[n] != NULL) {
			virtio_dma_unbind(tb->tb_dmaext[n]);
		}
	}
	tb->tb_ndmaext = 0;

	freemsg(mp);

	return (DDI_FAILURE);
}

static boolean_t
vioif_send(vioif_t *vif, mblk_t *mp)
{
	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));

	vioif_txbuf_t *tb = NULL;
	struct virtio_net_hdr *vnh = NULL;
	size_t msg_size = 0;
	uint32_t csum_start;
	uint32_t csum_stuff;
	uint32_t csum_flags;
	uint32_t lso_flags;
	uint32_t lso_mss;
	mblk_t *nmp;
	int ret;
	boolean_t lso_required = B_FALSE;
	struct ether_header *ether = (void *)mp->b_rptr;

	for (nmp = mp; nmp; nmp = nmp->b_cont)
		msg_size += MBLKL(nmp);

	if (vif->vif_tx_tso4 || vif->vif_tx_tso6) {
		mac_lso_get(mp, &lso_mss, &lso_flags);
		lso_required = (lso_flags & HW_LSO) != 0;
	}

	mutex_enter(&vif->vif_mutex);
	if ((tb = vioif_txbuf_alloc(vif)) == NULL) {
		vif->vif_notxbuf++;
		goto fail;
	}
	mutex_exit(&vif->vif_mutex);

	/*
	 * Use the inline buffer for the virtio net header.  Zero the portion
	 * of our DMA allocation prior to the packet data.
	 */
	vnh = virtio_dma_va(tb->tb_dma, 0);
	bzero(vnh, VIOIF_HEADER_SKIP);

	/*
	 * For legacy devices, and those that have not negotiated
	 * VIRTIO_F_ANY_LAYOUT, the virtio net header must appear in a separate
	 * descriptor entry to the rest of the buffer.
	 */
	if (virtio_chain_append(tb->tb_chain,
	    virtio_dma_cookie_pa(tb->tb_dma, 0),
	    sizeof (struct virtio_net_hdr),
	    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
		mutex_enter(&vif->vif_mutex);
		vif->vif_notxbuf++;
		goto fail;
	}

	mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL, NULL, &csum_flags);

	/*
	 * They want us to do the TCP/UDP csum calculation.
	 */
	if (csum_flags & HCK_PARTIALCKSUM) {
		int eth_hsize;

		/*
		 * Did we ask for it?
		 */
		ASSERT(vif->vif_tx_csum);

		/*
		 * We only asked for partial csum packets.
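		 * (As an aside: the csum_start and csum_stuff offsets from
		 * MAC are relative to the start of the IP header, while the
		 * virtio header expects offsets counted from the start of the
		 * frame, which is why the Ethernet header length is added to
		 * csum_start below.)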
1295 */ 1296 ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM)); 1297 ASSERT(!(csum_flags & HCK_FULLCKSUM)); 1298 1299 if (ether->ether_type == htons(ETHERTYPE_VLAN)) { 1300 eth_hsize = sizeof (struct ether_vlan_header); 1301 } else { 1302 eth_hsize = sizeof (struct ether_header); 1303 } 1304 1305 vnh->vnh_flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 1306 vnh->vnh_csum_start = eth_hsize + csum_start; 1307 vnh->vnh_csum_offset = csum_stuff - csum_start; 1308 } 1309 1310 /* 1311 * Setup LSO fields if required. 1312 */ 1313 if (lso_required) { 1314 mac_ether_offload_flags_t needed; 1315 mac_ether_offload_info_t meo; 1316 uint32_t cksum; 1317 size_t len; 1318 mblk_t *pullmp = NULL; 1319 tcpha_t *tcpha; 1320 1321 if (mac_ether_offload_info(mp, &meo) != 0) { 1322 goto fail; 1323 } 1324 1325 needed = MEOI_L2INFO_SET | MEOI_L3INFO_SET | MEOI_L4INFO_SET; 1326 if ((meo.meoi_flags & needed) != needed) { 1327 goto fail; 1328 } 1329 1330 if (meo.meoi_l4proto != IPPROTO_TCP) { 1331 goto fail; 1332 } 1333 1334 if (meo.meoi_l3proto == ETHERTYPE_IP && vif->vif_tx_tso4) { 1335 vnh->vnh_gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 1336 } else if (meo.meoi_l3proto == ETHERTYPE_IPV6 && 1337 vif->vif_tx_tso6) { 1338 vnh->vnh_gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 1339 } else { 1340 goto fail; 1341 } 1342 1343 /* 1344 * The TCP stack does not include the length in the TCP 1345 * pseudo-header when it is performing LSO since hardware 1346 * generally asks for it to be removed (as it'll change). 1347 * Unfortunately, for virtio, we actually need it. This means we 1348 * need to go through and calculate the actual length and fix 1349 * things up. Because the virtio spec cares about the ECN flag 1350 * and indicating that, at least this means we'll have that 1351 * available as well. 1352 */ 1353 if (MBLKL(mp) < vnh->vnh_hdr_len) { 1354 pullmp = msgpullup(mp, vnh->vnh_hdr_len); 1355 if (pullmp == NULL) 1356 goto fail; 1357 tcpha = (tcpha_t *)(pullmp->b_rptr + meo.meoi_l2hlen + 1358 meo.meoi_l3hlen); 1359 } else { 1360 tcpha = (tcpha_t *)(mp->b_rptr + meo.meoi_l2hlen + 1361 meo.meoi_l3hlen); 1362 } 1363 1364 len = meo.meoi_len - meo.meoi_l2hlen - meo.meoi_l3hlen; 1365 cksum = ntohs(tcpha->tha_sum) + len; 1366 cksum = (cksum >> 16) + (cksum & 0xffff); 1367 cksum = (cksum >> 16) + (cksum & 0xffff); 1368 tcpha->tha_sum = htons(cksum); 1369 1370 if (tcpha->tha_flags & TH_CWR) { 1371 vnh->vnh_gso_type |= VIRTIO_NET_HDR_GSO_ECN; 1372 } 1373 vnh->vnh_gso_size = (uint16_t)lso_mss; 1374 vnh->vnh_hdr_len = meo.meoi_l2hlen + meo.meoi_l3hlen + 1375 meo.meoi_l4hlen; 1376 1377 freemsg(pullmp); 1378 } 1379 1380 /* 1381 * The device does not maintain its own statistics about broadcast or 1382 * multicast packets, so we have to check the destination address 1383 * ourselves. 1384 */ 1385 if ((ether->ether_dhost.ether_addr_octet[0] & 0x01) != 0) { 1386 mutex_enter(&vif->vif_mutex); 1387 if (ether_cmp(ðer->ether_dhost, vioif_broadcast) == 0) { 1388 vif->vif_brdcstxmt++; 1389 } else { 1390 vif->vif_multixmt++; 1391 } 1392 mutex_exit(&vif->vif_mutex); 1393 } 1394 1395 /* 1396 * For small packets, copy into the preallocated inline buffer rather 1397 * than incur the overhead of mapping. Note that both of these 1398 * functions ensure that "mp" is freed before returning. 
1399 */ 1400 if (msg_size < vif->vif_txcopy_thresh) { 1401 ret = vioif_tx_inline(vif, tb, mp, msg_size); 1402 } else { 1403 ret = vioif_tx_external(vif, tb, mp, msg_size); 1404 } 1405 mp = NULL; 1406 1407 mutex_enter(&vif->vif_mutex); 1408 1409 if (ret != DDI_SUCCESS) { 1410 goto fail; 1411 } 1412 1413 vif->vif_opackets++; 1414 vif->vif_obytes += msg_size; 1415 mutex_exit(&vif->vif_mutex); 1416 1417 virtio_dma_sync(tb->tb_dma, DDI_DMA_SYNC_FORDEV); 1418 virtio_chain_submit(tb->tb_chain, B_TRUE); 1419 1420 return (B_TRUE); 1421 1422 fail: 1423 vif->vif_oerrors++; 1424 if (tb != NULL) { 1425 vioif_txbuf_free(vif, tb); 1426 } 1427 mutex_exit(&vif->vif_mutex); 1428 1429 return (mp == NULL); 1430 } 1431 1432 static mblk_t * 1433 vioif_m_tx(void *arg, mblk_t *mp) 1434 { 1435 vioif_t *vif = arg; 1436 mblk_t *nmp; 1437 1438 /* 1439 * Prior to attempting to send any more frames, do a reclaim to pick up 1440 * any descriptors which have been processed by the host. 1441 */ 1442 if (virtio_queue_nactive(vif->vif_tx_vq) != 0) { 1443 (void) vioif_reclaim_used_tx(vif); 1444 } 1445 1446 while (mp != NULL) { 1447 nmp = mp->b_next; 1448 mp->b_next = NULL; 1449 1450 if (!vioif_send(vif, mp)) { 1451 /* 1452 * If there are no descriptors available, try to 1453 * reclaim some, allowing a retry of the send if some 1454 * are found. 1455 */ 1456 mp->b_next = nmp; 1457 if (vioif_reclaim_used_tx(vif) != 0) { 1458 continue; 1459 } 1460 1461 /* 1462 * Otherwise, enable the TX ring interrupt so that as 1463 * soon as a descriptor becomes available, transmission 1464 * can begin again. For safety, make sure the periodic 1465 * reclaim is running as well. 1466 */ 1467 mutex_enter(&vif->vif_mutex); 1468 vif->vif_tx_corked = B_TRUE; 1469 virtio_queue_no_interrupt(vif->vif_tx_vq, B_FALSE); 1470 vioif_reclaim_restart(vif); 1471 mutex_exit(&vif->vif_mutex); 1472 return (mp); 1473 } 1474 mp = nmp; 1475 } 1476 1477 /* Ensure the periodic reclaim has been started. */ 1478 mutex_enter(&vif->vif_mutex); 1479 vioif_reclaim_restart(vif); 1480 mutex_exit(&vif->vif_mutex); 1481 1482 return (NULL); 1483 } 1484 1485 static int 1486 vioif_m_start(void *arg) 1487 { 1488 vioif_t *vif = arg; 1489 1490 mutex_enter(&vif->vif_mutex); 1491 1492 VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_STOPPED); 1493 vif->vif_runstate = VIOIF_RUNSTATE_RUNNING; 1494 1495 mac_link_update(vif->vif_mac_handle, LINK_STATE_UP); 1496 1497 virtio_queue_no_interrupt(vif->vif_rx_vq, B_FALSE); 1498 1499 /* 1500 * Starting interrupts on the TX virtqueue is unnecessary at this time. 1501 * Descriptor reclamation is handling during transmit, via a periodic 1502 * timer, and when resources are tight, via the then-enabled interrupt. 1503 */ 1504 vif->vif_tx_drain = B_FALSE; 1505 1506 /* 1507 * Add as many receive buffers as we can to the receive queue. If we 1508 * cannot add any, it may be because we have stopped and started again 1509 * and the descriptors are all in the queue already. 
1510 */ 1511 (void) vioif_add_rx(vif); 1512 1513 mutex_exit(&vif->vif_mutex); 1514 return (DDI_SUCCESS); 1515 } 1516 1517 static void 1518 vioif_m_stop(void *arg) 1519 { 1520 vioif_t *vif = arg; 1521 1522 mutex_enter(&vif->vif_mutex); 1523 1524 VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_RUNNING); 1525 vif->vif_runstate = VIOIF_RUNSTATE_STOPPING; 1526 1527 /* Ensure all TX descriptors have been processed and reclaimed */ 1528 vioif_tx_drain(vif); 1529 1530 virtio_queue_no_interrupt(vif->vif_rx_vq, B_TRUE); 1531 1532 vif->vif_runstate = VIOIF_RUNSTATE_STOPPED; 1533 mutex_exit(&vif->vif_mutex); 1534 } 1535 1536 static int 1537 vioif_m_stat(void *arg, uint_t stat, uint64_t *val) 1538 { 1539 vioif_t *vif = arg; 1540 1541 switch (stat) { 1542 case MAC_STAT_IERRORS: 1543 *val = vif->vif_ierrors; 1544 break; 1545 case MAC_STAT_OERRORS: 1546 *val = vif->vif_oerrors; 1547 break; 1548 case MAC_STAT_MULTIRCV: 1549 *val = vif->vif_multircv; 1550 break; 1551 case MAC_STAT_BRDCSTRCV: 1552 *val = vif->vif_brdcstrcv; 1553 break; 1554 case MAC_STAT_MULTIXMT: 1555 *val = vif->vif_multixmt; 1556 break; 1557 case MAC_STAT_BRDCSTXMT: 1558 *val = vif->vif_brdcstxmt; 1559 break; 1560 case MAC_STAT_IPACKETS: 1561 *val = vif->vif_ipackets; 1562 break; 1563 case MAC_STAT_RBYTES: 1564 *val = vif->vif_rbytes; 1565 break; 1566 case MAC_STAT_OPACKETS: 1567 *val = vif->vif_opackets; 1568 break; 1569 case MAC_STAT_OBYTES: 1570 *val = vif->vif_obytes; 1571 break; 1572 case MAC_STAT_NORCVBUF: 1573 *val = vif->vif_norecvbuf; 1574 break; 1575 case MAC_STAT_NOXMTBUF: 1576 *val = vif->vif_notxbuf; 1577 break; 1578 case MAC_STAT_IFSPEED: 1579 /* always 1 Gbit */ 1580 *val = 1000000000ULL; 1581 break; 1582 case ETHER_STAT_LINK_DUPLEX: 1583 /* virtual device, always full-duplex */ 1584 *val = LINK_DUPLEX_FULL; 1585 break; 1586 1587 default: 1588 return (ENOTSUP); 1589 } 1590 1591 return (DDI_SUCCESS); 1592 } 1593 1594 static int 1595 vioif_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1596 uint_t pr_valsize, const void *pr_val) 1597 { 1598 vioif_t *vif = arg; 1599 1600 switch (pr_num) { 1601 case MAC_PROP_MTU: { 1602 int r; 1603 uint32_t mtu; 1604 if (pr_valsize < sizeof (mtu)) { 1605 return (EOVERFLOW); 1606 } 1607 bcopy(pr_val, &mtu, sizeof (mtu)); 1608 1609 if (mtu < ETHERMIN || mtu > vif->vif_mtu_max) { 1610 return (EINVAL); 1611 } 1612 1613 mutex_enter(&vif->vif_mutex); 1614 if ((r = mac_maxsdu_update(vif->vif_mac_handle, mtu)) == 0) { 1615 vif->vif_mtu = mtu; 1616 } 1617 mutex_exit(&vif->vif_mutex); 1618 1619 return (r); 1620 } 1621 1622 case MAC_PROP_PRIVATE: { 1623 long max, result; 1624 uint_t *resp; 1625 char *endptr; 1626 1627 if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) { 1628 max = VIOIF_MACPROP_TXCOPY_THRESH_MAX; 1629 resp = &vif->vif_txcopy_thresh; 1630 } else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) { 1631 max = VIOIF_MACPROP_RXCOPY_THRESH_MAX; 1632 resp = &vif->vif_rxcopy_thresh; 1633 } else { 1634 return (ENOTSUP); 1635 } 1636 1637 if (pr_val == NULL) { 1638 return (EINVAL); 1639 } 1640 1641 if (ddi_strtol(pr_val, &endptr, 10, &result) != 0 || 1642 *endptr != '\0' || result < 0 || result > max) { 1643 return (EINVAL); 1644 } 1645 1646 mutex_enter(&vif->vif_mutex); 1647 *resp = result; 1648 mutex_exit(&vif->vif_mutex); 1649 1650 return (0); 1651 } 1652 1653 default: 1654 return (ENOTSUP); 1655 } 1656 } 1657 1658 static int 1659 vioif_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1660 uint_t pr_valsize, void *pr_val) 1661 { 1662 vioif_t *vif = arg; 
	switch (pr_num) {
	case MAC_PROP_PRIVATE: {
		uint_t value;

		if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) {
			value = vif->vif_txcopy_thresh;
		} else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) {
			value = vif->vif_rxcopy_thresh;
		} else {
			return (ENOTSUP);
		}

		if (snprintf(pr_val, pr_valsize, "%u", value) >= pr_valsize) {
			return (EOVERFLOW);
		}

		return (0);
	}

	default:
		return (ENOTSUP);
	}
}

static void
vioif_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    mac_prop_info_handle_t prh)
{
	vioif_t *vif = arg;
	char valstr[64];
	int value;

	switch (pr_num) {
	case MAC_PROP_MTU:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
		mac_prop_info_set_range_uint32(prh, ETHERMIN, vif->vif_mtu_max);
		return;

	case MAC_PROP_PRIVATE:
		if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) {
			value = VIOIF_MACPROP_TXCOPY_THRESH_DEF;
		} else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) {
			value = VIOIF_MACPROP_RXCOPY_THRESH_DEF;
		} else {
			/*
			 * We do not recognise this private property name.
			 */
			return;
		}
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
		(void) snprintf(valstr, sizeof (valstr), "%d", value);
		mac_prop_info_set_default_str(prh, valstr);
		return;

	default:
		return;
	}
}

static boolean_t
vioif_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	vioif_t *vif = arg;

	switch (cap) {
	case MAC_CAPAB_HCKSUM: {
		if (!vif->vif_tx_csum) {
			return (B_FALSE);
		}

		*(uint32_t *)cap_data = HCKSUM_INET_PARTIAL;

		return (B_TRUE);
	}

	case MAC_CAPAB_LSO: {
		if (!vif->vif_tx_tso4) {
			return (B_FALSE);
		}

		mac_capab_lso_t *lso = cap_data;
		lso->lso_flags = LSO_TX_BASIC_TCP_IPV4 | LSO_TX_BASIC_TCP_IPV6;
		lso->lso_basic_tcp_ipv4.lso_max = VIOIF_RX_DATA_SIZE;
		lso->lso_basic_tcp_ipv6.lso_max = VIOIF_RX_DATA_SIZE;

		return (B_TRUE);
	}

	default:
		return (B_FALSE);
	}
}

static boolean_t
vioif_has_feature(vioif_t *vif, uint32_t feature)
{
	return (virtio_feature_present(vif->vif_virtio, feature));
}

/*
 * Read the primary MAC address from the device if one is provided.  If not,
 * generate a random locally administered MAC address and write it back to the
 * device.
 */
static void
vioif_get_mac(vioif_t *vif)
{
	VERIFY(MUTEX_HELD(&vif->vif_mutex));

	if (vioif_has_feature(vif, VIRTIO_NET_F_MAC)) {
		for (uint_t i = 0; i < ETHERADDRL; i++) {
			vif->vif_mac[i] = virtio_dev_get8(vif->vif_virtio,
			    VIRTIO_NET_CONFIG_MAC + i);
		}
		vif->vif_mac_from_host = 1;

		return;
	}

	/* Get a few random bytes */
	(void) random_get_pseudo_bytes(vif->vif_mac, ETHERADDRL);
	/* Make sure it's a unicast MAC */
	vif->vif_mac[0] &= ~1;
	/* Set the "locally administered" bit */
	vif->vif_mac[1] |= 2;

	/*
	 * Write the random MAC address back to the device.
1792 */ 1793 for (uint_t i = 0; i < ETHERADDRL; i++) { 1794 virtio_dev_put8(vif->vif_virtio, VIRTIO_NET_CONFIG_MAC + i, 1795 vif->vif_mac[i]); 1796 } 1797 vif->vif_mac_from_host = 0; 1798 1799 dev_err(vif->vif_dip, CE_NOTE, "!Generated a random MAC address: " 1800 "%02x:%02x:%02x:%02x:%02x:%02x", 1801 (uint_t)vif->vif_mac[0], (uint_t)vif->vif_mac[1], 1802 (uint_t)vif->vif_mac[2], (uint_t)vif->vif_mac[3], 1803 (uint_t)vif->vif_mac[4], (uint_t)vif->vif_mac[5]); 1804 } 1805 1806 /* 1807 * Virtqueue interrupt handlers 1808 */ 1809 static uint_t 1810 vioif_rx_handler(caddr_t arg0, caddr_t arg1) 1811 { 1812 vioif_t *vif = (vioif_t *)arg0; 1813 1814 mutex_enter(&vif->vif_mutex); 1815 (void) vioif_process_rx(vif); 1816 1817 /* 1818 * Attempt to replenish the receive queue. If we cannot add any 1819 * descriptors here, it may be because all of the recently received 1820 * packets were loaned up to the networking stack. 1821 */ 1822 (void) vioif_add_rx(vif); 1823 mutex_exit(&vif->vif_mutex); 1824 1825 return (DDI_INTR_CLAIMED); 1826 } 1827 1828 static uint_t 1829 vioif_tx_handler(caddr_t arg0, caddr_t arg1) 1830 { 1831 vioif_t *vif = (vioif_t *)arg0; 1832 1833 /* 1834 * The TX interrupt could race with other reclamation activity, so 1835 * interpreting the return value is unimportant. 1836 */ 1837 (void) vioif_reclaim_used_tx(vif); 1838 1839 return (DDI_INTR_CLAIMED); 1840 } 1841 1842 static void 1843 vioif_check_features(vioif_t *vif) 1844 { 1845 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 1846 1847 vif->vif_tx_csum = 0; 1848 vif->vif_tx_tso4 = 0; 1849 vif->vif_tx_tso6 = 0; 1850 1851 if (vioif_has_feature(vif, VIRTIO_NET_F_CSUM)) { 1852 /* 1853 * The host will accept packets with partial checksums from us. 1854 */ 1855 vif->vif_tx_csum = 1; 1856 1857 /* 1858 * The legacy GSO feature represents the combination of 1859 * HOST_TSO4, HOST_TSO6, and HOST_ECN. 1860 */ 1861 boolean_t gso = vioif_has_feature(vif, VIRTIO_NET_F_GSO); 1862 boolean_t tso4 = vioif_has_feature(vif, VIRTIO_NET_F_HOST_TSO4); 1863 boolean_t tso6 = vioif_has_feature(vif, VIRTIO_NET_F_HOST_TSO6); 1864 boolean_t ecn = vioif_has_feature(vif, VIRTIO_NET_F_HOST_ECN); 1865 1866 /* 1867 * Explicit congestion notification (ECN) is configured 1868 * globally; see "tcp_ecn_permitted". As we cannot currently 1869 * request that the stack disable ECN on a per interface basis, 1870 * we require the device to support the combination of 1871 * segmentation offload and ECN support. 1872 */ 1873 if (gso) { 1874 vif->vif_tx_tso4 = 1; 1875 vif->vif_tx_tso6 = 1; 1876 } 1877 if (tso4 && ecn) { 1878 vif->vif_tx_tso4 = 1; 1879 } 1880 if (tso6 && ecn) { 1881 vif->vif_tx_tso6 = 1; 1882 } 1883 } 1884 1885 if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_VQ)) { 1886 vif->vif_has_ctrlq = 1; 1887 1888 /* 1889 * The VIRTIO_NET_F_CTRL_VQ feature must be enabled if there's 1890 * any chance of the VIRTIO_NET_F_CTRL_RX being enabled. 1891 */ 1892 if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_RX)) 1893 vif->vif_has_ctrlq_rx = 1; 1894 } 1895 } 1896 1897 static int 1898 vioif_select_interrupt_types(void) 1899 { 1900 id_t id; 1901 smbios_system_t sys; 1902 smbios_info_t info; 1903 1904 if (vioif_allowed_int_types != -1) { 1905 /* 1906 * If this value was tuned via /etc/system or the debugger, 1907 * use the provided value directly. 
1908 */ 1909 return (vioif_allowed_int_types); 1910 } 1911 1912 if (ksmbios == NULL || 1913 (id = smbios_info_system(ksmbios, &sys)) == SMB_ERR || 1914 smbios_info_common(ksmbios, id, &info) == SMB_ERR) { 1915 /* 1916 * The system may not have valid SMBIOS data, so ignore a 1917 * failure here. 1918 */ 1919 return (VIRTIO_ANY_INTR_TYPE); 1920 } 1921 1922 if (strcmp(info.smbi_manufacturer, "Google") == 0 && 1923 strcmp(info.smbi_product, "Google Compute Engine") == 0) { 1924 /* 1925 * An undiagnosed issue with the Google Compute Engine (GCE) 1926 * hypervisor exists. In this environment, no RX interrupts 1927 * are received if MSI-X handlers are installed. This does not 1928 * appear to be true for the Virtio SCSI driver. Fixed 1929 * interrupts do appear to work, so we fall back for now: 1930 */ 1931 return (DDI_INTR_TYPE_FIXED); 1932 } 1933 1934 return (VIRTIO_ANY_INTR_TYPE); 1935 } 1936 1937 static int 1938 vioif_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 1939 { 1940 int ret; 1941 vioif_t *vif; 1942 virtio_t *vio; 1943 mac_register_t *macp = NULL; 1944 1945 if (cmd != DDI_ATTACH) { 1946 return (DDI_FAILURE); 1947 } 1948 1949 if ((vio = virtio_init(dip, VIRTIO_NET_WANTED_FEATURES, B_TRUE)) == 1950 NULL) { 1951 return (DDI_FAILURE); 1952 } 1953 1954 vif = kmem_zalloc(sizeof (*vif), KM_SLEEP); 1955 vif->vif_dip = dip; 1956 vif->vif_virtio = vio; 1957 vif->vif_runstate = VIOIF_RUNSTATE_STOPPED; 1958 ddi_set_driver_private(dip, vif); 1959 1960 if ((vif->vif_rx_vq = virtio_queue_alloc(vio, VIRTIO_NET_VIRTQ_RX, 1961 "rx", vioif_rx_handler, vif, B_FALSE, VIOIF_MAX_SEGS)) == NULL || 1962 (vif->vif_tx_vq = virtio_queue_alloc(vio, VIRTIO_NET_VIRTQ_TX, 1963 "tx", vioif_tx_handler, vif, B_FALSE, VIOIF_MAX_SEGS)) == NULL) { 1964 goto fail; 1965 } 1966 1967 if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_VQ) && 1968 (vif->vif_ctrl_vq = virtio_queue_alloc(vio, 1969 VIRTIO_NET_VIRTQ_CONTROL, "ctrlq", NULL, vif, 1970 B_FALSE, VIOIF_MAX_SEGS)) == NULL) { 1971 goto fail; 1972 } 1973 1974 if (virtio_init_complete(vio, vioif_select_interrupt_types()) != 1975 DDI_SUCCESS) { 1976 dev_err(dip, CE_WARN, "failed to complete Virtio init"); 1977 goto fail; 1978 } 1979 1980 virtio_queue_no_interrupt(vif->vif_rx_vq, B_TRUE); 1981 virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE); 1982 if (vif->vif_ctrl_vq != NULL) 1983 virtio_queue_no_interrupt(vif->vif_ctrl_vq, B_TRUE); 1984 1985 mutex_init(&vif->vif_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio)); 1986 mutex_enter(&vif->vif_mutex); 1987 1988 vioif_get_mac(vif); 1989 1990 vif->vif_rxcopy_thresh = VIOIF_MACPROP_RXCOPY_THRESH_DEF; 1991 vif->vif_txcopy_thresh = VIOIF_MACPROP_TXCOPY_THRESH_DEF; 1992 1993 if (vioif_has_feature(vif, VIRTIO_NET_F_MTU)) { 1994 vif->vif_mtu_max = virtio_dev_get16(vio, VIRTIO_NET_CONFIG_MTU); 1995 } else { 1996 vif->vif_mtu_max = ETHERMTU; 1997 } 1998 1999 vif->vif_mtu = ETHERMTU; 2000 if (vif->vif_mtu > vif->vif_mtu_max) { 2001 vif->vif_mtu = vif->vif_mtu_max; 2002 } 2003 2004 vioif_check_features(vif); 2005 2006 if (vioif_alloc_bufs(vif) != 0) { 2007 mutex_exit(&vif->vif_mutex); 2008 dev_err(dip, CE_WARN, "failed to allocate memory"); 2009 goto fail; 2010 } 2011 2012 mutex_exit(&vif->vif_mutex); 2013 2014 if (virtio_interrupts_enable(vio) != DDI_SUCCESS) { 2015 dev_err(dip, CE_WARN, "failed to enable interrupts"); 2016 goto fail; 2017 } 2018 2019 if ((macp = mac_alloc(MAC_VERSION)) == NULL) { 2020 dev_err(dip, CE_WARN, "failed to allocate a mac_register"); 2021 goto fail; 2022 } 2023 2024 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 2025 
	macp->m_driver = vif;
	macp->m_dip = dip;
	macp->m_src_addr = vif->vif_mac;
	macp->m_callbacks = &vioif_mac_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = vif->vif_mtu;
	macp->m_margin = VLAN_TAGSZ;
	macp->m_priv_props = vioif_priv_props;

	if ((ret = mac_register(macp, &vif->vif_mac_handle)) != 0) {
		dev_err(dip, CE_WARN, "mac_register() failed (%d)", ret);
		goto fail;
	}
	mac_free(macp);

	mac_link_update(vif->vif_mac_handle, LINK_STATE_UP);

	return (DDI_SUCCESS);

fail:
	vioif_free_bufs(vif);
	if (macp != NULL) {
		mac_free(macp);
	}
	(void) virtio_fini(vio, B_TRUE);
	kmem_free(vif, sizeof (*vif));
	return (DDI_FAILURE);
}

static int
vioif_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int r;
	vioif_t *vif;

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}

	if ((vif = ddi_get_driver_private(dip)) == NULL) {
		return (DDI_FAILURE);
	}

	mutex_enter(&vif->vif_mutex);
	if (vif->vif_runstate != VIOIF_RUNSTATE_STOPPED) {
		dev_err(dip, CE_WARN, "!NIC still running, cannot detach");
		mutex_exit(&vif->vif_mutex);
		return (DDI_FAILURE);
	}

	/*
	 * There should be no outstanding transmit buffers once the NIC is
	 * completely stopped.
	 */
	VERIFY3U(vif->vif_ntxbufs_alloc, ==, 0);

	/*
	 * Though we cannot claw back all of the receive buffers until we reset
	 * the device, we must ensure all those loaned to MAC have been
	 * returned before calling mac_unregister().
	 */
	if (vif->vif_nrxbufs_onloan > 0) {
		dev_err(dip, CE_WARN, "!%u receive buffers still loaned, "
		    "cannot detach", vif->vif_nrxbufs_onloan);
		mutex_exit(&vif->vif_mutex);
		return (DDI_FAILURE);
	}

	if ((r = mac_unregister(vif->vif_mac_handle)) != 0) {
		dev_err(dip, CE_WARN, "!MAC unregister failed (%d)", r);
		return (DDI_FAILURE);
	}

	/*
	 * Shut down the device so that we can recover any previously
	 * submitted receive buffers.
	 */
	virtio_shutdown(vif->vif_virtio);
	for (;;) {
		virtio_chain_t *vic;

		if ((vic = virtio_queue_evacuate(vif->vif_rx_vq)) == NULL) {
			break;
		}

		vioif_rxbuf_t *rb = virtio_chain_data(vic);
		vioif_rxbuf_free(vif, rb);
	}

	/*
	 * vioif_free_bufs() must be called before virtio_fini()
	 * as it uses virtio_chain_free() which itself depends on some
	 * virtio data structures still being around.
2118 */ 2119 vioif_free_bufs(vif); 2120 (void) virtio_fini(vif->vif_virtio, B_FALSE); 2121 2122 mutex_exit(&vif->vif_mutex); 2123 mutex_destroy(&vif->vif_mutex); 2124 2125 kmem_free(vif, sizeof (*vif)); 2126 2127 return (DDI_SUCCESS); 2128 } 2129 2130 static int 2131 vioif_quiesce(dev_info_t *dip) 2132 { 2133 vioif_t *vif; 2134 2135 if ((vif = ddi_get_driver_private(dip)) == NULL) 2136 return (DDI_FAILURE); 2137 2138 return (virtio_quiesce(vif->vif_virtio)); 2139 } 2140 2141 int 2142 _init(void) 2143 { 2144 int ret; 2145 2146 mac_init_ops(&vioif_dev_ops, "vioif"); 2147 2148 if ((ret = mod_install(&vioif_modlinkage)) != DDI_SUCCESS) { 2149 mac_fini_ops(&vioif_dev_ops); 2150 } 2151 2152 return (ret); 2153 } 2154 2155 int 2156 _fini(void) 2157 { 2158 int ret; 2159 2160 if ((ret = mod_remove(&vioif_modlinkage)) == DDI_SUCCESS) { 2161 mac_fini_ops(&vioif_dev_ops); 2162 } 2163 2164 return (ret); 2165 } 2166 2167 int 2168 _info(struct modinfo *modinfop) 2169 { 2170 return (mod_info(&vioif_modlinkage, modinfop)); 2171 } 2172