1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2013 Nexenta Inc. All rights reserved. 14 * Copyright (c) 2014, 2016 by Delphix. All rights reserved. 15 * Copyright 2021 Joyent, Inc. 16 * Copyright 2019 Joshua M. Clulow <josh@sysmgr.org> 17 */ 18 19 /* Based on the NetBSD virtio driver by Minoura Makoto. */ 20 /* 21 * Copyright (c) 2010 Minoura Makoto. 22 * All rights reserved. 23 * 24 * Redistribution and use in source and binary forms, with or without 25 * modification, are permitted provided that the following conditions 26 * are met: 27 * 1. Redistributions of source code must retain the above copyright 28 * notice, this list of conditions and the following disclaimer. 29 * 2. Redistributions in binary form must reproduce the above copyright 30 * notice, this list of conditions and the following disclaimer in the 31 * documentation and/or other materials provided with the distribution. 32 * 33 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 34 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 35 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 36 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 38 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 39 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 40 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 42 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 */ 44 45 /* 46 * VIRTIO NETWORK DRIVER 47 */ 48 49 #include <sys/types.h> 50 #include <sys/errno.h> 51 #include <sys/param.h> 52 #include <sys/stropts.h> 53 #include <sys/stream.h> 54 #include <sys/strsubr.h> 55 #include <sys/kmem.h> 56 #include <sys/conf.h> 57 #include <sys/devops.h> 58 #include <sys/ksynch.h> 59 #include <sys/stat.h> 60 #include <sys/modctl.h> 61 #include <sys/debug.h> 62 #include <sys/pci.h> 63 #include <sys/ethernet.h> 64 #include <sys/vlan.h> 65 #include <sys/sysmacros.h> 66 #include <sys/smbios.h> 67 68 #include <sys/dlpi.h> 69 #include <sys/taskq.h> 70 71 #include <sys/pattr.h> 72 #include <sys/strsun.h> 73 74 #include <sys/random.h> 75 #include <sys/containerof.h> 76 #include <sys/stream.h> 77 #include <inet/tcp.h> 78 79 #include <sys/mac.h> 80 #include <sys/mac_provider.h> 81 #include <sys/mac_ether.h> 82 83 #include "virtio.h" 84 #include "vioif.h" 85 86 87 static int vioif_quiesce(dev_info_t *); 88 static int vioif_attach(dev_info_t *, ddi_attach_cmd_t); 89 static int vioif_detach(dev_info_t *, ddi_detach_cmd_t); 90 static boolean_t vioif_has_feature(vioif_t *, uint32_t); 91 static void vioif_reclaim_restart(vioif_t *); 92 static int vioif_m_stat(void *, uint_t, uint64_t *); 93 static void vioif_m_stop(void *); 94 static int vioif_m_start(void *); 95 static int vioif_m_multicst(void *, boolean_t, const uint8_t *); 96 static int vioif_m_setpromisc(void *, boolean_t); 97 static int vioif_m_unicst(void *, const uint8_t *); 98 static mblk_t *vioif_m_tx(void *, mblk_t *); 99 static int vioif_m_setprop(void *, const char *, mac_prop_id_t, uint_t, 100 const void *); 101 static int vioif_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *); 102 static void vioif_m_propinfo(void *, const char *, mac_prop_id_t, 103 mac_prop_info_handle_t); 104 static boolean_t vioif_m_getcapab(void *, mac_capab_t, void *); 105 static uint_t vioif_add_rx(vioif_t *); 106 107 108 static struct cb_ops vioif_cb_ops = { 109 .cb_rev = CB_REV, 110 .cb_flag = D_MP | D_NEW, 111 112 .cb_open = nulldev, 113 .cb_close = nulldev, 114 .cb_strategy = nodev, 115 .cb_print = nodev, 116 .cb_dump = nodev, 117 .cb_read = nodev, 118 .cb_write = nodev, 119 .cb_ioctl = nodev, 120 .cb_devmap = nodev, 121 .cb_mmap = nodev, 122 .cb_segmap = nodev, 123 .cb_chpoll = nochpoll, 124 .cb_prop_op = ddi_prop_op, 125 .cb_str = NULL, 126 .cb_aread = nodev, 127 .cb_awrite = nodev, 128 }; 129 130 static struct dev_ops vioif_dev_ops = { 131 .devo_rev = DEVO_REV, 132 .devo_refcnt = 0, 133 134 .devo_attach = vioif_attach, 135 .devo_detach = vioif_detach, 136 .devo_quiesce = vioif_quiesce, 137 138 .devo_cb_ops = &vioif_cb_ops, 139 140 .devo_getinfo = NULL, 141 .devo_identify = nulldev, 142 .devo_probe = nulldev, 143 .devo_reset = nodev, 144 .devo_bus_ops = NULL, 145 .devo_power = NULL, 146 }; 147 148 static struct modldrv vioif_modldrv = { 149 .drv_modops = &mod_driverops, 150 .drv_linkinfo = "VIRTIO network driver", 151 .drv_dev_ops = &vioif_dev_ops 152 }; 153 154 static struct modlinkage vioif_modlinkage = { 155 .ml_rev = MODREV_1, 156 .ml_linkage = { &vioif_modldrv, NULL } 157 }; 158 159 static mac_callbacks_t vioif_mac_callbacks = { 160 .mc_getstat = vioif_m_stat, 161 .mc_start = vioif_m_start, 162 .mc_stop = vioif_m_stop, 163 .mc_setpromisc = vioif_m_setpromisc, 164 .mc_multicst = vioif_m_multicst, 165 .mc_unicst = vioif_m_unicst, 166 .mc_tx = vioif_m_tx, 167 168 .mc_callbacks = (MC_GETCAPAB | MC_SETPROP | 169 MC_GETPROP | MC_PROPINFO), 170 .mc_getcapab = vioif_m_getcapab, 171 .mc_setprop = vioif_m_setprop, 172 .mc_getprop = vioif_m_getprop, 173 .mc_propinfo = vioif_m_propinfo, 174 }; 175 176 static const uchar_t vioif_broadcast[ETHERADDRL] = { 177 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF 178 }; 179 180 /* 181 * Interval for the periodic TX reclaim. 182 */ 183 uint_t vioif_reclaim_ms = 200; 184 185 /* 186 * Allow the operator to override the kinds of interrupts we'll use for 187 * vioif. This value defaults to -1 so that it can be overridden to 0 in 188 * /etc/system. 189 */ 190 int vioif_allowed_int_types = -1; 191 192 /* 193 * DMA attribute template for transmit and receive buffers. The SGL entry 194 * count will be modified before using the template. Note that these 195 * allocations are aligned so that VIOIF_HEADER_SKIP places the IP header in 196 * received frames at the correct offset for the networking stack. 197 */ 198 ddi_dma_attr_t vioif_dma_attr_bufs = { 199 .dma_attr_version = DMA_ATTR_V0, 200 .dma_attr_addr_lo = 0x0000000000000000, 201 .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF, 202 .dma_attr_count_max = 0x00000000FFFFFFFF, 203 .dma_attr_align = VIOIF_HEADER_ALIGN, 204 .dma_attr_burstsizes = 1, 205 .dma_attr_minxfer = 1, 206 .dma_attr_maxxfer = 0x00000000FFFFFFFF, 207 .dma_attr_seg = 0x00000000FFFFFFFF, 208 .dma_attr_sgllen = 0, 209 .dma_attr_granular = 1, 210 .dma_attr_flags = 0 211 }; 212 213 /* 214 * DMA attributes for mapping larger transmit buffers from the networking 215 * stack. The requirements are quite loose, but note that the SGL entry length 216 * field is 32-bit. 217 */ 218 ddi_dma_attr_t vioif_dma_attr_external = { 219 .dma_attr_version = DMA_ATTR_V0, 220 .dma_attr_addr_lo = 0x0000000000000000, 221 .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF, 222 .dma_attr_count_max = 0x00000000FFFFFFFF, 223 .dma_attr_align = 1, 224 .dma_attr_burstsizes = 1, 225 .dma_attr_minxfer = 1, 226 .dma_attr_maxxfer = 0x00000000FFFFFFFF, 227 .dma_attr_seg = 0x00000000FFFFFFFF, 228 .dma_attr_sgllen = VIOIF_MAX_SEGS - 1, 229 .dma_attr_granular = 1, 230 .dma_attr_flags = 0 231 }; 232 233 234 /* 235 * VIRTIO NET MAC PROPERTIES 236 */ 237 #define VIOIF_MACPROP_TXCOPY_THRESH "_txcopy_thresh" 238 #define VIOIF_MACPROP_TXCOPY_THRESH_DEF 300 239 #define VIOIF_MACPROP_TXCOPY_THRESH_MAX 640 240 241 #define VIOIF_MACPROP_RXCOPY_THRESH "_rxcopy_thresh" 242 #define VIOIF_MACPROP_RXCOPY_THRESH_DEF 300 243 #define VIOIF_MACPROP_RXCOPY_THRESH_MAX 640 244 245 static char *vioif_priv_props[] = { 246 VIOIF_MACPROP_TXCOPY_THRESH, 247 VIOIF_MACPROP_RXCOPY_THRESH, 248 NULL 249 }; 250 251 252 static vioif_txbuf_t * 253 vioif_txbuf_alloc(vioif_t *vif) 254 { 255 vioif_txbuf_t *tb; 256 257 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 258 259 if ((tb = list_remove_head(&vif->vif_txbufs)) != NULL) { 260 vif->vif_ntxbufs_alloc++; 261 } 262 263 return (tb); 264 } 265 266 static void 267 vioif_txbuf_free(vioif_t *vif, vioif_txbuf_t *tb) 268 { 269 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 270 271 VERIFY3U(vif->vif_ntxbufs_alloc, >, 0); 272 vif->vif_ntxbufs_alloc--; 273 274 virtio_chain_clear(tb->tb_chain); 275 list_insert_head(&vif->vif_txbufs, tb); 276 } 277 278 static vioif_rxbuf_t * 279 vioif_rxbuf_alloc(vioif_t *vif) 280 { 281 vioif_rxbuf_t *rb; 282 283 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 284 285 if ((rb = list_remove_head(&vif->vif_rxbufs)) != NULL) { 286 vif->vif_nrxbufs_alloc++; 287 } 288 289 return (rb); 290 } 291 292 static void 293 vioif_rxbuf_free(vioif_t *vif, vioif_rxbuf_t *rb) 294 { 295 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 296 297 VERIFY3U(vif->vif_nrxbufs_alloc, >, 0); 298 vif->vif_nrxbufs_alloc--; 299 300 virtio_chain_clear(rb->rb_chain); 301 list_insert_head(&vif->vif_rxbufs, rb); 302 } 303 304 static void 305 vioif_rx_free_callback(caddr_t free_arg) 306 { 307 vioif_rxbuf_t *rb = (vioif_rxbuf_t *)free_arg; 308 vioif_t *vif = rb->rb_vioif; 309 310 mutex_enter(&vif->vif_mutex); 311 312 /* 313 * Return this receive buffer to the free list. 314 */ 315 vioif_rxbuf_free(vif, rb); 316 317 VERIFY3U(vif->vif_nrxbufs_onloan, >, 0); 318 vif->vif_nrxbufs_onloan--; 319 320 /* 321 * Attempt to replenish the receive queue with at least the buffer we 322 * just freed. There isn't a great way to deal with failure here, 323 * though because we'll only loan at most half of the buffers there 324 * should always be at least some available even if this fails. 325 */ 326 (void) vioif_add_rx(vif); 327 328 mutex_exit(&vif->vif_mutex); 329 } 330 331 static vioif_ctrlbuf_t * 332 vioif_ctrlbuf_alloc(vioif_t *vif) 333 { 334 vioif_ctrlbuf_t *cb; 335 336 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 337 338 if ((cb = list_remove_head(&vif->vif_ctrlbufs)) != NULL) { 339 vif->vif_nctrlbufs_alloc++; 340 } 341 342 return (cb); 343 } 344 345 static void 346 vioif_ctrlbuf_free(vioif_t *vif, vioif_ctrlbuf_t *cb) 347 { 348 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 349 350 VERIFY3U(vif->vif_nctrlbufs_alloc, >, 0); 351 vif->vif_nctrlbufs_alloc--; 352 353 virtio_chain_clear(cb->cb_chain); 354 list_insert_head(&vif->vif_ctrlbufs, cb); 355 } 356 357 static void 358 vioif_free_bufs(vioif_t *vif) 359 { 360 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 361 362 VERIFY3U(vif->vif_ntxbufs_alloc, ==, 0); 363 for (uint_t i = 0; i < vif->vif_txbufs_capacity; i++) { 364 vioif_txbuf_t *tb = &vif->vif_txbufs_mem[i]; 365 366 /* 367 * Ensure that this txbuf is now in the free list: 368 */ 369 VERIFY(list_link_active(&tb->tb_link)); 370 list_remove(&vif->vif_txbufs, tb); 371 372 /* 373 * We should not have an mblk chain at this point. 374 */ 375 VERIFY3P(tb->tb_mp, ==, NULL); 376 377 if (tb->tb_dma != NULL) { 378 virtio_dma_free(tb->tb_dma); 379 tb->tb_dma = NULL; 380 } 381 382 if (tb->tb_chain != NULL) { 383 virtio_chain_free(tb->tb_chain); 384 tb->tb_chain = NULL; 385 } 386 387 if (tb->tb_dmaext != NULL) { 388 for (uint_t j = 0; j < tb->tb_dmaext_capacity; j++) { 389 if (tb->tb_dmaext[j] != NULL) { 390 virtio_dma_free( 391 tb->tb_dmaext[j]); 392 tb->tb_dmaext[j] = NULL; 393 } 394 } 395 396 kmem_free(tb->tb_dmaext, 397 sizeof (virtio_dma_t *) * tb->tb_dmaext_capacity); 398 tb->tb_dmaext = NULL; 399 tb->tb_dmaext_capacity = 0; 400 } 401 } 402 VERIFY(list_is_empty(&vif->vif_txbufs)); 403 if (vif->vif_txbufs_mem != NULL) { 404 kmem_free(vif->vif_txbufs_mem, 405 sizeof (vioif_txbuf_t) * vif->vif_txbufs_capacity); 406 vif->vif_txbufs_mem = NULL; 407 vif->vif_txbufs_capacity = 0; 408 } 409 410 VERIFY3U(vif->vif_nrxbufs_alloc, ==, 0); 411 for (uint_t i = 0; i < vif->vif_rxbufs_capacity; i++) { 412 vioif_rxbuf_t *rb = &vif->vif_rxbufs_mem[i]; 413 414 /* 415 * Ensure that this rxbuf is now in the free list: 416 */ 417 VERIFY(list_link_active(&rb->rb_link)); 418 list_remove(&vif->vif_rxbufs, rb); 419 420 if (rb->rb_dma != NULL) { 421 virtio_dma_free(rb->rb_dma); 422 rb->rb_dma = NULL; 423 } 424 425 if (rb->rb_chain != NULL) { 426 virtio_chain_free(rb->rb_chain); 427 rb->rb_chain = NULL; 428 } 429 } 430 VERIFY(list_is_empty(&vif->vif_rxbufs)); 431 if (vif->vif_rxbufs_mem != NULL) { 432 kmem_free(vif->vif_rxbufs_mem, 433 sizeof (vioif_rxbuf_t) * vif->vif_rxbufs_capacity); 434 vif->vif_rxbufs_mem = NULL; 435 vif->vif_rxbufs_capacity = 0; 436 } 437 438 if (vif->vif_has_ctrlq) { 439 VERIFY3U(vif->vif_nctrlbufs_alloc, ==, 0); 440 for (uint_t i = 0; i < vif->vif_ctrlbufs_capacity; i++) { 441 vioif_ctrlbuf_t *cb = &vif->vif_ctrlbufs_mem[i]; 442 443 /* 444 * Ensure that this ctrlbuf is now in the free list 445 */ 446 VERIFY(list_link_active(&cb->cb_link)); 447 list_remove(&vif->vif_ctrlbufs, cb); 448 449 if (cb->cb_dma != NULL) { 450 virtio_dma_free(cb->cb_dma); 451 cb->cb_dma = NULL; 452 } 453 454 if (cb->cb_chain != NULL) { 455 virtio_chain_free(cb->cb_chain); 456 cb->cb_chain = NULL; 457 } 458 } 459 VERIFY(list_is_empty(&vif->vif_ctrlbufs)); 460 if (vif->vif_ctrlbufs_mem != NULL) { 461 kmem_free(vif->vif_ctrlbufs_mem, 462 sizeof (vioif_ctrlbuf_t) * 463 vif->vif_ctrlbufs_capacity); 464 vif->vif_ctrlbufs_mem = NULL; 465 vif->vif_ctrlbufs_capacity = 0; 466 } 467 } 468 } 469 470 static int 471 vioif_alloc_bufs(vioif_t *vif) 472 { 473 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 474 475 /* 476 * Allocate one contiguous chunk of memory for the transmit and receive 477 * buffer tracking objects. If the ring is unusually small, we'll 478 * reduce our target buffer count accordingly. 479 */ 480 vif->vif_txbufs_capacity = MIN(VIRTIO_NET_TX_BUFS, 481 virtio_queue_size(vif->vif_tx_vq)); 482 vif->vif_txbufs_mem = kmem_zalloc( 483 sizeof (vioif_txbuf_t) * vif->vif_txbufs_capacity, KM_SLEEP); 484 list_create(&vif->vif_txbufs, sizeof (vioif_txbuf_t), 485 offsetof(vioif_txbuf_t, tb_link)); 486 487 vif->vif_rxbufs_capacity = MIN(VIRTIO_NET_RX_BUFS, 488 virtio_queue_size(vif->vif_rx_vq)); 489 vif->vif_rxbufs_mem = kmem_zalloc( 490 sizeof (vioif_rxbuf_t) * vif->vif_rxbufs_capacity, KM_SLEEP); 491 list_create(&vif->vif_rxbufs, sizeof (vioif_rxbuf_t), 492 offsetof(vioif_rxbuf_t, rb_link)); 493 494 if (vif->vif_has_ctrlq) { 495 vif->vif_ctrlbufs_capacity = MIN(VIRTIO_NET_CTRL_BUFS, 496 virtio_queue_size(vif->vif_ctrl_vq)); 497 vif->vif_ctrlbufs_mem = kmem_zalloc( 498 sizeof (vioif_ctrlbuf_t) * vif->vif_ctrlbufs_capacity, 499 KM_SLEEP); 500 } 501 list_create(&vif->vif_ctrlbufs, sizeof (vioif_ctrlbuf_t), 502 offsetof(vioif_ctrlbuf_t, cb_link)); 503 504 /* 505 * Do not loan more than half of our allocated receive buffers into 506 * the networking stack. 507 */ 508 vif->vif_nrxbufs_onloan_max = vif->vif_rxbufs_capacity / 2; 509 510 /* 511 * Put everything in the free list straight away in order to simplify 512 * the use of vioif_free_bufs() for cleanup on allocation failure. 513 */ 514 for (uint_t i = 0; i < vif->vif_txbufs_capacity; i++) { 515 list_insert_tail(&vif->vif_txbufs, &vif->vif_txbufs_mem[i]); 516 } 517 for (uint_t i = 0; i < vif->vif_rxbufs_capacity; i++) { 518 list_insert_tail(&vif->vif_rxbufs, &vif->vif_rxbufs_mem[i]); 519 } 520 for (uint_t i = 0; i < vif->vif_ctrlbufs_capacity; i++) { 521 list_insert_tail(&vif->vif_ctrlbufs, &vif->vif_ctrlbufs_mem[i]); 522 } 523 524 /* 525 * Start from the DMA attribute template common to both transmit and 526 * receive buffers. The SGL entry count will be modified for each 527 * buffer type. 528 */ 529 ddi_dma_attr_t attr = vioif_dma_attr_bufs; 530 531 /* 532 * The transmit inline buffer is small (less than a page), so it's 533 * reasonable to request a single cookie. 534 */ 535 attr.dma_attr_sgllen = 1; 536 537 for (vioif_txbuf_t *tb = list_head(&vif->vif_txbufs); tb != NULL; 538 tb = list_next(&vif->vif_txbufs, tb)) { 539 if ((tb->tb_dma = virtio_dma_alloc(vif->vif_virtio, 540 VIOIF_TX_INLINE_SIZE, &attr, 541 DDI_DMA_STREAMING | DDI_DMA_WRITE, KM_SLEEP)) == NULL) { 542 goto fail; 543 } 544 VERIFY3U(virtio_dma_ncookies(tb->tb_dma), ==, 1); 545 546 if ((tb->tb_chain = virtio_chain_alloc(vif->vif_tx_vq, 547 KM_SLEEP)) == NULL) { 548 goto fail; 549 } 550 virtio_chain_data_set(tb->tb_chain, tb); 551 552 tb->tb_dmaext_capacity = VIOIF_MAX_SEGS - 1; 553 tb->tb_dmaext = kmem_zalloc( 554 sizeof (virtio_dma_t *) * tb->tb_dmaext_capacity, 555 KM_SLEEP); 556 } 557 558 /* 559 * Control queue buffers are also small (less than a page), so we'll 560 * also request a single cookie for them. 561 */ 562 for (vioif_ctrlbuf_t *cb = list_head(&vif->vif_ctrlbufs); cb != NULL; 563 cb = list_next(&vif->vif_ctrlbufs, cb)) { 564 if ((cb->cb_dma = virtio_dma_alloc(vif->vif_virtio, 565 VIOIF_CTRL_SIZE, &attr, 566 DDI_DMA_STREAMING | DDI_DMA_RDWR, KM_SLEEP)) == NULL) { 567 goto fail; 568 } 569 VERIFY3U(virtio_dma_ncookies(cb->cb_dma), ==, 1); 570 571 if ((cb->cb_chain = virtio_chain_alloc(vif->vif_ctrl_vq, 572 KM_SLEEP)) == NULL) { 573 goto fail; 574 } 575 virtio_chain_data_set(cb->cb_chain, cb); 576 } 577 578 /* 579 * The receive buffers are larger, and we can tolerate a large number 580 * of segments. Adjust the SGL entry count, setting aside one segment 581 * for the virtio net header. 582 */ 583 attr.dma_attr_sgllen = VIOIF_MAX_SEGS - 1; 584 585 for (vioif_rxbuf_t *rb = list_head(&vif->vif_rxbufs); rb != NULL; 586 rb = list_next(&vif->vif_rxbufs, rb)) { 587 if ((rb->rb_dma = virtio_dma_alloc(vif->vif_virtio, 588 VIOIF_RX_BUF_SIZE, &attr, DDI_DMA_STREAMING | DDI_DMA_READ, 589 KM_SLEEP)) == NULL) { 590 goto fail; 591 } 592 593 if ((rb->rb_chain = virtio_chain_alloc(vif->vif_rx_vq, 594 KM_SLEEP)) == NULL) { 595 goto fail; 596 } 597 virtio_chain_data_set(rb->rb_chain, rb); 598 599 /* 600 * Ensure that the first cookie is sufficient to cover the 601 * header skip region plus one byte. 602 */ 603 VERIFY3U(virtio_dma_cookie_size(rb->rb_dma, 0), >=, 604 VIOIF_HEADER_SKIP + 1); 605 606 /* 607 * Ensure that the frame data begins at a location with a 608 * correctly aligned IP header. 609 */ 610 VERIFY3U((uintptr_t)virtio_dma_va(rb->rb_dma, 611 VIOIF_HEADER_SKIP) % 4, ==, 2); 612 613 rb->rb_vioif = vif; 614 rb->rb_frtn.free_func = vioif_rx_free_callback; 615 rb->rb_frtn.free_arg = (caddr_t)rb; 616 } 617 618 return (0); 619 620 fail: 621 vioif_free_bufs(vif); 622 return (ENOMEM); 623 } 624 625 static int 626 vioif_ctrlq_req(vioif_t *vif, uint8_t class, uint8_t cmd, void *data, 627 size_t datalen) 628 { 629 vioif_ctrlbuf_t *cb = NULL; 630 virtio_chain_t *vic = NULL; 631 uint8_t *p = NULL; 632 uint64_t pa = 0; 633 uint8_t *ackp = NULL; 634 struct virtio_net_ctrlq_hdr hdr = { 635 .vnch_class = class, 636 .vnch_command = cmd, 637 }; 638 const size_t hdrlen = sizeof (hdr); 639 const size_t acklen = 1; /* the ack is always 1 byte */ 640 size_t totlen = hdrlen + datalen + acklen; 641 int r = DDI_SUCCESS; 642 643 /* 644 * We shouldn't be called unless the ctrlq feature has been 645 * negotiated with the host 646 */ 647 VERIFY(vif->vif_has_ctrlq); 648 649 mutex_enter(&vif->vif_mutex); 650 cb = vioif_ctrlbuf_alloc(vif); 651 if (cb == NULL) { 652 vif->vif_noctrlbuf++; 653 mutex_exit(&vif->vif_mutex); 654 r = DDI_FAILURE; 655 goto done; 656 } 657 mutex_exit(&vif->vif_mutex); 658 659 if (totlen > virtio_dma_size(cb->cb_dma)) { 660 vif->vif_ctrlbuf_toosmall++; 661 r = DDI_FAILURE; 662 goto done; 663 } 664 665 /* 666 * Clear the entire buffer. Technically not necessary, but useful 667 * if trying to troubleshoot an issue, and probably not a bad idea 668 * to not let any old data linger. 669 */ 670 p = virtio_dma_va(cb->cb_dma, 0); 671 bzero(p, virtio_dma_size(cb->cb_dma)); 672 673 /* 674 * We currently do not support VIRTIO_F_ANY_LAYOUT. That means, 675 * that we must put the header, the data, and the ack in their 676 * own respective descriptors. Since all the currently supported 677 * control queue commands take _very_ small amounts of data, we 678 * use a single DMA buffer for all of it, but use 3 descriptors to 679 * reference (respectively) the header, the data, and the ack byte 680 * within that memory to adhere to the virtio spec. 681 * 682 * If we add support for control queue features such as custom 683 * MAC filtering tables, which might require larger amounts of 684 * memory, we likely will want to add more sophistication here 685 * and optionally use additional allocated memory to hold that 686 * data instead of a fixed size buffer. 687 * 688 * Copy the header. 689 */ 690 bcopy(&hdr, p, sizeof (hdr)); 691 pa = virtio_dma_cookie_pa(cb->cb_dma, 0); 692 if ((r = virtio_chain_append(cb->cb_chain, 693 pa, hdrlen, VIRTIO_DIR_DEVICE_READS)) != DDI_SUCCESS) { 694 goto done; 695 } 696 697 /* 698 * Copy the request data 699 */ 700 p = virtio_dma_va(cb->cb_dma, hdrlen); 701 bcopy(data, p, datalen); 702 if ((r = virtio_chain_append(cb->cb_chain, 703 pa + hdrlen, datalen, VIRTIO_DIR_DEVICE_READS)) != DDI_SUCCESS) { 704 goto done; 705 } 706 707 /* 708 * We already cleared the buffer, so don't need to copy out a 0 for 709 * the ack byte. Just add a descriptor for that spot. 710 */ 711 ackp = virtio_dma_va(cb->cb_dma, hdrlen + datalen); 712 if ((r = virtio_chain_append(cb->cb_chain, 713 pa + hdrlen + datalen, acklen, 714 VIRTIO_DIR_DEVICE_WRITES)) != DDI_SUCCESS) { 715 goto done; 716 } 717 718 virtio_dma_sync(cb->cb_dma, DDI_DMA_SYNC_FORDEV); 719 virtio_chain_submit(cb->cb_chain, B_TRUE); 720 721 /* 722 * Spin waiting for response. 723 */ 724 mutex_enter(&vif->vif_mutex); 725 while ((vic = virtio_queue_poll(vif->vif_ctrl_vq)) == NULL) { 726 mutex_exit(&vif->vif_mutex); 727 delay(drv_usectohz(1000)); 728 mutex_enter(&vif->vif_mutex); 729 } 730 731 virtio_dma_sync(cb->cb_dma, DDI_DMA_SYNC_FORCPU); 732 VERIFY3P(virtio_chain_data(vic), ==, cb); 733 mutex_exit(&vif->vif_mutex); 734 735 if (*ackp != VIRTIO_NET_CQ_OK) { 736 r = DDI_FAILURE; 737 } 738 739 done: 740 mutex_enter(&vif->vif_mutex); 741 vioif_ctrlbuf_free(vif, cb); 742 mutex_exit(&vif->vif_mutex); 743 744 return (r); 745 } 746 747 static int 748 vioif_m_multicst(void *arg, boolean_t add, const uint8_t *mcst_addr) 749 { 750 /* 751 * Even though we currently do not have support for programming 752 * multicast filters, or even enabling promiscuous mode, we return 753 * success here to avoid the networking stack falling back to link 754 * layer broadcast for multicast traffic. Some hypervisors already 755 * pass received multicast frames onto the guest, so at least on those 756 * systems multicast will work as expected anyway. 757 */ 758 return (0); 759 } 760 761 static int 762 vioif_m_setpromisc(void *arg, boolean_t on) 763 { 764 vioif_t *vif = arg; 765 uint8_t val = on ? 1 : 0; 766 767 if (!vif->vif_has_ctrlq_rx) { 768 /* 769 * While most hypervisors support the control queue, bhyve 770 * (or more specifically viona) on illumos currently does not. 771 * 772 * Until that support is added to viona, we pretend 773 * the request always succeeds to match the historic behavior 774 * of the illumos vioif driver. Once that support has been 775 * added to viona, we should do the correct thing and return 776 * ENOTSUP 777 */ 778 return (0); 779 } 780 781 return (vioif_ctrlq_req(vif, VIRTIO_NET_CTRL_RX, 782 VIRTIO_NET_CTRL_RX_PROMISC, &val, sizeof (val))); 783 } 784 785 static int 786 vioif_m_unicst(void *arg, const uint8_t *mac) 787 { 788 return (ENOTSUP); 789 } 790 791 static uint_t 792 vioif_add_rx(vioif_t *vif) 793 { 794 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 795 796 if (vif->vif_runstate != VIOIF_RUNSTATE_RUNNING) { 797 /* 798 * If the NIC is not running, do not give the device any 799 * receive buffers. 800 */ 801 return (0); 802 } 803 804 uint_t num_added = 0; 805 806 vioif_rxbuf_t *rb; 807 while ((rb = vioif_rxbuf_alloc(vif)) != NULL) { 808 /* 809 * For legacy devices, and those that have not negotiated 810 * VIRTIO_F_ANY_LAYOUT, the virtio net header must appear in a 811 * separate descriptor entry to the rest of the buffer. 812 */ 813 if (virtio_chain_append(rb->rb_chain, 814 virtio_dma_cookie_pa(rb->rb_dma, 0), 815 sizeof (struct virtio_net_hdr), 816 VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) { 817 goto fail; 818 } 819 820 for (uint_t n = 0; n < virtio_dma_ncookies(rb->rb_dma); n++) { 821 uint64_t pa = virtio_dma_cookie_pa(rb->rb_dma, n); 822 size_t sz = virtio_dma_cookie_size(rb->rb_dma, n); 823 824 if (n == 0) { 825 pa += VIOIF_HEADER_SKIP; 826 VERIFY3U(sz, >, VIOIF_HEADER_SKIP); 827 sz -= VIOIF_HEADER_SKIP; 828 } 829 830 if (virtio_chain_append(rb->rb_chain, pa, sz, 831 VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) { 832 goto fail; 833 } 834 } 835 836 virtio_chain_submit(rb->rb_chain, B_FALSE); 837 num_added++; 838 continue; 839 840 fail: 841 vioif_rxbuf_free(vif, rb); 842 vif->vif_norecvbuf++; 843 break; 844 } 845 846 if (num_added > 0) { 847 virtio_queue_flush(vif->vif_rx_vq); 848 } 849 850 return (num_added); 851 } 852 853 static uint_t 854 vioif_process_rx(vioif_t *vif) 855 { 856 virtio_chain_t *vic; 857 mblk_t *mphead = NULL, *lastmp = NULL, *mp; 858 uint_t num_processed = 0; 859 860 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 861 862 while ((vic = virtio_queue_poll(vif->vif_rx_vq)) != NULL) { 863 /* 864 * We have to use the chain received length here, as the device 865 * does not tell us the received frame length any other way. 866 * In a limited survey of hypervisors, virtio network devices 867 * appear to provide the right value here. 868 */ 869 size_t len = virtio_chain_received_length(vic); 870 vioif_rxbuf_t *rb = virtio_chain_data(vic); 871 872 virtio_dma_sync(rb->rb_dma, DDI_DMA_SYNC_FORCPU); 873 874 /* 875 * If the NIC is not running, discard any received frames. 876 */ 877 if (vif->vif_runstate != VIOIF_RUNSTATE_RUNNING) { 878 vioif_rxbuf_free(vif, rb); 879 continue; 880 } 881 882 if (len < sizeof (struct virtio_net_hdr)) { 883 vif->vif_rxfail_chain_undersize++; 884 vif->vif_ierrors++; 885 vioif_rxbuf_free(vif, rb); 886 continue; 887 } 888 len -= sizeof (struct virtio_net_hdr); 889 890 /* 891 * We copy small packets that happen to fit into a single 892 * cookie and reuse the buffers. For bigger ones, we loan 893 * the buffers upstream. 894 */ 895 if (len < vif->vif_rxcopy_thresh || 896 vif->vif_nrxbufs_onloan >= vif->vif_nrxbufs_onloan_max) { 897 mutex_exit(&vif->vif_mutex); 898 if ((mp = allocb(len, 0)) == NULL) { 899 mutex_enter(&vif->vif_mutex); 900 vif->vif_norecvbuf++; 901 vif->vif_ierrors++; 902 903 vioif_rxbuf_free(vif, rb); 904 continue; 905 } 906 907 bcopy(virtio_dma_va(rb->rb_dma, VIOIF_HEADER_SKIP), 908 mp->b_rptr, len); 909 mp->b_wptr = mp->b_rptr + len; 910 911 /* 912 * As the packet contents was copied rather than 913 * loaned, we can return the receive buffer resources 914 * to the free list. 915 */ 916 mutex_enter(&vif->vif_mutex); 917 vioif_rxbuf_free(vif, rb); 918 919 } else { 920 mutex_exit(&vif->vif_mutex); 921 if ((mp = desballoc(virtio_dma_va(rb->rb_dma, 922 VIOIF_HEADER_SKIP), len, 0, 923 &rb->rb_frtn)) == NULL) { 924 mutex_enter(&vif->vif_mutex); 925 vif->vif_norecvbuf++; 926 vif->vif_ierrors++; 927 928 vioif_rxbuf_free(vif, rb); 929 continue; 930 } 931 mp->b_wptr = mp->b_rptr + len; 932 933 mutex_enter(&vif->vif_mutex); 934 vif->vif_nrxbufs_onloan++; 935 } 936 937 /* 938 * virtio-net does not tell us if this packet is multicast 939 * or broadcast, so we have to check it. 940 */ 941 if (mp->b_rptr[0] & 0x1) { 942 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0) 943 vif->vif_multircv++; 944 else 945 vif->vif_brdcstrcv++; 946 } 947 948 vif->vif_rbytes += len; 949 vif->vif_ipackets++; 950 951 if (lastmp == NULL) { 952 mphead = mp; 953 } else { 954 lastmp->b_next = mp; 955 } 956 lastmp = mp; 957 num_processed++; 958 } 959 960 if (mphead != NULL) { 961 if (vif->vif_runstate == VIOIF_RUNSTATE_RUNNING) { 962 mutex_exit(&vif->vif_mutex); 963 mac_rx(vif->vif_mac_handle, NULL, mphead); 964 mutex_enter(&vif->vif_mutex); 965 } else { 966 /* 967 * The NIC was disabled part way through our execution, 968 * so free the messages we allocated. 969 */ 970 freemsgchain(mphead); 971 } 972 } 973 974 return (num_processed); 975 } 976 977 static uint_t 978 vioif_reclaim_used_tx(vioif_t *vif) 979 { 980 virtio_chain_t *vic; 981 uint_t num_reclaimed = 0; 982 983 VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex)); 984 985 while ((vic = virtio_queue_poll(vif->vif_tx_vq)) != NULL) { 986 vioif_txbuf_t *tb = virtio_chain_data(vic); 987 988 if (tb->tb_mp != NULL) { 989 /* 990 * Unbind the external mapping. 991 */ 992 for (uint_t i = 0; i < tb->tb_dmaext_capacity; i++) { 993 if (tb->tb_dmaext[i] == NULL) { 994 continue; 995 } 996 997 virtio_dma_unbind(tb->tb_dmaext[i]); 998 } 999 1000 freemsg(tb->tb_mp); 1001 tb->tb_mp = NULL; 1002 } 1003 1004 /* 1005 * Return this transmit buffer to the free list for reuse. 1006 */ 1007 mutex_enter(&vif->vif_mutex); 1008 vioif_txbuf_free(vif, tb); 1009 mutex_exit(&vif->vif_mutex); 1010 1011 num_reclaimed++; 1012 } 1013 1014 /* Return ring to transmitting state if descriptors were reclaimed. */ 1015 if (num_reclaimed > 0) { 1016 boolean_t do_update = B_FALSE; 1017 1018 mutex_enter(&vif->vif_mutex); 1019 vif->vif_stat_tx_reclaim += num_reclaimed; 1020 if (vif->vif_tx_corked) { 1021 /* 1022 * TX was corked on a lack of available descriptors. 1023 * That dire state has passed so the TX interrupt can 1024 * be disabled and MAC can be notified that 1025 * transmission is possible again. 1026 */ 1027 vif->vif_tx_corked = B_FALSE; 1028 virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE); 1029 do_update = B_TRUE; 1030 } 1031 1032 if (do_update) { 1033 mac_tx_update(vif->vif_mac_handle); 1034 } 1035 mutex_exit(&vif->vif_mutex); 1036 } 1037 1038 return (num_reclaimed); 1039 } 1040 1041 static void 1042 vioif_reclaim_periodic(void *arg) 1043 { 1044 vioif_t *vif = arg; 1045 uint_t num_reclaimed; 1046 1047 num_reclaimed = vioif_reclaim_used_tx(vif); 1048 1049 mutex_enter(&vif->vif_mutex); 1050 vif->vif_tx_reclaim_tid = 0; 1051 /* 1052 * If used descriptors were reclaimed or TX descriptors appear to be 1053 * outstanding, the ring is considered active and periodic reclamation 1054 * is necessary for now. 1055 */ 1056 if (num_reclaimed != 0 || virtio_queue_nactive(vif->vif_tx_vq) != 0) { 1057 /* Do not reschedule if the ring is being drained. */ 1058 if (!vif->vif_tx_drain) { 1059 vioif_reclaim_restart(vif); 1060 } 1061 } 1062 mutex_exit(&vif->vif_mutex); 1063 } 1064 1065 static void 1066 vioif_reclaim_restart(vioif_t *vif) 1067 { 1068 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 1069 VERIFY(!vif->vif_tx_drain); 1070 1071 if (vif->vif_tx_reclaim_tid == 0) { 1072 vif->vif_tx_reclaim_tid = timeout(vioif_reclaim_periodic, vif, 1073 MSEC_TO_TICK_ROUNDUP(vioif_reclaim_ms)); 1074 } 1075 } 1076 1077 static void 1078 vioif_tx_drain(vioif_t *vif) 1079 { 1080 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 1081 VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_STOPPING); 1082 1083 vif->vif_tx_drain = B_TRUE; 1084 /* Put a stop to the periodic reclaim if it is running */ 1085 if (vif->vif_tx_reclaim_tid != 0) { 1086 timeout_id_t tid = vif->vif_tx_reclaim_tid; 1087 1088 /* 1089 * With vif_tx_drain set, there is no risk that a racing 1090 * vioif_reclaim_periodic() call will reschedule itself. 1091 * 1092 * Being part of the mc_stop hook also guarantees that 1093 * vioif_m_tx() will not be called to restart it. 1094 */ 1095 vif->vif_tx_reclaim_tid = 0; 1096 mutex_exit(&vif->vif_mutex); 1097 (void) untimeout(tid); 1098 mutex_enter(&vif->vif_mutex); 1099 } 1100 virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE); 1101 1102 /* 1103 * Wait for all of the TX descriptors to be processed by the host so 1104 * they can be reclaimed. 1105 */ 1106 while (vif->vif_ntxbufs_alloc > 0) { 1107 mutex_exit(&vif->vif_mutex); 1108 (void) vioif_reclaim_used_tx(vif); 1109 delay(5); 1110 mutex_enter(&vif->vif_mutex); 1111 } 1112 VERIFY(!vif->vif_tx_corked); 1113 VERIFY3U(vif->vif_tx_reclaim_tid, ==, 0); 1114 VERIFY3U(virtio_queue_nactive(vif->vif_tx_vq), ==, 0); 1115 } 1116 1117 static int 1118 vioif_tx_inline(vioif_t *vif, vioif_txbuf_t *tb, mblk_t *mp, size_t msg_size) 1119 { 1120 VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex)); 1121 1122 VERIFY3U(msg_size, <=, virtio_dma_size(tb->tb_dma) - VIOIF_HEADER_SKIP); 1123 1124 /* 1125 * Copy the message into the inline buffer and then free the message. 1126 */ 1127 mcopymsg(mp, virtio_dma_va(tb->tb_dma, VIOIF_HEADER_SKIP)); 1128 1129 if (virtio_chain_append(tb->tb_chain, 1130 virtio_dma_cookie_pa(tb->tb_dma, 0) + VIOIF_HEADER_SKIP, 1131 msg_size, VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) { 1132 return (DDI_FAILURE); 1133 } 1134 1135 return (DDI_SUCCESS); 1136 } 1137 1138 static int 1139 vioif_tx_external(vioif_t *vif, vioif_txbuf_t *tb, mblk_t *mp, size_t msg_size) 1140 { 1141 VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex)); 1142 1143 mblk_t *nmp = mp; 1144 tb->tb_ndmaext = 0; 1145 1146 while (nmp != NULL) { 1147 size_t len; 1148 1149 if ((len = MBLKL(nmp)) == 0) { 1150 /* 1151 * Skip any zero-length entries in the chain. 1152 */ 1153 nmp = nmp->b_cont; 1154 continue; 1155 } 1156 1157 if (tb->tb_ndmaext >= tb->tb_dmaext_capacity) { 1158 mutex_enter(&vif->vif_mutex); 1159 vif->vif_txfail_indirect_limit++; 1160 vif->vif_notxbuf++; 1161 mutex_exit(&vif->vif_mutex); 1162 goto fail; 1163 } 1164 1165 if (tb->tb_dmaext[tb->tb_ndmaext] == NULL) { 1166 /* 1167 * Allocate a DMA handle for this slot. 1168 */ 1169 if ((tb->tb_dmaext[tb->tb_ndmaext] = 1170 virtio_dma_alloc_nomem(vif->vif_virtio, 1171 &vioif_dma_attr_external, KM_SLEEP)) == NULL) { 1172 mutex_enter(&vif->vif_mutex); 1173 vif->vif_notxbuf++; 1174 mutex_exit(&vif->vif_mutex); 1175 goto fail; 1176 } 1177 } 1178 virtio_dma_t *extdma = tb->tb_dmaext[tb->tb_ndmaext++]; 1179 1180 if (virtio_dma_bind(extdma, nmp->b_rptr, len, 1181 DDI_DMA_WRITE | DDI_DMA_STREAMING, KM_SLEEP) != 1182 DDI_SUCCESS) { 1183 mutex_enter(&vif->vif_mutex); 1184 vif->vif_txfail_dma_bind++; 1185 mutex_exit(&vif->vif_mutex); 1186 goto fail; 1187 } 1188 1189 for (uint_t n = 0; n < virtio_dma_ncookies(extdma); n++) { 1190 uint64_t pa = virtio_dma_cookie_pa(extdma, n); 1191 size_t sz = virtio_dma_cookie_size(extdma, n); 1192 1193 if (virtio_chain_append(tb->tb_chain, pa, sz, 1194 VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) { 1195 mutex_enter(&vif->vif_mutex); 1196 vif->vif_txfail_indirect_limit++; 1197 vif->vif_notxbuf++; 1198 mutex_exit(&vif->vif_mutex); 1199 goto fail; 1200 } 1201 } 1202 1203 nmp = nmp->b_cont; 1204 } 1205 1206 /* 1207 * We need to keep the message around until we reclaim the buffer from 1208 * the device before freeing it. 1209 */ 1210 tb->tb_mp = mp; 1211 1212 return (DDI_SUCCESS); 1213 1214 fail: 1215 for (uint_t n = 0; n < tb->tb_ndmaext; n++) { 1216 if (tb->tb_dmaext[n] != NULL) { 1217 virtio_dma_unbind(tb->tb_dmaext[n]); 1218 } 1219 } 1220 tb->tb_ndmaext = 0; 1221 1222 freemsg(mp); 1223 1224 return (DDI_FAILURE); 1225 } 1226 1227 static boolean_t 1228 vioif_send(vioif_t *vif, mblk_t *mp) 1229 { 1230 VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex)); 1231 1232 vioif_txbuf_t *tb = NULL; 1233 struct virtio_net_hdr *vnh = NULL; 1234 size_t msg_size = 0; 1235 uint32_t csum_start; 1236 uint32_t csum_stuff; 1237 uint32_t csum_flags; 1238 uint32_t lso_flags; 1239 uint32_t lso_mss; 1240 mblk_t *nmp; 1241 int ret; 1242 boolean_t lso_required = B_FALSE; 1243 struct ether_header *ether = (void *)mp->b_rptr; 1244 1245 for (nmp = mp; nmp; nmp = nmp->b_cont) 1246 msg_size += MBLKL(nmp); 1247 1248 if (vif->vif_tx_tso4 || vif->vif_tx_tso6) { 1249 mac_lso_get(mp, &lso_mss, &lso_flags); 1250 lso_required = (lso_flags & HW_LSO) != 0; 1251 } 1252 1253 mutex_enter(&vif->vif_mutex); 1254 if ((tb = vioif_txbuf_alloc(vif)) == NULL) { 1255 vif->vif_notxbuf++; 1256 goto fail; 1257 } 1258 mutex_exit(&vif->vif_mutex); 1259 1260 /* 1261 * Use the inline buffer for the virtio net header. Zero the portion 1262 * of our DMA allocation prior to the packet data. 1263 */ 1264 vnh = virtio_dma_va(tb->tb_dma, 0); 1265 bzero(vnh, VIOIF_HEADER_SKIP); 1266 1267 /* 1268 * For legacy devices, and those that have not negotiated 1269 * VIRTIO_F_ANY_LAYOUT, the virtio net header must appear in a separate 1270 * descriptor entry to the rest of the buffer. 1271 */ 1272 if (virtio_chain_append(tb->tb_chain, 1273 virtio_dma_cookie_pa(tb->tb_dma, 0), sizeof (struct virtio_net_hdr), 1274 VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) { 1275 mutex_enter(&vif->vif_mutex); 1276 vif->vif_notxbuf++; 1277 goto fail; 1278 } 1279 1280 mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL, NULL, &csum_flags); 1281 1282 /* 1283 * They want us to do the TCP/UDP csum calculation. 1284 */ 1285 if (csum_flags & HCK_PARTIALCKSUM) { 1286 int eth_hsize; 1287 1288 /* 1289 * Did we ask for it? 1290 */ 1291 ASSERT(vif->vif_tx_csum); 1292 1293 /* 1294 * We only asked for partial csum packets. 1295 */ 1296 ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM)); 1297 ASSERT(!(csum_flags & HCK_FULLCKSUM)); 1298 1299 if (ether->ether_type == htons(ETHERTYPE_VLAN)) { 1300 eth_hsize = sizeof (struct ether_vlan_header); 1301 } else { 1302 eth_hsize = sizeof (struct ether_header); 1303 } 1304 1305 vnh->vnh_flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 1306 vnh->vnh_csum_start = eth_hsize + csum_start; 1307 vnh->vnh_csum_offset = csum_stuff - csum_start; 1308 } 1309 1310 /* 1311 * Setup LSO fields if required. 1312 */ 1313 if (lso_required) { 1314 mac_ether_offload_flags_t needed; 1315 mac_ether_offload_info_t meo; 1316 uint32_t cksum; 1317 size_t len; 1318 mblk_t *pullmp = NULL; 1319 tcpha_t *tcpha; 1320 1321 if (mac_ether_offload_info(mp, &meo) != 0) { 1322 goto fail; 1323 } 1324 1325 needed = MEOI_L2INFO_SET | MEOI_L3INFO_SET | MEOI_L4INFO_SET; 1326 if ((meo.meoi_flags & needed) != needed) { 1327 goto fail; 1328 } 1329 1330 if (meo.meoi_l4proto != IPPROTO_TCP) { 1331 goto fail; 1332 } 1333 1334 if (meo.meoi_l3proto == ETHERTYPE_IP && vif->vif_tx_tso4) { 1335 vnh->vnh_gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 1336 } else if (meo.meoi_l3proto == ETHERTYPE_IPV6 && 1337 vif->vif_tx_tso6) { 1338 vnh->vnh_gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 1339 } else { 1340 goto fail; 1341 } 1342 1343 /* 1344 * The TCP stack does not include the length in the TCP 1345 * pseudo-header when it is performing LSO since hardware 1346 * generally asks for it to be removed (as it'll change). 1347 * Unfortunately, for virtio, we actually need it. This means we 1348 * need to go through and calculate the actual length and fix 1349 * things up. Because the virtio spec cares about the ECN flag 1350 * and indicating that, at least this means we'll have that 1351 * available as well. 1352 */ 1353 if (MBLKL(mp) < vnh->vnh_hdr_len) { 1354 pullmp = msgpullup(mp, vnh->vnh_hdr_len); 1355 if (pullmp == NULL) 1356 goto fail; 1357 tcpha = (tcpha_t *)(pullmp->b_rptr + meo.meoi_l2hlen + 1358 meo.meoi_l3hlen); 1359 } else { 1360 tcpha = (tcpha_t *)(mp->b_rptr + meo.meoi_l2hlen + 1361 meo.meoi_l3hlen); 1362 } 1363 1364 len = meo.meoi_len - meo.meoi_l2hlen - meo.meoi_l3hlen; 1365 cksum = ntohs(tcpha->tha_sum) + len; 1366 cksum = (cksum >> 16) + (cksum & 0xffff); 1367 cksum = (cksum >> 16) + (cksum & 0xffff); 1368 tcpha->tha_sum = htons(cksum); 1369 1370 if (tcpha->tha_flags & TH_CWR) { 1371 vnh->vnh_gso_type |= VIRTIO_NET_HDR_GSO_ECN; 1372 } 1373 vnh->vnh_gso_size = (uint16_t)lso_mss; 1374 vnh->vnh_hdr_len = meo.meoi_l2hlen + meo.meoi_l3hlen + 1375 meo.meoi_l4hlen; 1376 1377 freemsg(pullmp); 1378 } 1379 1380 /* 1381 * The device does not maintain its own statistics about broadcast or 1382 * multicast packets, so we have to check the destination address 1383 * ourselves. 1384 */ 1385 if ((ether->ether_dhost.ether_addr_octet[0] & 0x01) != 0) { 1386 mutex_enter(&vif->vif_mutex); 1387 if (ether_cmp(ðer->ether_dhost, vioif_broadcast) == 0) { 1388 vif->vif_brdcstxmt++; 1389 } else { 1390 vif->vif_multixmt++; 1391 } 1392 mutex_exit(&vif->vif_mutex); 1393 } 1394 1395 /* 1396 * For small packets, copy into the preallocated inline buffer rather 1397 * than incur the overhead of mapping. Note that both of these 1398 * functions ensure that "mp" is freed before returning. 1399 */ 1400 if (msg_size < vif->vif_txcopy_thresh) { 1401 ret = vioif_tx_inline(vif, tb, mp, msg_size); 1402 } else { 1403 ret = vioif_tx_external(vif, tb, mp, msg_size); 1404 } 1405 mp = NULL; 1406 1407 mutex_enter(&vif->vif_mutex); 1408 1409 if (ret != DDI_SUCCESS) { 1410 goto fail; 1411 } 1412 1413 vif->vif_opackets++; 1414 vif->vif_obytes += msg_size; 1415 mutex_exit(&vif->vif_mutex); 1416 1417 virtio_dma_sync(tb->tb_dma, DDI_DMA_SYNC_FORDEV); 1418 virtio_chain_submit(tb->tb_chain, B_TRUE); 1419 1420 return (B_TRUE); 1421 1422 fail: 1423 vif->vif_oerrors++; 1424 if (tb != NULL) { 1425 vioif_txbuf_free(vif, tb); 1426 } 1427 mutex_exit(&vif->vif_mutex); 1428 1429 return (mp == NULL); 1430 } 1431 1432 static mblk_t * 1433 vioif_m_tx(void *arg, mblk_t *mp) 1434 { 1435 vioif_t *vif = arg; 1436 mblk_t *nmp; 1437 1438 /* 1439 * Prior to attempting to send any more frames, do a reclaim to pick up 1440 * any descriptors which have been processed by the host. 1441 */ 1442 if (virtio_queue_nactive(vif->vif_tx_vq) != 0) { 1443 (void) vioif_reclaim_used_tx(vif); 1444 } 1445 1446 while (mp != NULL) { 1447 nmp = mp->b_next; 1448 mp->b_next = NULL; 1449 1450 if (!vioif_send(vif, mp)) { 1451 /* 1452 * If there are no descriptors available, try to 1453 * reclaim some, allowing a retry of the send if some 1454 * are found. 1455 */ 1456 mp->b_next = nmp; 1457 if (vioif_reclaim_used_tx(vif) != 0) { 1458 continue; 1459 } 1460 1461 /* 1462 * Otherwise, enable the TX ring interrupt so that as 1463 * soon as a descriptor becomes available, transmission 1464 * can begin again. For safety, make sure the periodic 1465 * reclaim is running as well. 1466 */ 1467 mutex_enter(&vif->vif_mutex); 1468 vif->vif_tx_corked = B_TRUE; 1469 virtio_queue_no_interrupt(vif->vif_tx_vq, B_FALSE); 1470 vioif_reclaim_restart(vif); 1471 mutex_exit(&vif->vif_mutex); 1472 return (mp); 1473 } 1474 mp = nmp; 1475 } 1476 1477 /* Ensure the periodic reclaim has been started. */ 1478 mutex_enter(&vif->vif_mutex); 1479 vioif_reclaim_restart(vif); 1480 mutex_exit(&vif->vif_mutex); 1481 1482 return (NULL); 1483 } 1484 1485 static int 1486 vioif_m_start(void *arg) 1487 { 1488 vioif_t *vif = arg; 1489 1490 mutex_enter(&vif->vif_mutex); 1491 1492 VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_STOPPED); 1493 vif->vif_runstate = VIOIF_RUNSTATE_RUNNING; 1494 1495 mac_link_update(vif->vif_mac_handle, LINK_STATE_UP); 1496 1497 virtio_queue_no_interrupt(vif->vif_rx_vq, B_FALSE); 1498 1499 /* 1500 * Starting interrupts on the TX virtqueue is unnecessary at this time. 1501 * Descriptor reclamation is handling during transmit, via a periodic 1502 * timer, and when resources are tight, via the then-enabled interrupt. 1503 */ 1504 vif->vif_tx_drain = B_FALSE; 1505 1506 /* 1507 * Add as many receive buffers as we can to the receive queue. If we 1508 * cannot add any, it may be because we have stopped and started again 1509 * and the descriptors are all in the queue already. 1510 */ 1511 (void) vioif_add_rx(vif); 1512 1513 mutex_exit(&vif->vif_mutex); 1514 return (DDI_SUCCESS); 1515 } 1516 1517 static void 1518 vioif_m_stop(void *arg) 1519 { 1520 vioif_t *vif = arg; 1521 1522 mutex_enter(&vif->vif_mutex); 1523 1524 VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_RUNNING); 1525 vif->vif_runstate = VIOIF_RUNSTATE_STOPPING; 1526 1527 /* Ensure all TX descriptors have been processed and reclaimed */ 1528 vioif_tx_drain(vif); 1529 1530 virtio_queue_no_interrupt(vif->vif_rx_vq, B_TRUE); 1531 1532 vif->vif_runstate = VIOIF_RUNSTATE_STOPPED; 1533 mutex_exit(&vif->vif_mutex); 1534 } 1535 1536 static int 1537 vioif_m_stat(void *arg, uint_t stat, uint64_t *val) 1538 { 1539 vioif_t *vif = arg; 1540 1541 switch (stat) { 1542 case MAC_STAT_IERRORS: 1543 *val = vif->vif_ierrors; 1544 break; 1545 case MAC_STAT_OERRORS: 1546 *val = vif->vif_oerrors; 1547 break; 1548 case MAC_STAT_MULTIRCV: 1549 *val = vif->vif_multircv; 1550 break; 1551 case MAC_STAT_BRDCSTRCV: 1552 *val = vif->vif_brdcstrcv; 1553 break; 1554 case MAC_STAT_MULTIXMT: 1555 *val = vif->vif_multixmt; 1556 break; 1557 case MAC_STAT_BRDCSTXMT: 1558 *val = vif->vif_brdcstxmt; 1559 break; 1560 case MAC_STAT_IPACKETS: 1561 *val = vif->vif_ipackets; 1562 break; 1563 case MAC_STAT_RBYTES: 1564 *val = vif->vif_rbytes; 1565 break; 1566 case MAC_STAT_OPACKETS: 1567 *val = vif->vif_opackets; 1568 break; 1569 case MAC_STAT_OBYTES: 1570 *val = vif->vif_obytes; 1571 break; 1572 case MAC_STAT_NORCVBUF: 1573 *val = vif->vif_norecvbuf; 1574 break; 1575 case MAC_STAT_NOXMTBUF: 1576 *val = vif->vif_notxbuf; 1577 break; 1578 case MAC_STAT_IFSPEED: 1579 /* always 1 Gbit */ 1580 *val = 1000000000ULL; 1581 break; 1582 case ETHER_STAT_LINK_DUPLEX: 1583 /* virtual device, always full-duplex */ 1584 *val = LINK_DUPLEX_FULL; 1585 break; 1586 1587 default: 1588 return (ENOTSUP); 1589 } 1590 1591 return (DDI_SUCCESS); 1592 } 1593 1594 static int 1595 vioif_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1596 uint_t pr_valsize, const void *pr_val) 1597 { 1598 vioif_t *vif = arg; 1599 1600 switch (pr_num) { 1601 case MAC_PROP_MTU: { 1602 int r; 1603 uint32_t mtu; 1604 if (pr_valsize < sizeof (mtu)) { 1605 return (EOVERFLOW); 1606 } 1607 bcopy(pr_val, &mtu, sizeof (mtu)); 1608 1609 if (mtu < ETHERMIN || mtu > vif->vif_mtu_max) { 1610 return (EINVAL); 1611 } 1612 1613 mutex_enter(&vif->vif_mutex); 1614 if ((r = mac_maxsdu_update(vif->vif_mac_handle, mtu)) == 0) { 1615 vif->vif_mtu = mtu; 1616 } 1617 mutex_exit(&vif->vif_mutex); 1618 1619 return (r); 1620 } 1621 1622 case MAC_PROP_PRIVATE: { 1623 long max, result; 1624 uint_t *resp; 1625 char *endptr; 1626 1627 if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) { 1628 max = VIOIF_MACPROP_TXCOPY_THRESH_MAX; 1629 resp = &vif->vif_txcopy_thresh; 1630 } else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) { 1631 max = VIOIF_MACPROP_RXCOPY_THRESH_MAX; 1632 resp = &vif->vif_rxcopy_thresh; 1633 } else { 1634 return (ENOTSUP); 1635 } 1636 1637 if (pr_val == NULL) { 1638 return (EINVAL); 1639 } 1640 1641 if (ddi_strtol(pr_val, &endptr, 10, &result) != 0 || 1642 *endptr != '\0' || result < 0 || result > max) { 1643 return (EINVAL); 1644 } 1645 1646 mutex_enter(&vif->vif_mutex); 1647 *resp = result; 1648 mutex_exit(&vif->vif_mutex); 1649 1650 return (0); 1651 } 1652 1653 default: 1654 return (ENOTSUP); 1655 } 1656 } 1657 1658 static int 1659 vioif_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1660 uint_t pr_valsize, void *pr_val) 1661 { 1662 vioif_t *vif = arg; 1663 1664 switch (pr_num) { 1665 case MAC_PROP_PRIVATE: { 1666 uint_t value; 1667 1668 if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) { 1669 value = vif->vif_txcopy_thresh; 1670 } else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) { 1671 value = vif->vif_rxcopy_thresh; 1672 } else { 1673 return (ENOTSUP); 1674 } 1675 1676 if (snprintf(pr_val, pr_valsize, "%u", value) >= pr_valsize) { 1677 return (EOVERFLOW); 1678 } 1679 1680 return (0); 1681 } 1682 1683 default: 1684 return (ENOTSUP); 1685 } 1686 } 1687 1688 static void 1689 vioif_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1690 mac_prop_info_handle_t prh) 1691 { 1692 vioif_t *vif = arg; 1693 char valstr[64]; 1694 int value; 1695 1696 switch (pr_num) { 1697 case MAC_PROP_MTU: 1698 mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW); 1699 mac_prop_info_set_range_uint32(prh, ETHERMIN, vif->vif_mtu_max); 1700 return; 1701 1702 case MAC_PROP_PRIVATE: 1703 if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) { 1704 value = VIOIF_MACPROP_TXCOPY_THRESH_DEF; 1705 } else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) { 1706 value = VIOIF_MACPROP_RXCOPY_THRESH_DEF; 1707 } else { 1708 /* 1709 * We do not recognise this private property name. 1710 */ 1711 return; 1712 } 1713 mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW); 1714 (void) snprintf(valstr, sizeof (valstr), "%d", value); 1715 mac_prop_info_set_default_str(prh, valstr); 1716 return; 1717 1718 default: 1719 return; 1720 } 1721 } 1722 1723 static boolean_t 1724 vioif_m_getcapab(void *arg, mac_capab_t cap, void *cap_data) 1725 { 1726 vioif_t *vif = arg; 1727 1728 switch (cap) { 1729 case MAC_CAPAB_HCKSUM: { 1730 if (!vif->vif_tx_csum) { 1731 return (B_FALSE); 1732 } 1733 1734 *(uint32_t *)cap_data = HCKSUM_INET_PARTIAL; 1735 1736 return (B_TRUE); 1737 } 1738 1739 case MAC_CAPAB_LSO: { 1740 if (!vif->vif_tx_tso4) { 1741 return (B_FALSE); 1742 } 1743 1744 mac_capab_lso_t *lso = cap_data; 1745 lso->lso_flags = LSO_TX_BASIC_TCP_IPV4 | LSO_TX_BASIC_TCP_IPV6; 1746 lso->lso_basic_tcp_ipv4.lso_max = VIOIF_RX_DATA_SIZE; 1747 lso->lso_basic_tcp_ipv6.lso_max = VIOIF_RX_DATA_SIZE; 1748 1749 return (B_TRUE); 1750 } 1751 1752 default: 1753 return (B_FALSE); 1754 } 1755 } 1756 1757 static boolean_t 1758 vioif_has_feature(vioif_t *vif, uint32_t feature) 1759 { 1760 return (virtio_feature_present(vif->vif_virtio, feature)); 1761 } 1762 1763 /* 1764 * Read the primary MAC address from the device if one is provided. If not, 1765 * generate a random locally administered MAC address and write it back to the 1766 * device. 1767 */ 1768 static void 1769 vioif_get_mac(vioif_t *vif) 1770 { 1771 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 1772 1773 if (vioif_has_feature(vif, VIRTIO_NET_F_MAC)) { 1774 for (uint_t i = 0; i < ETHERADDRL; i++) { 1775 vif->vif_mac[i] = virtio_dev_get8(vif->vif_virtio, 1776 VIRTIO_NET_CONFIG_MAC + i); 1777 } 1778 vif->vif_mac_from_host = 1; 1779 1780 return; 1781 } 1782 1783 /* Get a few random bytes */ 1784 (void) random_get_pseudo_bytes(vif->vif_mac, ETHERADDRL); 1785 /* Make sure it's a unicast MAC */ 1786 vif->vif_mac[0] &= ~1; 1787 /* Set the "locally administered" bit */ 1788 vif->vif_mac[1] |= 2; 1789 1790 /* 1791 * Write the random MAC address back to the device. 1792 */ 1793 for (uint_t i = 0; i < ETHERADDRL; i++) { 1794 virtio_dev_put8(vif->vif_virtio, VIRTIO_NET_CONFIG_MAC + i, 1795 vif->vif_mac[i]); 1796 } 1797 vif->vif_mac_from_host = 0; 1798 1799 dev_err(vif->vif_dip, CE_NOTE, "!Generated a random MAC address: " 1800 "%02x:%02x:%02x:%02x:%02x:%02x", 1801 (uint_t)vif->vif_mac[0], (uint_t)vif->vif_mac[1], 1802 (uint_t)vif->vif_mac[2], (uint_t)vif->vif_mac[3], 1803 (uint_t)vif->vif_mac[4], (uint_t)vif->vif_mac[5]); 1804 } 1805 1806 /* 1807 * Virtqueue interrupt handlers 1808 */ 1809 static uint_t 1810 vioif_rx_handler(caddr_t arg0, caddr_t arg1) 1811 { 1812 vioif_t *vif = (vioif_t *)arg0; 1813 1814 mutex_enter(&vif->vif_mutex); 1815 (void) vioif_process_rx(vif); 1816 1817 /* 1818 * Attempt to replenish the receive queue. If we cannot add any 1819 * descriptors here, it may be because all of the recently received 1820 * packets were loaned up to the networking stack. 1821 */ 1822 (void) vioif_add_rx(vif); 1823 mutex_exit(&vif->vif_mutex); 1824 1825 return (DDI_INTR_CLAIMED); 1826 } 1827 1828 static uint_t 1829 vioif_tx_handler(caddr_t arg0, caddr_t arg1) 1830 { 1831 vioif_t *vif = (vioif_t *)arg0; 1832 1833 /* 1834 * The TX interrupt could race with other reclamation activity, so 1835 * interpreting the return value is unimportant. 1836 */ 1837 (void) vioif_reclaim_used_tx(vif); 1838 1839 return (DDI_INTR_CLAIMED); 1840 } 1841 1842 static void 1843 vioif_check_features(vioif_t *vif) 1844 { 1845 VERIFY(MUTEX_HELD(&vif->vif_mutex)); 1846 1847 vif->vif_tx_csum = 0; 1848 vif->vif_tx_tso4 = 0; 1849 vif->vif_tx_tso6 = 0; 1850 1851 if (vioif_has_feature(vif, VIRTIO_NET_F_CSUM)) { 1852 /* 1853 * The host will accept packets with partial checksums from us. 1854 */ 1855 vif->vif_tx_csum = 1; 1856 1857 /* 1858 * The legacy GSO feature represents the combination of 1859 * HOST_TSO4, HOST_TSO6, and HOST_ECN. 1860 */ 1861 boolean_t gso = vioif_has_feature(vif, VIRTIO_NET_F_GSO); 1862 boolean_t tso4 = vioif_has_feature(vif, VIRTIO_NET_F_HOST_TSO4); 1863 boolean_t tso6 = vioif_has_feature(vif, VIRTIO_NET_F_HOST_TSO6); 1864 boolean_t ecn = vioif_has_feature(vif, VIRTIO_NET_F_HOST_ECN); 1865 1866 /* 1867 * Explicit congestion notification (ECN) is configured 1868 * globally; see "tcp_ecn_permitted". As we cannot currently 1869 * request that the stack disable ECN on a per interface basis, 1870 * we require the device to support the combination of 1871 * segmentation offload and ECN support. 1872 */ 1873 if (gso) { 1874 vif->vif_tx_tso4 = 1; 1875 vif->vif_tx_tso6 = 1; 1876 } 1877 if (tso4 && ecn) { 1878 vif->vif_tx_tso4 = 1; 1879 } 1880 if (tso6 && ecn) { 1881 vif->vif_tx_tso6 = 1; 1882 } 1883 } 1884 1885 if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_VQ)) { 1886 vif->vif_has_ctrlq = 1; 1887 1888 /* 1889 * The VIRTIO_NET_F_CTRL_VQ feature must be enabled if there's 1890 * any chance of the VIRTIO_NET_F_CTRL_RX being enabled. 1891 */ 1892 if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_RX)) 1893 vif->vif_has_ctrlq_rx = 1; 1894 } 1895 } 1896 1897 static int 1898 vioif_select_interrupt_types(void) 1899 { 1900 id_t id; 1901 smbios_system_t sys; 1902 smbios_info_t info; 1903 1904 if (vioif_allowed_int_types != -1) { 1905 /* 1906 * If this value was tuned via /etc/system or the debugger, 1907 * use the provided value directly. 1908 */ 1909 return (vioif_allowed_int_types); 1910 } 1911 1912 if (ksmbios == NULL || 1913 (id = smbios_info_system(ksmbios, &sys)) == SMB_ERR || 1914 smbios_info_common(ksmbios, id, &info) == SMB_ERR) { 1915 /* 1916 * The system may not have valid SMBIOS data, so ignore a 1917 * failure here. 1918 */ 1919 return (VIRTIO_ANY_INTR_TYPE); 1920 } 1921 1922 if (strcmp(info.smbi_manufacturer, "Google") == 0 && 1923 strcmp(info.smbi_product, "Google Compute Engine") == 0) { 1924 /* 1925 * An undiagnosed issue with the Google Compute Engine (GCE) 1926 * hypervisor exists. In this environment, no RX interrupts 1927 * are received if MSI-X handlers are installed. This does not 1928 * appear to be true for the Virtio SCSI driver. Fixed 1929 * interrupts do appear to work, so we fall back for now: 1930 */ 1931 return (DDI_INTR_TYPE_FIXED); 1932 } 1933 1934 return (VIRTIO_ANY_INTR_TYPE); 1935 } 1936 1937 static int 1938 vioif_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 1939 { 1940 int ret; 1941 vioif_t *vif; 1942 virtio_t *vio; 1943 mac_register_t *macp = NULL; 1944 1945 if (cmd != DDI_ATTACH) { 1946 return (DDI_FAILURE); 1947 } 1948 1949 if ((vio = virtio_init(dip, VIRTIO_NET_WANTED_FEATURES, B_TRUE)) == 1950 NULL) { 1951 return (DDI_FAILURE); 1952 } 1953 1954 vif = kmem_zalloc(sizeof (*vif), KM_SLEEP); 1955 vif->vif_dip = dip; 1956 vif->vif_virtio = vio; 1957 vif->vif_runstate = VIOIF_RUNSTATE_STOPPED; 1958 ddi_set_driver_private(dip, vif); 1959 1960 if ((vif->vif_rx_vq = virtio_queue_alloc(vio, VIRTIO_NET_VIRTQ_RX, 1961 "rx", vioif_rx_handler, vif, B_FALSE, VIOIF_MAX_SEGS)) == NULL || 1962 (vif->vif_tx_vq = virtio_queue_alloc(vio, VIRTIO_NET_VIRTQ_TX, 1963 "tx", vioif_tx_handler, vif, B_FALSE, VIOIF_MAX_SEGS)) == NULL) { 1964 goto fail; 1965 } 1966 1967 if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_VQ) && 1968 (vif->vif_ctrl_vq = virtio_queue_alloc(vio, 1969 VIRTIO_NET_VIRTQ_CONTROL, "ctrlq", NULL, vif, 1970 B_FALSE, VIOIF_MAX_SEGS)) == NULL) { 1971 goto fail; 1972 } 1973 1974 if (virtio_init_complete(vio, vioif_select_interrupt_types()) != 1975 DDI_SUCCESS) { 1976 dev_err(dip, CE_WARN, "failed to complete Virtio init"); 1977 goto fail; 1978 } 1979 1980 virtio_queue_no_interrupt(vif->vif_rx_vq, B_TRUE); 1981 virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE); 1982 if (vif->vif_ctrl_vq != NULL) 1983 virtio_queue_no_interrupt(vif->vif_ctrl_vq, B_TRUE); 1984 1985 mutex_init(&vif->vif_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio)); 1986 mutex_enter(&vif->vif_mutex); 1987 1988 vioif_get_mac(vif); 1989 1990 vif->vif_rxcopy_thresh = VIOIF_MACPROP_RXCOPY_THRESH_DEF; 1991 vif->vif_txcopy_thresh = VIOIF_MACPROP_TXCOPY_THRESH_DEF; 1992 1993 if (vioif_has_feature(vif, VIRTIO_NET_F_MTU)) { 1994 vif->vif_mtu_max = virtio_dev_get16(vio, VIRTIO_NET_CONFIG_MTU); 1995 } else { 1996 vif->vif_mtu_max = ETHERMTU; 1997 } 1998 1999 vif->vif_mtu = ETHERMTU; 2000 if (vif->vif_mtu > vif->vif_mtu_max) { 2001 vif->vif_mtu = vif->vif_mtu_max; 2002 } 2003 2004 vioif_check_features(vif); 2005 2006 if (vioif_alloc_bufs(vif) != 0) { 2007 mutex_exit(&vif->vif_mutex); 2008 dev_err(dip, CE_WARN, "failed to allocate memory"); 2009 goto fail; 2010 } 2011 2012 mutex_exit(&vif->vif_mutex); 2013 2014 if (virtio_interrupts_enable(vio) != DDI_SUCCESS) { 2015 dev_err(dip, CE_WARN, "failed to enable interrupts"); 2016 goto fail; 2017 } 2018 2019 if ((macp = mac_alloc(MAC_VERSION)) == NULL) { 2020 dev_err(dip, CE_WARN, "failed to allocate a mac_register"); 2021 goto fail; 2022 } 2023 2024 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 2025 macp->m_driver = vif; 2026 macp->m_dip = dip; 2027 macp->m_src_addr = vif->vif_mac; 2028 macp->m_callbacks = &vioif_mac_callbacks; 2029 macp->m_min_sdu = 0; 2030 macp->m_max_sdu = vif->vif_mtu; 2031 macp->m_margin = VLAN_TAGSZ; 2032 macp->m_priv_props = vioif_priv_props; 2033 2034 if ((ret = mac_register(macp, &vif->vif_mac_handle)) != 0) { 2035 dev_err(dip, CE_WARN, "mac_register() failed (%d)", ret); 2036 goto fail; 2037 } 2038 mac_free(macp); 2039 2040 mac_link_update(vif->vif_mac_handle, LINK_STATE_UP); 2041 2042 return (DDI_SUCCESS); 2043 2044 fail: 2045 vioif_free_bufs(vif); 2046 if (macp != NULL) { 2047 mac_free(macp); 2048 } 2049 (void) virtio_fini(vio, B_TRUE); 2050 kmem_free(vif, sizeof (*vif)); 2051 return (DDI_FAILURE); 2052 } 2053 2054 static int 2055 vioif_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 2056 { 2057 int r; 2058 vioif_t *vif; 2059 2060 if (cmd != DDI_DETACH) { 2061 return (DDI_FAILURE); 2062 } 2063 2064 if ((vif = ddi_get_driver_private(dip)) == NULL) { 2065 return (DDI_FAILURE); 2066 } 2067 2068 mutex_enter(&vif->vif_mutex); 2069 if (vif->vif_runstate != VIOIF_RUNSTATE_STOPPED) { 2070 dev_err(dip, CE_WARN, "!NIC still running, cannot detach"); 2071 mutex_exit(&vif->vif_mutex); 2072 return (DDI_FAILURE); 2073 } 2074 2075 /* 2076 * There should be no outstanding transmit buffers once the NIC is 2077 * completely stopped. 2078 */ 2079 VERIFY3U(vif->vif_ntxbufs_alloc, ==, 0); 2080 2081 /* 2082 * Though we cannot claw back all of the receive buffers until we reset 2083 * the device, we must ensure all those loaned to MAC have been 2084 * returned before calling mac_unregister(). 2085 */ 2086 if (vif->vif_nrxbufs_onloan > 0) { 2087 dev_err(dip, CE_WARN, "!%u receive buffers still loaned, " 2088 "cannot detach", vif->vif_nrxbufs_onloan); 2089 mutex_exit(&vif->vif_mutex); 2090 return (DDI_FAILURE); 2091 } 2092 2093 if ((r = mac_unregister(vif->vif_mac_handle)) != 0) { 2094 dev_err(dip, CE_WARN, "!MAC unregister failed (%d)", r); 2095 return (DDI_FAILURE); 2096 } 2097 2098 /* 2099 * Shut down the device so that we can recover any previously 2100 * submitted receive buffers. 2101 */ 2102 virtio_shutdown(vif->vif_virtio); 2103 for (;;) { 2104 virtio_chain_t *vic; 2105 2106 if ((vic = virtio_queue_evacuate(vif->vif_rx_vq)) == NULL) { 2107 break; 2108 } 2109 2110 vioif_rxbuf_t *rb = virtio_chain_data(vic); 2111 vioif_rxbuf_free(vif, rb); 2112 } 2113 2114 /* 2115 * vioif_free_bufs() must be called before virtio_fini() 2116 * as it uses virtio_chain_free() which itself depends on some 2117 * virtio data structures still being around. 2118 */ 2119 vioif_free_bufs(vif); 2120 (void) virtio_fini(vif->vif_virtio, B_FALSE); 2121 2122 mutex_exit(&vif->vif_mutex); 2123 mutex_destroy(&vif->vif_mutex); 2124 2125 kmem_free(vif, sizeof (*vif)); 2126 2127 return (DDI_SUCCESS); 2128 } 2129 2130 static int 2131 vioif_quiesce(dev_info_t *dip) 2132 { 2133 vioif_t *vif; 2134 2135 if ((vif = ddi_get_driver_private(dip)) == NULL) 2136 return (DDI_FAILURE); 2137 2138 return (virtio_quiesce(vif->vif_virtio)); 2139 } 2140 2141 int 2142 _init(void) 2143 { 2144 int ret; 2145 2146 mac_init_ops(&vioif_dev_ops, "vioif"); 2147 2148 if ((ret = mod_install(&vioif_modlinkage)) != DDI_SUCCESS) { 2149 mac_fini_ops(&vioif_dev_ops); 2150 } 2151 2152 return (ret); 2153 } 2154 2155 int 2156 _fini(void) 2157 { 2158 int ret; 2159 2160 if ((ret = mod_remove(&vioif_modlinkage)) == DDI_SUCCESS) { 2161 mac_fini_ops(&vioif_dev_ops); 2162 } 2163 2164 return (ret); 2165 } 2166 2167 int 2168 _info(struct modinfo *modinfop) 2169 { 2170 return (mod_info(&vioif_modlinkage, modinfop)); 2171 } 2172