1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2013 Nexenta Inc. All rights reserved. 14 * Copyright (c) 2014, 2015 by Delphix. All rights reserved. 15 */ 16 17 /* Based on the NetBSD virtio driver by Minoura Makoto. */ 18 /* 19 * Copyright (c) 2010 Minoura Makoto. 20 * All rights reserved. 21 * 22 * Redistribution and use in source and binary forms, with or without 23 * modification, are permitted provided that the following conditions 24 * are met: 25 * 1. Redistributions of source code must retain the above copyright 26 * notice, this list of conditions and the following disclaimer. 27 * 2. Redistributions in binary form must reproduce the above copyright 28 * notice, this list of conditions and the following disclaimer in the 29 * documentation and/or other materials provided with the distribution. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 32 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 34 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 35 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 40 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 */ 42 43 #include <sys/types.h> 44 #include <sys/errno.h> 45 #include <sys/param.h> 46 #include <sys/stropts.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/kmem.h> 50 #include <sys/conf.h> 51 #include <sys/devops.h> 52 #include <sys/ksynch.h> 53 #include <sys/stat.h> 54 #include <sys/modctl.h> 55 #include <sys/debug.h> 56 #include <sys/pci.h> 57 #include <sys/ethernet.h> 58 #include <sys/vlan.h> 59 60 #include <sys/dlpi.h> 61 #include <sys/taskq.h> 62 #include <sys/cyclic.h> 63 64 #include <sys/pattr.h> 65 #include <sys/strsun.h> 66 67 #include <sys/random.h> 68 #include <sys/sysmacros.h> 69 #include <sys/stream.h> 70 71 #include <sys/mac.h> 72 #include <sys/mac_provider.h> 73 #include <sys/mac_ether.h> 74 75 #include "virtiovar.h" 76 #include "virtioreg.h" 77 78 /* Configuration registers */ 79 #define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */ 80 #define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */ 81 82 /* Feature bits */ 83 #define VIRTIO_NET_F_CSUM (1 << 0) /* Host handles pkts w/ partial csum */ 84 #define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* Guest handles pkts w/ part csum */ 85 #define VIRTIO_NET_F_MAC (1 << 5) /* Host has given MAC address. */ 86 #define VIRTIO_NET_F_GSO (1 << 6) /* Host handles pkts w/ any GSO type */ 87 #define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* Guest can handle TSOv4 in. */ 88 #define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* Guest can handle TSOv6 in. */ 89 #define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* Guest can handle TSO[6] w/ ECN in */ 90 #define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* Guest can handle UFO in. 
*/ 91 #define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* Host can handle TSOv4 in. */ 92 #define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* Host can handle TSOv6 in. */ 93 #define VIRTIO_NET_F_HOST_ECN (1 << 13) /* Host can handle TSO[6] w/ ECN in */ 94 #define VIRTIO_NET_F_HOST_UFO (1 << 14) /* Host can handle UFO in. */ 95 #define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* Host can merge receive buffers. */ 96 #define VIRTIO_NET_F_STATUS (1 << 16) /* Config.status available */ 97 #define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* Control channel available */ 98 #define VIRTIO_NET_F_CTRL_RX (1 << 18) /* Control channel RX mode support */ 99 #define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* Control channel VLAN filtering */ 100 #define VIRTIO_NET_F_CTRL_RX_EXTRA (1 << 20) /* Extra RX mode control support */ 101 102 #define VIRTIO_NET_FEATURE_BITS \ 103 "\020" \ 104 "\1CSUM" \ 105 "\2GUEST_CSUM" \ 106 "\6MAC" \ 107 "\7GSO" \ 108 "\10GUEST_TSO4" \ 109 "\11GUEST_TSO6" \ 110 "\12GUEST_ECN" \ 111 "\13GUEST_UFO" \ 112 "\14HOST_TSO4" \ 113 "\15HOST_TSO6" \ 114 "\16HOST_ECN" \ 115 "\17HOST_UFO" \ 116 "\20MRG_RXBUF" \ 117 "\21STATUS" \ 118 "\22CTRL_VQ" \ 119 "\23CTRL_RX" \ 120 "\24CTRL_VLAN" \ 121 "\25CTRL_RX_EXTRA" 122 123 /* Status */ 124 #define VIRTIO_NET_S_LINK_UP 1 125 126 #pragma pack(1) 127 /* Packet header structure */ 128 struct virtio_net_hdr { 129 uint8_t flags; 130 uint8_t gso_type; 131 uint16_t hdr_len; 132 uint16_t gso_size; 133 uint16_t csum_start; 134 uint16_t csum_offset; 135 }; 136 #pragma pack() 137 138 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* flags */ 139 #define VIRTIO_NET_HDR_GSO_NONE 0 /* gso_type */ 140 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* gso_type */ 141 #define VIRTIO_NET_HDR_GSO_UDP 3 /* gso_type */ 142 #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* gso_type */ 143 #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* gso_type, |'ed */ 144 145 146 /* Control virtqueue */ 147 #pragma pack(1) 148 struct virtio_net_ctrl_cmd { 149 uint8_t class; 150 uint8_t command; 151 }; 152 #pragma pack() 153 154 #define VIRTIO_NET_CTRL_RX 0 155 #define VIRTIO_NET_CTRL_RX_PROMISC 0 156 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 157 158 #define VIRTIO_NET_CTRL_MAC 1 159 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 160 161 #define VIRTIO_NET_CTRL_VLAN 2 162 #define VIRTIO_NET_CTRL_VLAN_ADD 0 163 #define VIRTIO_NET_CTRL_VLAN_DEL 1 164 165 #pragma pack(1) 166 struct virtio_net_ctrl_status { 167 uint8_t ack; 168 }; 169 170 struct virtio_net_ctrl_rx { 171 uint8_t onoff; 172 }; 173 174 struct virtio_net_ctrl_mac_tbl { 175 uint32_t nentries; 176 uint8_t macs[][ETHERADDRL]; 177 }; 178 179 struct virtio_net_ctrl_vlan { 180 uint16_t id; 181 }; 182 #pragma pack() 183 184 static int vioif_quiesce(dev_info_t *); 185 static int vioif_attach(dev_info_t *, ddi_attach_cmd_t); 186 static int vioif_detach(dev_info_t *, ddi_detach_cmd_t); 187 188 DDI_DEFINE_STREAM_OPS(vioif_ops, 189 nulldev, /* identify */ 190 nulldev, /* probe */ 191 vioif_attach, /* attach */ 192 vioif_detach, /* detach */ 193 nodev, /* reset */ 194 NULL, /* cb_ops */ 195 D_MP, /* bus_ops */ 196 NULL, /* power */ 197 vioif_quiesce /* quiesce */ 198 ); 199 200 static char vioif_ident[] = "VirtIO ethernet driver"; 201 202 /* Standard Module linkage initialization for a Streams driver */ 203 extern struct mod_ops mod_driverops; 204 205 static struct modldrv modldrv = { 206 &mod_driverops, /* Type of module. 
This one is a driver */
	vioif_ident,		/* short description */
	&vioif_ops		/* driver specific ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	{
		(void *)&modldrv,
		NULL,
	},
};

ddi_device_acc_attr_t vioif_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,	/* virtio is always native byte order */
	DDI_STORECACHING_OK_ACC,
	DDI_DEFAULT_ACC
};

/*
 * A mapping represents a binding for a single buffer that is contiguous in the
 * virtual address space.
 */
struct vioif_buf_mapping {
	caddr_t		vbm_buf;
	ddi_dma_handle_t vbm_dmah;
	ddi_acc_handle_t vbm_acch;
	ddi_dma_cookie_t vbm_dmac;
	unsigned int	vbm_ncookies;
};

/*
 * Rx buffers can be loaned upstream, so the code has
 * to allocate them dynamically.
 */
struct vioif_rx_buf {
	struct vioif_softc	*rb_sc;
	frtn_t			rb_frtn;

	struct vioif_buf_mapping rb_mapping;
};

/*
 * Tx buffers have two mapping types. One, "inline", is pre-allocated and is
 * used to hold the virtio_net_header. Small packets also get copied there, as
 * it's faster than mapping them. Bigger packets get mapped using the "external"
 * mapping array. An array is used, because a packet may consist of multiple
 * fragments, so each fragment gets bound to an entry. According to my
 * observations, the number of fragments does not exceed 2, but just in case,
 * a bigger, up to VIOIF_INDIRECT_MAX - 1 array is allocated. To save resources,
 * the dma handles are allocated lazily in the tx path.
 */
struct vioif_tx_buf {
	mblk_t			*tb_mp;

	/* inline buffer */
	struct vioif_buf_mapping tb_inline_mapping;

	/* External buffers */
	struct vioif_buf_mapping *tb_external_mapping;
	unsigned int		tb_external_num;
};

struct vioif_softc {
	dev_info_t		*sc_dev; /* mirrors virtio_softc->sc_dev */
	struct virtio_softc	sc_virtio;

	mac_handle_t		sc_mac_handle;
	mac_register_t		*sc_macp;

	struct virtqueue	*sc_rx_vq;
	struct virtqueue	*sc_tx_vq;
	struct virtqueue	*sc_ctrl_vq;

	unsigned int		sc_tx_stopped:1;

	/* Feature bits. */
	unsigned int		sc_rx_csum:1;
	unsigned int		sc_tx_csum:1;
	unsigned int		sc_tx_tso4:1;

	int			sc_mtu;
	uint8_t			sc_mac[ETHERADDRL];
	/*
	 * For rx buffers, we keep a pointer array, because the buffers
	 * can be loaned upstream, and we have to repopulate the array with
	 * new members.
	 */
	struct vioif_rx_buf	**sc_rxbufs;

	/*
	 * For tx, we just allocate an array of buffers. The packet can
	 * either be copied into the inline buffer, or the external mapping
	 * could be used to map the packet.
	 */
	struct vioif_tx_buf	*sc_txbufs;

	kstat_t			*sc_intrstat;
	/*
	 * We "loan" rx buffers upstream and reuse them after they are
	 * freed. This lets us avoid allocations in the hot path.
	 */
	kmem_cache_t		*sc_rxbuf_cache;
	ulong_t			sc_rxloan;

	/* Copying small packets turns out to be faster than mapping them.
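	 * The cut-over sizes (in bytes) live in sc_rxcopy_thresh and
	 * sc_txcopy_thresh below; both default to 300 in vioif_attach() and
	 * are tunable up to 640 (VIOIF_*_THRESH_MAX) through the
	 * vioif_txcopy_thresh/vioif_rxcopy_thresh private MAC properties.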
*/ 313 unsigned long sc_rxcopy_thresh; 314 unsigned long sc_txcopy_thresh; 315 /* Some statistic coming here */ 316 uint64_t sc_ipackets; 317 uint64_t sc_opackets; 318 uint64_t sc_rbytes; 319 uint64_t sc_obytes; 320 uint64_t sc_brdcstxmt; 321 uint64_t sc_brdcstrcv; 322 uint64_t sc_multixmt; 323 uint64_t sc_multircv; 324 uint64_t sc_norecvbuf; 325 uint64_t sc_notxbuf; 326 uint64_t sc_ierrors; 327 uint64_t sc_oerrors; 328 }; 329 330 #define ETHER_HEADER_LEN sizeof (struct ether_header) 331 332 /* MTU + the ethernet header. */ 333 #define MAX_PAYLOAD 65535 334 #define MAX_MTU (MAX_PAYLOAD - ETHER_HEADER_LEN) 335 #define DEFAULT_MTU ETHERMTU 336 337 /* 338 * Yeah, we spend 8M per device. Turns out, there is no point 339 * being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF), 340 * because vhost does not support them, and we expect to be used with 341 * vhost in production environment. 342 */ 343 /* The buffer keeps both the packet data and the virtio_net_header. */ 344 #define VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr)) 345 346 /* 347 * We win a bit on header alignment, but the host wins a lot 348 * more on moving aligned buffers. Might need more thought. 349 */ 350 #define VIOIF_IP_ALIGN 0 351 352 /* Maximum number of indirect descriptors, somewhat arbitrary. */ 353 #define VIOIF_INDIRECT_MAX 128 354 355 /* 356 * We pre-allocate a reasonably large buffer to copy small packets 357 * there. Bigger packets are mapped, packets with multiple 358 * cookies are mapped as indirect buffers. 359 */ 360 #define VIOIF_TX_INLINE_SIZE 2048 361 362 /* Native queue size for all queues */ 363 #define VIOIF_RX_QLEN 0 364 #define VIOIF_TX_QLEN 0 365 #define VIOIF_CTRL_QLEN 0 366 367 static uchar_t vioif_broadcast[ETHERADDRL] = { 368 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 369 }; 370 371 #define VIOIF_TX_THRESH_MAX 640 372 #define VIOIF_RX_THRESH_MAX 640 373 374 #define CACHE_NAME_SIZE 32 375 376 static char vioif_txcopy_thresh[] = 377 "vioif_txcopy_thresh"; 378 static char vioif_rxcopy_thresh[] = 379 "vioif_rxcopy_thresh"; 380 381 static char *vioif_priv_props[] = { 382 vioif_txcopy_thresh, 383 vioif_rxcopy_thresh, 384 NULL 385 }; 386 387 /* Add up to ddi? 
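 * Until then, these helpers reach into the private ddi_dma_impl_t to save
 * and restore a handle's current cookie pointer; vioif_add_rx() uses them
 * to walk the extra cookies with ddi_dma_nextcookie() and then rewind,
 * since the DDI offers no public interface for that.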
*/ 388 static ddi_dma_cookie_t * 389 vioif_dma_curr_cookie(ddi_dma_handle_t dmah) 390 { 391 ddi_dma_impl_t *dmah_impl = (void *) dmah; 392 ASSERT(dmah_impl->dmai_cookie); 393 return (dmah_impl->dmai_cookie); 394 } 395 396 static void 397 vioif_dma_reset_cookie(ddi_dma_handle_t dmah, ddi_dma_cookie_t *dmac) 398 { 399 ddi_dma_impl_t *dmah_impl = (void *) dmah; 400 dmah_impl->dmai_cookie = dmac; 401 } 402 403 static link_state_t 404 vioif_link_state(struct vioif_softc *sc) 405 { 406 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_STATUS) { 407 if (virtio_read_device_config_2(&sc->sc_virtio, 408 VIRTIO_NET_CONFIG_STATUS) & VIRTIO_NET_S_LINK_UP) { 409 return (LINK_STATE_UP); 410 } else { 411 return (LINK_STATE_DOWN); 412 } 413 } 414 415 return (LINK_STATE_UP); 416 } 417 418 static ddi_dma_attr_t vioif_inline_buf_dma_attr = { 419 DMA_ATTR_V0, /* Version number */ 420 0, /* low address */ 421 0xFFFFFFFFFFFFFFFF, /* high address */ 422 0xFFFFFFFF, /* counter register max */ 423 1, /* page alignment */ 424 1, /* burst sizes: 1 - 32 */ 425 1, /* minimum transfer size */ 426 0xFFFFFFFF, /* max transfer size */ 427 0xFFFFFFFFFFFFFFF, /* address register max */ 428 1, /* scatter-gather capacity */ 429 1, /* device operates on bytes */ 430 0, /* attr flag: set to 0 */ 431 }; 432 433 static ddi_dma_attr_t vioif_mapped_buf_dma_attr = { 434 DMA_ATTR_V0, /* Version number */ 435 0, /* low address */ 436 0xFFFFFFFFFFFFFFFF, /* high address */ 437 0xFFFFFFFF, /* counter register max */ 438 1, /* page alignment */ 439 1, /* burst sizes: 1 - 32 */ 440 1, /* minimum transfer size */ 441 0xFFFFFFFF, /* max transfer size */ 442 0xFFFFFFFFFFFFFFF, /* address register max */ 443 444 /* One entry is used for the virtio_net_hdr on the tx path */ 445 VIOIF_INDIRECT_MAX - 1, /* scatter-gather capacity */ 446 1, /* device operates on bytes */ 447 0, /* attr flag: set to 0 */ 448 }; 449 450 static ddi_device_acc_attr_t vioif_bufattr = { 451 DDI_DEVICE_ATTR_V0, 452 DDI_NEVERSWAP_ACC, 453 DDI_STORECACHING_OK_ACC, 454 DDI_DEFAULT_ACC 455 }; 456 457 static void 458 vioif_rx_free(caddr_t free_arg) 459 { 460 struct vioif_rx_buf *buf = (void *) free_arg; 461 struct vioif_softc *sc = buf->rb_sc; 462 463 kmem_cache_free(sc->sc_rxbuf_cache, buf); 464 atomic_dec_ulong(&sc->sc_rxloan); 465 } 466 467 static int 468 vioif_rx_construct(void *buffer, void *user_arg, int kmflags) 469 { 470 _NOTE(ARGUNUSED(kmflags)); 471 struct vioif_softc *sc = user_arg; 472 struct vioif_rx_buf *buf = buffer; 473 size_t len; 474 475 if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr, 476 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) { 477 dev_err(sc->sc_dev, CE_WARN, 478 "Can't allocate dma handle for rx buffer"); 479 goto exit_handle; 480 } 481 482 if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah, 483 VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr), 484 &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP, 485 NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) { 486 dev_err(sc->sc_dev, CE_WARN, 487 "Can't allocate rx buffer"); 488 goto exit_alloc; 489 } 490 ASSERT(len >= VIOIF_RX_SIZE); 491 492 if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL, 493 buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING, 494 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac, 495 &buf->rb_mapping.vbm_ncookies)) { 496 dev_err(sc->sc_dev, CE_WARN, "Can't bind tx buffer"); 497 498 goto exit_bind; 499 } 500 501 ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX); 502 503 buf->rb_sc = sc; 504 buf->rb_frtn.free_arg = (void *) buf; 505 
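	/*
	 * When an upstream consumer frees a loaned mblk, desballoc() invokes
	 * rb_frtn.free_func: vioif_rx_free() returns the buffer to
	 * sc_rxbuf_cache and decrements sc_rxloan.
	 */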
	buf->rb_frtn.free_func = vioif_rx_free;

	return (0);
exit_bind:
	ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
exit_alloc:
	ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
exit_handle:

	return (ENOMEM);
}

static void
vioif_rx_destruct(void *buffer, void *user_arg)
{
	_NOTE(ARGUNUSED(user_arg));
	struct vioif_rx_buf *buf = buffer;

	ASSERT(buf->rb_mapping.vbm_acch);
	ASSERT(buf->rb_mapping.vbm_dmah);

	(void) ddi_dma_unbind_handle(buf->rb_mapping.vbm_dmah);
	ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
	ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
}

static void
vioif_free_mems(struct vioif_softc *sc)
{
	int i;

	for (i = 0; i < sc->sc_tx_vq->vq_num; i++) {
		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
		int j;

		/* Tear down the internal mapping. */

		ASSERT(buf->tb_inline_mapping.vbm_acch);
		ASSERT(buf->tb_inline_mapping.vbm_dmah);

		(void) ddi_dma_unbind_handle(buf->tb_inline_mapping.vbm_dmah);
		ddi_dma_mem_free(&buf->tb_inline_mapping.vbm_acch);
		ddi_dma_free_handle(&buf->tb_inline_mapping.vbm_dmah);

		/* We should not see any in-flight buffers at this point. */
		ASSERT(!buf->tb_mp);

		/* Free all the dma handles we allocated lazily. */
		for (j = 0; buf->tb_external_mapping[j].vbm_dmah; j++)
			ddi_dma_free_handle(
			    &buf->tb_external_mapping[j].vbm_dmah);
		/* Free the external mapping array. */
		kmem_free(buf->tb_external_mapping,
		    sizeof (struct vioif_buf_mapping) *
		    (VIOIF_INDIRECT_MAX - 1));
	}

	kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) *
	    sc->sc_tx_vq->vq_num);

	for (i = 0; i < sc->sc_rx_vq->vq_num; i++) {
		struct vioif_rx_buf *buf = sc->sc_rxbufs[i];

		if (buf)
			kmem_cache_free(sc->sc_rxbuf_cache, buf);
	}
	kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) *
	    sc->sc_rx_vq->vq_num);
}

static int
vioif_alloc_mems(struct vioif_softc *sc)
{
	int i, txqsize, rxqsize;
	size_t len;
	unsigned int nsegments;

	txqsize = sc->sc_tx_vq->vq_num;
	rxqsize = sc->sc_rx_vq->vq_num;

	sc->sc_txbufs = kmem_zalloc(sizeof (struct vioif_tx_buf) * txqsize,
	    KM_SLEEP);
	if (sc->sc_txbufs == NULL) {
		dev_err(sc->sc_dev, CE_WARN,
		    "Failed to allocate the tx buffers array");
		goto exit_txalloc;
	}

	/*
	 * We don't allocate the rx vioif_bufs, just the pointers, as
	 * rx vioif_bufs can be loaned upstream, and we don't know the
	 * total number we need.
	 */
	sc->sc_rxbufs = kmem_zalloc(sizeof (struct vioif_rx_buf *) * rxqsize,
	    KM_SLEEP);
	if (sc->sc_rxbufs == NULL) {
		dev_err(sc->sc_dev, CE_WARN,
		    "Failed to allocate the rx buffers pointer array");
		goto exit_rxalloc;
	}

	for (i = 0; i < txqsize; i++) {
		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];

		/* Allocate and bind an inline mapping. */
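		/*
		 * The inline buffer is VIOIF_TX_INLINE_SIZE (2048) bytes; it
		 * always carries the virtio_net_hdr, and packets smaller than
		 * sc_txcopy_thresh are copied in right behind the header by
		 * vioif_tx_inline().
		 */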
		if (ddi_dma_alloc_handle(sc->sc_dev,
		    &vioif_inline_buf_dma_attr,
		    DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_dmah)) {

			dev_err(sc->sc_dev, CE_WARN,
			    "Can't allocate dma handle for tx buffer %d", i);
			goto exit_tx;
		}

		if (ddi_dma_mem_alloc(buf->tb_inline_mapping.vbm_dmah,
		    VIOIF_TX_INLINE_SIZE, &vioif_bufattr, DDI_DMA_STREAMING,
		    DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_buf,
		    &len, &buf->tb_inline_mapping.vbm_acch)) {

			dev_err(sc->sc_dev, CE_WARN,
			    "Can't allocate tx buffer %d", i);
			goto exit_tx;
		}
		ASSERT(len >= VIOIF_TX_INLINE_SIZE);

		if (ddi_dma_addr_bind_handle(buf->tb_inline_mapping.vbm_dmah,
		    NULL, buf->tb_inline_mapping.vbm_buf, len,
		    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
		    &buf->tb_inline_mapping.vbm_dmac, &nsegments)) {

			dev_err(sc->sc_dev, CE_WARN,
			    "Can't bind tx buffer %d", i);
			goto exit_tx;
		}

		/* We asked for a single segment */
		ASSERT(nsegments == 1);

		/*
		 * We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
		 * In reality, I don't expect more than 2-3 used, but who
		 * knows.
		 */
		buf->tb_external_mapping = kmem_zalloc(
		    sizeof (struct vioif_buf_mapping) *
		    (VIOIF_INDIRECT_MAX - 1), KM_SLEEP);

		/*
		 * The external mapping's dma handles are allocated lazily,
		 * as we don't expect most of them to be used.
		 */
	}

	return (0);

exit_tx:
	for (i = 0; i < txqsize; i++) {
		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];

		if (buf->tb_inline_mapping.vbm_dmah)
			(void) ddi_dma_unbind_handle(
			    buf->tb_inline_mapping.vbm_dmah);

		if (buf->tb_inline_mapping.vbm_acch)
			ddi_dma_mem_free(
			    &buf->tb_inline_mapping.vbm_acch);

		if (buf->tb_inline_mapping.vbm_dmah)
			ddi_dma_free_handle(
			    &buf->tb_inline_mapping.vbm_dmah);

		if (buf->tb_external_mapping)
			kmem_free(buf->tb_external_mapping,
			    sizeof (struct vioif_buf_mapping) *
			    (VIOIF_INDIRECT_MAX - 1));
	}

	kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) * rxqsize);

exit_rxalloc:
	kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) * txqsize);
exit_txalloc:
	return (ENOMEM);
}

/* ARGSUSED */
int
vioif_multicst(void *arg, boolean_t add, const uint8_t *macaddr)
{
	return (DDI_SUCCESS);
}

/* ARGSUSED */
int
vioif_promisc(void *arg, boolean_t on)
{
	return (DDI_SUCCESS);
}

/* ARGSUSED */
int
vioif_unicst(void *arg, const uint8_t *macaddr)
{
	return (DDI_FAILURE);
}


static int
vioif_add_rx(struct vioif_softc *sc, int kmflag)
{
	struct vq_entry *ve;
	struct vioif_rx_buf *buf;

	ve = vq_alloc_entry(sc->sc_rx_vq);
	if (!ve) {
		/*
		 * Out of free descriptors - ring already full.
		 * It would be better to update sc_norxdescavail
		 * but MAC does not ask for this info, hence we
		 * update sc_norecvbuf.
		 */
		sc->sc_norecvbuf++;
		goto exit_vq;
	}
	buf = sc->sc_rxbufs[ve->qe_index];

	if (!buf) {
		/* First run, allocate the buffer. */
		buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
		sc->sc_rxbufs[ve->qe_index] = buf;
	}

	/* Still nothing? Bye. */
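	/*
	 * (kmem_cache_alloc() can fail here because the rx interrupt path
	 * refills the ring with KM_NOSLEEP.)
	 */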
	if (!buf) {
		dev_err(sc->sc_dev, CE_WARN, "Can't allocate rx buffer");
		sc->sc_norecvbuf++;
		goto exit_buf;
	}

	ASSERT(buf->rb_mapping.vbm_ncookies >= 1);

	/*
	 * For an unknown reason, the virtio_net_hdr must be placed
	 * as a separate virtio queue entry.
	 */
	virtio_ve_add_indirect_buf(ve, buf->rb_mapping.vbm_dmac.dmac_laddress,
	    sizeof (struct virtio_net_hdr), B_FALSE);

	/* Add the rest of the first cookie. */
	virtio_ve_add_indirect_buf(ve,
	    buf->rb_mapping.vbm_dmac.dmac_laddress +
	    sizeof (struct virtio_net_hdr),
	    buf->rb_mapping.vbm_dmac.dmac_size -
	    sizeof (struct virtio_net_hdr), B_FALSE);

	/*
	 * If the buffer consists of a single cookie (unlikely for a
	 * 64-k buffer), we are done. Otherwise, add the rest of the cookies
	 * using indirect entries.
	 */
	if (buf->rb_mapping.vbm_ncookies > 1) {
		ddi_dma_cookie_t *first_extra_dmac;
		ddi_dma_cookie_t dmac;
		first_extra_dmac =
		    vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);

		ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
		virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
		    dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
		vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
		    first_extra_dmac);
	}

	virtio_push_chain(ve, B_FALSE);

	return (DDI_SUCCESS);

exit_buf:
	vq_free_entry(sc->sc_rx_vq, ve);
exit_vq:
	return (DDI_FAILURE);
}

static int
vioif_populate_rx(struct vioif_softc *sc, int kmflag)
{
	int i = 0;
	int ret;

	for (;;) {
		ret = vioif_add_rx(sc, kmflag);
		if (ret)
			/*
			 * We could not allocate some memory. Try to work with
			 * what we've got.
			 */
			break;
		i++;
	}

	if (i)
		virtio_sync_vq(sc->sc_rx_vq);

	return (i);
}

static int
vioif_process_rx(struct vioif_softc *sc)
{
	struct vq_entry *ve;
	struct vioif_rx_buf *buf;
	mblk_t *mp;
	uint32_t len;
	int i = 0;

	while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {

		buf = sc->sc_rxbufs[ve->qe_index];
		ASSERT(buf);

		if (len < sizeof (struct virtio_net_hdr)) {
			dev_err(sc->sc_dev, CE_WARN, "RX: Chain too small: %u",
			    len - (uint32_t)sizeof (struct virtio_net_hdr));
			sc->sc_ierrors++;
			virtio_free_chain(ve);
			continue;
		}

		len -= sizeof (struct virtio_net_hdr);
		/*
		 * We copy small packets that happened to fit into a single
		 * cookie and reuse the buffers. For bigger ones, we loan
		 * the buffers upstream.
		 */
		if (len < sc->sc_rxcopy_thresh) {
			mp = allocb(len, 0);
			if (!mp) {
				sc->sc_norecvbuf++;
				sc->sc_ierrors++;

				virtio_free_chain(ve);
				break;
			}

			bcopy((char *)buf->rb_mapping.vbm_buf +
			    sizeof (struct virtio_net_hdr), mp->b_rptr, len);
			mp->b_wptr = mp->b_rptr + len;

		} else {
			mp = desballoc((unsigned char *)
			    buf->rb_mapping.vbm_buf +
			    sizeof (struct virtio_net_hdr) +
			    VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
			if (!mp) {
				sc->sc_norecvbuf++;
				sc->sc_ierrors++;

				virtio_free_chain(ve);
				break;
			}
			mp->b_wptr = mp->b_rptr + len;

			atomic_inc_ulong(&sc->sc_rxloan);
			/*
			 * Buffer loaned, we will have to allocate a new one
			 * for this slot.
			 */
			sc->sc_rxbufs[ve->qe_index] = NULL;
		}

		/*
		 * virtio-net does not tell us if this packet is multicast
		 * or broadcast, so we have to check it.
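		 * A set individual/group bit in the first octet of the
		 * destination address means multicast or broadcast; only an
		 * exact match with ff:ff:ff:ff:ff:ff counts as broadcast.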
878 */ 879 if (mp->b_rptr[0] & 0x1) { 880 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0) 881 sc->sc_multircv++; 882 else 883 sc->sc_brdcstrcv++; 884 } 885 886 sc->sc_rbytes += len; 887 sc->sc_ipackets++; 888 889 virtio_free_chain(ve); 890 mac_rx(sc->sc_mac_handle, NULL, mp); 891 i++; 892 } 893 894 return (i); 895 } 896 897 static void 898 vioif_reclaim_used_tx(struct vioif_softc *sc) 899 { 900 struct vq_entry *ve; 901 struct vioif_tx_buf *buf; 902 uint32_t len; 903 mblk_t *mp; 904 int i = 0; 905 906 while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) { 907 /* We don't chain descriptors for tx, so don't expect any. */ 908 ASSERT(!ve->qe_next); 909 910 buf = &sc->sc_txbufs[ve->qe_index]; 911 mp = buf->tb_mp; 912 buf->tb_mp = NULL; 913 914 if (mp) { 915 for (i = 0; i < buf->tb_external_num; i++) 916 (void) ddi_dma_unbind_handle( 917 buf->tb_external_mapping[i].vbm_dmah); 918 } 919 920 virtio_free_chain(ve); 921 922 /* External mapping used, mp was not freed in vioif_send() */ 923 if (mp) 924 freemsg(mp); 925 i++; 926 } 927 928 if (sc->sc_tx_stopped && i) { 929 sc->sc_tx_stopped = 0; 930 mac_tx_update(sc->sc_mac_handle); 931 } 932 } 933 934 /* sc will be used to update stat counters. */ 935 /* ARGSUSED */ 936 static inline void 937 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp, 938 size_t msg_size) 939 { 940 struct vioif_tx_buf *buf; 941 buf = &sc->sc_txbufs[ve->qe_index]; 942 943 ASSERT(buf); 944 945 /* Frees mp */ 946 mcopymsg(mp, buf->tb_inline_mapping.vbm_buf + 947 sizeof (struct virtio_net_hdr)); 948 949 virtio_ve_add_indirect_buf(ve, 950 buf->tb_inline_mapping.vbm_dmac.dmac_laddress + 951 sizeof (struct virtio_net_hdr), msg_size, B_TRUE); 952 } 953 954 static inline int 955 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf, 956 int i) 957 { 958 int ret = DDI_SUCCESS; 959 960 if (!buf->tb_external_mapping[i].vbm_dmah) { 961 ret = ddi_dma_alloc_handle(sc->sc_dev, 962 &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL, 963 &buf->tb_external_mapping[i].vbm_dmah); 964 if (ret != DDI_SUCCESS) { 965 dev_err(sc->sc_dev, CE_WARN, 966 "Can't allocate dma handle for external tx buffer"); 967 } 968 } 969 970 return (ret); 971 } 972 973 static inline int 974 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp, 975 size_t msg_size) 976 { 977 _NOTE(ARGUNUSED(msg_size)); 978 979 struct vioif_tx_buf *buf; 980 mblk_t *nmp; 981 int i, j; 982 int ret = DDI_SUCCESS; 983 984 buf = &sc->sc_txbufs[ve->qe_index]; 985 986 ASSERT(buf); 987 988 buf->tb_external_num = 0; 989 i = 0; 990 nmp = mp; 991 992 while (nmp) { 993 size_t len; 994 ddi_dma_cookie_t dmac; 995 unsigned int ncookies; 996 997 len = MBLKL(nmp); 998 /* 999 * For some reason, the network stack can 1000 * actually send us zero-length fragments. 1001 */ 1002 if (len == 0) { 1003 nmp = nmp->b_cont; 1004 continue; 1005 } 1006 1007 ret = vioif_tx_lazy_handle_alloc(sc, buf, i); 1008 if (ret != DDI_SUCCESS) { 1009 sc->sc_notxbuf++; 1010 sc->sc_oerrors++; 1011 goto exit_lazy_alloc; 1012 } 1013 ret = ddi_dma_addr_bind_handle( 1014 buf->tb_external_mapping[i].vbm_dmah, NULL, 1015 (caddr_t)nmp->b_rptr, len, 1016 DDI_DMA_WRITE | DDI_DMA_STREAMING, 1017 DDI_DMA_SLEEP, NULL, &dmac, &ncookies); 1018 1019 if (ret != DDI_SUCCESS) { 1020 sc->sc_oerrors++; 1021 dev_err(sc->sc_dev, CE_NOTE, 1022 "TX: Failed to bind external handle"); 1023 goto exit_bind; 1024 } 1025 1026 /* Check if we still fit into the indirect table. 
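		 * Every cookie of this fragment takes one indirect descriptor,
		 * and one slot is already used by the virtio_net_hdr, so at
		 * most VIOIF_INDIRECT_MAX - 1 remain for payload cookies.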
*/ 1027 if (virtio_ve_indirect_available(ve) < ncookies) { 1028 dev_err(sc->sc_dev, CE_NOTE, 1029 "TX: Indirect descriptor table limit reached." 1030 " It took %d fragments.", i); 1031 sc->sc_notxbuf++; 1032 sc->sc_oerrors++; 1033 1034 ret = DDI_FAILURE; 1035 goto exit_limit; 1036 } 1037 1038 virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah, 1039 dmac, ncookies, B_TRUE); 1040 1041 nmp = nmp->b_cont; 1042 i++; 1043 } 1044 1045 buf->tb_external_num = i; 1046 /* Save the mp to free it when the packet is sent. */ 1047 buf->tb_mp = mp; 1048 1049 return (DDI_SUCCESS); 1050 1051 exit_limit: 1052 exit_bind: 1053 exit_lazy_alloc: 1054 1055 for (j = 0; j < i; j++) { 1056 (void) ddi_dma_unbind_handle( 1057 buf->tb_external_mapping[j].vbm_dmah); 1058 } 1059 1060 return (ret); 1061 } 1062 1063 static boolean_t 1064 vioif_send(struct vioif_softc *sc, mblk_t *mp) 1065 { 1066 struct vq_entry *ve; 1067 struct vioif_tx_buf *buf; 1068 struct virtio_net_hdr *net_header = NULL; 1069 size_t msg_size = 0; 1070 uint32_t csum_start; 1071 uint32_t csum_stuff; 1072 uint32_t csum_flags; 1073 uint32_t lso_flags; 1074 uint32_t lso_mss; 1075 mblk_t *nmp; 1076 int ret; 1077 boolean_t lso_required = B_FALSE; 1078 1079 for (nmp = mp; nmp; nmp = nmp->b_cont) 1080 msg_size += MBLKL(nmp); 1081 1082 if (sc->sc_tx_tso4) { 1083 mac_lso_get(mp, &lso_mss, &lso_flags); 1084 lso_required = (lso_flags & HW_LSO); 1085 } 1086 1087 ve = vq_alloc_entry(sc->sc_tx_vq); 1088 1089 if (!ve) { 1090 sc->sc_notxbuf++; 1091 /* Out of free descriptors - try later. */ 1092 return (B_FALSE); 1093 } 1094 buf = &sc->sc_txbufs[ve->qe_index]; 1095 1096 /* Use the inline buffer of the first entry for the virtio_net_hdr. */ 1097 (void) memset(buf->tb_inline_mapping.vbm_buf, 0, 1098 sizeof (struct virtio_net_hdr)); 1099 1100 net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf; 1101 1102 mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL, 1103 NULL, &csum_flags); 1104 1105 /* They want us to do the TCP/UDP csum calculation. */ 1106 if (csum_flags & HCK_PARTIALCKSUM) { 1107 struct ether_header *eth_header; 1108 int eth_hsize; 1109 1110 /* Did we ask for it? */ 1111 ASSERT(sc->sc_tx_csum); 1112 1113 /* We only asked for partial csum packets. */ 1114 ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM)); 1115 ASSERT(!(csum_flags & HCK_FULLCKSUM)); 1116 1117 eth_header = (void *) mp->b_rptr; 1118 if (eth_header->ether_type == htons(ETHERTYPE_VLAN)) { 1119 eth_hsize = sizeof (struct ether_vlan_header); 1120 } else { 1121 eth_hsize = sizeof (struct ether_header); 1122 } 1123 net_header->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 1124 net_header->csum_start = eth_hsize + csum_start; 1125 net_header->csum_offset = csum_stuff - csum_start; 1126 } 1127 1128 /* setup LSO fields if required */ 1129 if (lso_required) { 1130 net_header->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 1131 net_header->gso_size = (uint16_t)lso_mss; 1132 } 1133 1134 virtio_ve_add_indirect_buf(ve, 1135 buf->tb_inline_mapping.vbm_dmac.dmac_laddress, 1136 sizeof (struct virtio_net_hdr), B_TRUE); 1137 1138 /* meanwhile update the statistic */ 1139 if (mp->b_rptr[0] & 0x1) { 1140 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0) 1141 sc->sc_multixmt++; 1142 else 1143 sc->sc_brdcstxmt++; 1144 } 1145 1146 /* 1147 * We copy small packets into the inline buffer. The bigger ones 1148 * get mapped using the mapped buffer. 
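	 * Note that vioif_tx_inline() consumes mp via mcopymsg(), while the
	 * external path keeps mp attached to the tx buffer until
	 * vioif_reclaim_used_tx() frees it.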
1149 */ 1150 if (msg_size < sc->sc_txcopy_thresh) { 1151 vioif_tx_inline(sc, ve, mp, msg_size); 1152 } else { 1153 /* statistic gets updated by vioif_tx_external when fail */ 1154 ret = vioif_tx_external(sc, ve, mp, msg_size); 1155 if (ret != DDI_SUCCESS) 1156 goto exit_tx_external; 1157 } 1158 1159 virtio_push_chain(ve, B_TRUE); 1160 1161 sc->sc_opackets++; 1162 sc->sc_obytes += msg_size; 1163 1164 return (B_TRUE); 1165 1166 exit_tx_external: 1167 1168 vq_free_entry(sc->sc_tx_vq, ve); 1169 /* 1170 * vioif_tx_external can fail when the buffer does not fit into the 1171 * indirect descriptor table. Free the mp. I don't expect this ever 1172 * to happen. 1173 */ 1174 freemsg(mp); 1175 1176 return (B_TRUE); 1177 } 1178 1179 mblk_t * 1180 vioif_tx(void *arg, mblk_t *mp) 1181 { 1182 struct vioif_softc *sc = arg; 1183 mblk_t *nmp; 1184 1185 while (mp != NULL) { 1186 nmp = mp->b_next; 1187 mp->b_next = NULL; 1188 1189 if (!vioif_send(sc, mp)) { 1190 sc->sc_tx_stopped = 1; 1191 mp->b_next = nmp; 1192 break; 1193 } 1194 mp = nmp; 1195 } 1196 1197 return (mp); 1198 } 1199 1200 int 1201 vioif_start(void *arg) 1202 { 1203 struct vioif_softc *sc = arg; 1204 1205 mac_link_update(sc->sc_mac_handle, 1206 vioif_link_state(sc)); 1207 1208 virtio_start_vq_intr(sc->sc_rx_vq); 1209 1210 return (DDI_SUCCESS); 1211 } 1212 1213 void 1214 vioif_stop(void *arg) 1215 { 1216 struct vioif_softc *sc = arg; 1217 1218 virtio_stop_vq_intr(sc->sc_rx_vq); 1219 } 1220 1221 /* ARGSUSED */ 1222 static int 1223 vioif_stat(void *arg, uint_t stat, uint64_t *val) 1224 { 1225 struct vioif_softc *sc = arg; 1226 1227 switch (stat) { 1228 case MAC_STAT_IERRORS: 1229 *val = sc->sc_ierrors; 1230 break; 1231 case MAC_STAT_OERRORS: 1232 *val = sc->sc_oerrors; 1233 break; 1234 case MAC_STAT_MULTIRCV: 1235 *val = sc->sc_multircv; 1236 break; 1237 case MAC_STAT_BRDCSTRCV: 1238 *val = sc->sc_brdcstrcv; 1239 break; 1240 case MAC_STAT_MULTIXMT: 1241 *val = sc->sc_multixmt; 1242 break; 1243 case MAC_STAT_BRDCSTXMT: 1244 *val = sc->sc_brdcstxmt; 1245 break; 1246 case MAC_STAT_IPACKETS: 1247 *val = sc->sc_ipackets; 1248 break; 1249 case MAC_STAT_RBYTES: 1250 *val = sc->sc_rbytes; 1251 break; 1252 case MAC_STAT_OPACKETS: 1253 *val = sc->sc_opackets; 1254 break; 1255 case MAC_STAT_OBYTES: 1256 *val = sc->sc_obytes; 1257 break; 1258 case MAC_STAT_NORCVBUF: 1259 *val = sc->sc_norecvbuf; 1260 break; 1261 case MAC_STAT_NOXMTBUF: 1262 *val = sc->sc_notxbuf; 1263 break; 1264 case MAC_STAT_IFSPEED: 1265 /* always 1 Gbit */ 1266 *val = 1000000000ULL; 1267 break; 1268 case ETHER_STAT_LINK_DUPLEX: 1269 /* virtual device, always full-duplex */ 1270 *val = LINK_DUPLEX_FULL; 1271 break; 1272 1273 default: 1274 return (ENOTSUP); 1275 } 1276 1277 return (DDI_SUCCESS); 1278 } 1279 1280 static int 1281 vioif_set_prop_private(struct vioif_softc *sc, const char *pr_name, 1282 uint_t pr_valsize, const void *pr_val) 1283 { 1284 _NOTE(ARGUNUSED(pr_valsize)); 1285 1286 long result; 1287 1288 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) { 1289 1290 if (pr_val == NULL) 1291 return (EINVAL); 1292 1293 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result); 1294 1295 if (result < 0 || result > VIOIF_TX_THRESH_MAX) 1296 return (EINVAL); 1297 sc->sc_txcopy_thresh = result; 1298 } 1299 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) { 1300 1301 if (pr_val == NULL) 1302 return (EINVAL); 1303 1304 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result); 1305 1306 if (result < 0 || result > VIOIF_RX_THRESH_MAX) 1307 return (EINVAL); 1308 sc->sc_rxcopy_thresh = result; 1309 } 1310 
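	/* Unrecognized private property names fall through and are accepted. */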
return (0); 1311 } 1312 1313 static int 1314 vioif_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1315 uint_t pr_valsize, const void *pr_val) 1316 { 1317 struct vioif_softc *sc = arg; 1318 const uint32_t *new_mtu; 1319 int err; 1320 1321 switch (pr_num) { 1322 case MAC_PROP_MTU: 1323 new_mtu = pr_val; 1324 1325 if (*new_mtu > MAX_MTU) { 1326 return (EINVAL); 1327 } 1328 1329 err = mac_maxsdu_update(sc->sc_mac_handle, *new_mtu); 1330 if (err) { 1331 return (err); 1332 } 1333 break; 1334 case MAC_PROP_PRIVATE: 1335 err = vioif_set_prop_private(sc, pr_name, 1336 pr_valsize, pr_val); 1337 if (err) 1338 return (err); 1339 break; 1340 default: 1341 return (ENOTSUP); 1342 } 1343 1344 return (0); 1345 } 1346 1347 static int 1348 vioif_get_prop_private(struct vioif_softc *sc, const char *pr_name, 1349 uint_t pr_valsize, void *pr_val) 1350 { 1351 int err = ENOTSUP; 1352 int value; 1353 1354 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) { 1355 1356 value = sc->sc_txcopy_thresh; 1357 err = 0; 1358 goto done; 1359 } 1360 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) { 1361 1362 value = sc->sc_rxcopy_thresh; 1363 err = 0; 1364 goto done; 1365 } 1366 done: 1367 if (err == 0) { 1368 (void) snprintf(pr_val, pr_valsize, "%d", value); 1369 } 1370 return (err); 1371 } 1372 1373 static int 1374 vioif_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1375 uint_t pr_valsize, void *pr_val) 1376 { 1377 struct vioif_softc *sc = arg; 1378 int err = ENOTSUP; 1379 1380 switch (pr_num) { 1381 case MAC_PROP_PRIVATE: 1382 err = vioif_get_prop_private(sc, pr_name, 1383 pr_valsize, pr_val); 1384 break; 1385 default: 1386 break; 1387 } 1388 return (err); 1389 } 1390 1391 static void 1392 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1393 mac_prop_info_handle_t prh) 1394 { 1395 struct vioif_softc *sc = arg; 1396 char valstr[64]; 1397 int value; 1398 1399 switch (pr_num) { 1400 case MAC_PROP_MTU: 1401 mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU); 1402 break; 1403 1404 case MAC_PROP_PRIVATE: 1405 bzero(valstr, sizeof (valstr)); 1406 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) { 1407 1408 value = sc->sc_txcopy_thresh; 1409 } else if (strcmp(pr_name, 1410 vioif_rxcopy_thresh) == 0) { 1411 value = sc->sc_rxcopy_thresh; 1412 } else { 1413 return; 1414 } 1415 (void) snprintf(valstr, sizeof (valstr), "%d", value); 1416 break; 1417 1418 default: 1419 break; 1420 } 1421 } 1422 1423 static boolean_t 1424 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data) 1425 { 1426 struct vioif_softc *sc = arg; 1427 1428 switch (cap) { 1429 case MAC_CAPAB_HCKSUM: 1430 if (sc->sc_tx_csum) { 1431 uint32_t *txflags = cap_data; 1432 1433 *txflags = HCKSUM_INET_PARTIAL; 1434 return (B_TRUE); 1435 } 1436 return (B_FALSE); 1437 case MAC_CAPAB_LSO: 1438 if (sc->sc_tx_tso4) { 1439 mac_capab_lso_t *cap_lso = cap_data; 1440 1441 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4; 1442 cap_lso->lso_basic_tcp_ipv4.lso_max = MAX_MTU; 1443 return (B_TRUE); 1444 } 1445 return (B_FALSE); 1446 default: 1447 break; 1448 } 1449 return (B_FALSE); 1450 } 1451 1452 static mac_callbacks_t vioif_m_callbacks = { 1453 .mc_callbacks = (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO), 1454 .mc_getstat = vioif_stat, 1455 .mc_start = vioif_start, 1456 .mc_stop = vioif_stop, 1457 .mc_setpromisc = vioif_promisc, 1458 .mc_multicst = vioif_multicst, 1459 .mc_unicst = vioif_unicst, 1460 .mc_tx = vioif_tx, 1461 /* Optional callbacks */ 1462 .mc_reserved = NULL, /* reserved */ 1463 .mc_ioctl = NULL, /* mc_ioctl */ 1464 
.mc_getcapab = vioif_getcapab, /* mc_getcapab */ 1465 .mc_open = NULL, /* mc_open */ 1466 .mc_close = NULL, /* mc_close */ 1467 .mc_setprop = vioif_setprop, 1468 .mc_getprop = vioif_getprop, 1469 .mc_propinfo = vioif_propinfo, 1470 }; 1471 1472 static void 1473 vioif_show_features(struct vioif_softc *sc, const char *prefix, 1474 uint32_t features) 1475 { 1476 char buf[512]; 1477 char *bufp = buf; 1478 char *bufend = buf + sizeof (buf); 1479 1480 /* LINTED E_PTRDIFF_OVERFLOW */ 1481 bufp += snprintf(bufp, bufend - bufp, prefix); 1482 /* LINTED E_PTRDIFF_OVERFLOW */ 1483 bufp += virtio_show_features(features, bufp, bufend - bufp); 1484 *bufp = '\0'; 1485 1486 1487 /* Using '!' to only CE_NOTE this to the system log. */ 1488 dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features, 1489 VIRTIO_NET_FEATURE_BITS); 1490 } 1491 1492 /* 1493 * Find out which features are supported by the device and 1494 * choose which ones we wish to use. 1495 */ 1496 static int 1497 vioif_dev_features(struct vioif_softc *sc) 1498 { 1499 uint32_t host_features; 1500 1501 host_features = virtio_negotiate_features(&sc->sc_virtio, 1502 VIRTIO_NET_F_CSUM | 1503 VIRTIO_NET_F_HOST_TSO4 | 1504 VIRTIO_NET_F_HOST_ECN | 1505 VIRTIO_NET_F_MAC | 1506 VIRTIO_NET_F_STATUS | 1507 VIRTIO_F_RING_INDIRECT_DESC | 1508 VIRTIO_F_NOTIFY_ON_EMPTY); 1509 1510 vioif_show_features(sc, "Host features: ", host_features); 1511 vioif_show_features(sc, "Negotiated features: ", 1512 sc->sc_virtio.sc_features); 1513 1514 if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) { 1515 dev_err(sc->sc_dev, CE_NOTE, 1516 "Host does not support RING_INDIRECT_DESC, bye."); 1517 return (DDI_FAILURE); 1518 } 1519 1520 return (DDI_SUCCESS); 1521 } 1522 1523 static int 1524 vioif_has_feature(struct vioif_softc *sc, uint32_t feature) 1525 { 1526 return (virtio_has_feature(&sc->sc_virtio, feature)); 1527 } 1528 1529 static void 1530 vioif_set_mac(struct vioif_softc *sc) 1531 { 1532 int i; 1533 1534 for (i = 0; i < ETHERADDRL; i++) { 1535 virtio_write_device_config_1(&sc->sc_virtio, 1536 VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]); 1537 } 1538 } 1539 1540 /* Get the mac address out of the hardware, or make up one. 
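 * If VIRTIO_NET_F_MAC was negotiated, the address is read byte by byte from
 * device config space at VIRTIO_NET_CONFIG_MAC; otherwise a random, locally
 * administered unicast address is generated and written back to the device
 * with vioif_set_mac().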
 */
static void
vioif_get_mac(struct vioif_softc *sc)
{
	int i;
	if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
		for (i = 0; i < ETHERADDRL; i++) {
			sc->sc_mac[i] = virtio_read_device_config_1(
			    &sc->sc_virtio,
			    VIRTIO_NET_CONFIG_MAC + i);
		}
		dev_err(sc->sc_dev, CE_NOTE, "Got MAC address from host: %s",
		    ether_sprintf((struct ether_addr *)sc->sc_mac));
	} else {
		/* Get a few random bytes */
		(void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
		/* Make sure it's a unicast MAC */
		sc->sc_mac[0] &= ~1;
		/* Set the "locally administered" bit */
		sc->sc_mac[0] |= 2;

		vioif_set_mac(sc);

		dev_err(sc->sc_dev, CE_NOTE,
		    "Generated a random MAC address: %s",
		    ether_sprintf((struct ether_addr *)sc->sc_mac));
	}
}

/*
 * Virtqueue interrupt handlers
 */
/* ARGSUSED */
uint_t
vioif_rx_handler(caddr_t arg1, caddr_t arg2)
{
	struct virtio_softc *vsc = (void *) arg1;
	struct vioif_softc *sc = container_of(vsc,
	    struct vioif_softc, sc_virtio);

	(void) vioif_process_rx(sc);

	(void) vioif_populate_rx(sc, KM_NOSLEEP);

	return (DDI_INTR_CLAIMED);
}

/* ARGSUSED */
uint_t
vioif_tx_handler(caddr_t arg1, caddr_t arg2)
{
	struct virtio_softc *vsc = (void *)arg1;
	struct vioif_softc *sc = container_of(vsc,
	    struct vioif_softc, sc_virtio);

	vioif_reclaim_used_tx(sc);
	return (DDI_INTR_CLAIMED);
}

static int
vioif_register_ints(struct vioif_softc *sc)
{
	int ret;

	struct virtio_int_handler vioif_vq_h[] = {
		{ vioif_rx_handler },
		{ vioif_tx_handler },
		{ NULL }
	};

	ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);

	return (ret);
}


static void
vioif_check_features(struct vioif_softc *sc)
{
	if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
		/* The GSO/GRO features depend on CSUM, check them here. */
		sc->sc_tx_csum = 1;
		sc->sc_rx_csum = 1;

		if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
			sc->sc_rx_csum = 0;
		}
		cmn_err(CE_NOTE, "Csum enabled.");

		if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {

			sc->sc_tx_tso4 = 1;
			/*
			 * We don't seem to have a way to ask the system
			 * not to send us LSO packets with Explicit
			 * Congestion Notification bit set, so we require
			 * the device to support it in order to do
			 * LSO.
			 */
			if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
				dev_err(sc->sc_dev, CE_NOTE,
				    "TSO4 supported, but not ECN. "
				    "Not using LSO.");
				sc->sc_tx_tso4 = 0;
			} else {
				cmn_err(CE_NOTE, "LSO enabled");
			}
		}
	}
}

static int
vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
{
	int ret, instance;
	struct vioif_softc *sc;
	struct virtio_softc *vsc;
	mac_register_t *macp;
	char cache_name[CACHE_NAME_SIZE];

	instance = ddi_get_instance(devinfo);

	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
	case DDI_PM_RESUME:
		/* We do not support suspend/resume for vioif. */
		goto exit;

	default:
		goto exit;
	}

	sc = kmem_zalloc(sizeof (struct vioif_softc), KM_SLEEP);
	ddi_set_driver_private(devinfo, sc);

	vsc = &sc->sc_virtio;

	/* Duplicate for less typing */
	sc->sc_dev = devinfo;
	vsc->sc_dev = devinfo;

	/*
	 * Initialize interrupt kstat.
	 */
	sc->sc_intrstat = kstat_create("vioif", instance, "intr", "controller",
	    KSTAT_TYPE_INTR, 1, 0);
	if (sc->sc_intrstat == NULL) {
		dev_err(devinfo, CE_WARN, "kstat_create failed");
		goto exit_intrstat;
	}
	kstat_install(sc->sc_intrstat);

	/* map BAR 0 */
	ret = ddi_regs_map_setup(devinfo, 1,
	    (caddr_t *)&sc->sc_virtio.sc_io_addr,
	    0, 0, &vioif_attr, &sc->sc_virtio.sc_ioh);
	if (ret != DDI_SUCCESS) {
		dev_err(devinfo, CE_WARN, "unable to map bar 0: %d", ret);
		goto exit_map;
	}

	virtio_device_reset(&sc->sc_virtio);
	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);

	ret = vioif_dev_features(sc);
	if (ret)
		goto exit_features;

	vsc->sc_nvqs = vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;

	(void) snprintf(cache_name, CACHE_NAME_SIZE, "vioif%d_rx", instance);
	sc->sc_rxbuf_cache = kmem_cache_create(cache_name,
	    sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
	    vioif_rx_destruct, NULL, sc, NULL, KM_SLEEP);
	if (sc->sc_rxbuf_cache == NULL) {
		dev_err(sc->sc_dev, CE_WARN, "Can't allocate the buffer cache");
		goto exit_cache;
	}

	ret = vioif_register_ints(sc);
	if (ret) {
		dev_err(sc->sc_dev, CE_WARN,
		    "Failed to allocate interrupt(s)!");
		goto exit_ints;
	}

	/*
	 * Register layout determined, can now access the
	 * device-specific bits
	 */
	vioif_get_mac(sc);

	sc->sc_rx_vq = virtio_alloc_vq(&sc->sc_virtio, 0,
	    VIOIF_RX_QLEN, VIOIF_INDIRECT_MAX, "rx");
	if (!sc->sc_rx_vq)
		goto exit_alloc1;
	virtio_stop_vq_intr(sc->sc_rx_vq);

	sc->sc_tx_vq = virtio_alloc_vq(&sc->sc_virtio, 1,
	    VIOIF_TX_QLEN, VIOIF_INDIRECT_MAX, "tx");
	if (!sc->sc_tx_vq)
		goto exit_alloc2;
	virtio_stop_vq_intr(sc->sc_tx_vq);

	if (vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ)) {
		sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
		    VIOIF_CTRL_QLEN, 0, "ctrl");
		if (!sc->sc_ctrl_vq) {
			goto exit_alloc3;
		}
		virtio_stop_vq_intr(sc->sc_ctrl_vq);
	}

	virtio_set_status(&sc->sc_virtio,
	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);

	sc->sc_rxloan = 0;

	/* set some reasonable-small default values */
	sc->sc_rxcopy_thresh = 300;
	sc->sc_txcopy_thresh = 300;
	sc->sc_mtu = ETHERMTU;

	vioif_check_features(sc);

	if (vioif_alloc_mems(sc))
		goto exit_alloc_mems;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
		goto exit_macalloc;
	}

	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = sc;
	macp->m_dip = devinfo;
	macp->m_src_addr = sc->sc_mac;
	macp->m_callbacks = &vioif_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = sc->sc_mtu;
	macp->m_margin = VLAN_TAGSZ;
	macp->m_priv_props = vioif_priv_props;

	sc->sc_macp = macp;

	/* Pre-fill the rx ring.
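	 * KM_SLEEP is fine here since attach runs in kernel context; the rx
	 * interrupt handler refills the ring with KM_NOSLEEP instead.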
*/ 1790 (void) vioif_populate_rx(sc, KM_SLEEP); 1791 1792 ret = mac_register(macp, &sc->sc_mac_handle); 1793 if (ret != 0) { 1794 dev_err(devinfo, CE_WARN, "vioif_attach: " 1795 "mac_register() failed, ret=%d", ret); 1796 goto exit_register; 1797 } 1798 1799 ret = virtio_enable_ints(&sc->sc_virtio); 1800 if (ret) { 1801 dev_err(devinfo, CE_WARN, "Failed to enable interrupts"); 1802 goto exit_enable_ints; 1803 } 1804 1805 mac_link_update(sc->sc_mac_handle, LINK_STATE_UP); 1806 return (DDI_SUCCESS); 1807 1808 exit_enable_ints: 1809 (void) mac_unregister(sc->sc_mac_handle); 1810 exit_register: 1811 mac_free(macp); 1812 exit_macalloc: 1813 vioif_free_mems(sc); 1814 exit_alloc_mems: 1815 virtio_release_ints(&sc->sc_virtio); 1816 if (sc->sc_ctrl_vq) 1817 virtio_free_vq(sc->sc_ctrl_vq); 1818 exit_alloc3: 1819 virtio_free_vq(sc->sc_tx_vq); 1820 exit_alloc2: 1821 virtio_free_vq(sc->sc_rx_vq); 1822 exit_alloc1: 1823 exit_ints: 1824 kmem_cache_destroy(sc->sc_rxbuf_cache); 1825 exit_cache: 1826 exit_features: 1827 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED); 1828 ddi_regs_map_free(&sc->sc_virtio.sc_ioh); 1829 exit_intrstat: 1830 exit_map: 1831 kstat_delete(sc->sc_intrstat); 1832 kmem_free(sc, sizeof (struct vioif_softc)); 1833 exit: 1834 return (DDI_FAILURE); 1835 } 1836 1837 static int 1838 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 1839 { 1840 struct vioif_softc *sc; 1841 1842 if ((sc = ddi_get_driver_private(devinfo)) == NULL) 1843 return (DDI_FAILURE); 1844 1845 switch (cmd) { 1846 case DDI_DETACH: 1847 break; 1848 1849 case DDI_PM_SUSPEND: 1850 /* We do not support suspend/resume for vioif. */ 1851 return (DDI_FAILURE); 1852 1853 default: 1854 return (DDI_FAILURE); 1855 } 1856 1857 if (sc->sc_rxloan) { 1858 dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream," 1859 " not detaching."); 1860 return (DDI_FAILURE); 1861 } 1862 1863 virtio_stop_vq_intr(sc->sc_rx_vq); 1864 virtio_stop_vq_intr(sc->sc_tx_vq); 1865 1866 virtio_release_ints(&sc->sc_virtio); 1867 1868 if (mac_unregister(sc->sc_mac_handle)) { 1869 return (DDI_FAILURE); 1870 } 1871 1872 mac_free(sc->sc_macp); 1873 1874 vioif_free_mems(sc); 1875 virtio_free_vq(sc->sc_rx_vq); 1876 virtio_free_vq(sc->sc_tx_vq); 1877 1878 virtio_device_reset(&sc->sc_virtio); 1879 1880 ddi_regs_map_free(&sc->sc_virtio.sc_ioh); 1881 1882 kmem_cache_destroy(sc->sc_rxbuf_cache); 1883 kstat_delete(sc->sc_intrstat); 1884 kmem_free(sc, sizeof (struct vioif_softc)); 1885 1886 return (DDI_SUCCESS); 1887 } 1888 1889 static int 1890 vioif_quiesce(dev_info_t *devinfo) 1891 { 1892 struct vioif_softc *sc; 1893 1894 if ((sc = ddi_get_driver_private(devinfo)) == NULL) 1895 return (DDI_FAILURE); 1896 1897 virtio_stop_vq_intr(sc->sc_rx_vq); 1898 virtio_stop_vq_intr(sc->sc_tx_vq); 1899 virtio_device_reset(&sc->sc_virtio); 1900 1901 return (DDI_SUCCESS); 1902 } 1903 1904 int 1905 _init(void) 1906 { 1907 int ret = 0; 1908 1909 mac_init_ops(&vioif_ops, "vioif"); 1910 1911 ret = mod_install(&modlinkage); 1912 if (ret != DDI_SUCCESS) { 1913 mac_fini_ops(&vioif_ops); 1914 return (ret); 1915 } 1916 1917 return (0); 1918 } 1919 1920 int 1921 _fini(void) 1922 { 1923 int ret; 1924 1925 ret = mod_remove(&modlinkage); 1926 if (ret == DDI_SUCCESS) { 1927 mac_fini_ops(&vioif_ops); 1928 } 1929 1930 return (ret); 1931 } 1932 1933 int 1934 _info(struct modinfo *pModinfo) 1935 { 1936 return (mod_info(&modlinkage, pModinfo)); 1937 } 1938