xref: /illumos-gate/usr/src/uts/common/io/vioif/vioif.c (revision a0fb1590788f4dcbcee3fabaeb082ab7d1ad4203)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2013 Nexenta Inc.  All rights reserved.
14  * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
15  */
16 
17 /* Based on the NetBSD virtio driver by Minoura Makoto. */
18 /*
19  * Copyright (c) 2010 Minoura Makoto.
20  * All rights reserved.
21  *
22  * Redistribution and use in source and binary forms, with or without
23  * modification, are permitted provided that the following conditions
24  * are met:
25  * 1. Redistributions of source code must retain the above copyright
26  *    notice, this list of conditions and the following disclaimer.
27  * 2. Redistributions in binary form must reproduce the above copyright
28  *    notice, this list of conditions and the following disclaimer in the
29  *    documentation and/or other materials provided with the distribution.
30  *
31  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
32  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
34  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
35  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
40  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41  */
42 
43 #include <sys/types.h>
44 #include <sys/errno.h>
45 #include <sys/param.h>
46 #include <sys/stropts.h>
47 #include <sys/stream.h>
48 #include <sys/strsubr.h>
49 #include <sys/kmem.h>
50 #include <sys/conf.h>
51 #include <sys/devops.h>
52 #include <sys/ksynch.h>
53 #include <sys/stat.h>
54 #include <sys/modctl.h>
55 #include <sys/debug.h>
56 #include <sys/pci.h>
57 #include <sys/ethernet.h>
58 #include <sys/vlan.h>
59 
60 #include <sys/dlpi.h>
61 #include <sys/taskq.h>
62 #include <sys/cyclic.h>
63 
64 #include <sys/pattr.h>
65 #include <sys/strsun.h>
66 
67 #include <sys/random.h>
68 #include <sys/sysmacros.h>
69 #include <sys/stream.h>
70 
71 #include <sys/mac.h>
72 #include <sys/mac_provider.h>
73 #include <sys/mac_ether.h>
74 
75 #include "virtiovar.h"
76 #include "virtioreg.h"
77 
/* Configuration registers */
#define	VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
#define	VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */

/* Feature bits */
#define	VIRTIO_NET_F_CSUM	(1 << 0) /* Host handles pkts w/ partial csum */
#define	VIRTIO_NET_F_GUEST_CSUM	(1 << 1) /* Guest handles pkts w/ part csum */
#define	VIRTIO_NET_F_MAC	(1 << 5) /* Host has given MAC address. */
#define	VIRTIO_NET_F_GSO	(1 << 6) /* Host handles pkts w/ any GSO type */
#define	VIRTIO_NET_F_GUEST_TSO4	(1 << 7) /* Guest can handle TSOv4 in. */
#define	VIRTIO_NET_F_GUEST_TSO6	(1 << 8) /* Guest can handle TSOv6 in. */
#define	VIRTIO_NET_F_GUEST_ECN	(1 << 9) /* Guest can handle TSO[6] w/ ECN in */
#define	VIRTIO_NET_F_GUEST_UFO	(1 << 10) /* Guest can handle UFO in. */
#define	VIRTIO_NET_F_HOST_TSO4	(1 << 11) /* Host can handle TSOv4 in. */
#define	VIRTIO_NET_F_HOST_TSO6	(1 << 12) /* Host can handle TSOv6 in. */
#define	VIRTIO_NET_F_HOST_ECN	(1 << 13) /* Host can handle TSO[6] w/ ECN in */
#define	VIRTIO_NET_F_HOST_UFO	(1 << 14) /* Host can handle UFO in. */
#define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* Host can merge receive buffers. */
#define	VIRTIO_NET_F_STATUS	(1 << 16) /* Config.status available */
#define	VIRTIO_NET_F_CTRL_VQ	(1 << 17) /* Control channel available */
#define	VIRTIO_NET_F_CTRL_RX	(1 << 18) /* Control channel RX mode support */
#define	VIRTIO_NET_F_CTRL_VLAN	(1 << 19) /* Control channel VLAN filtering */
#define	VIRTIO_NET_F_CTRL_RX_EXTRA (1 << 20) /* Extra RX mode control support */

/*
 * Names for the VIRTIO_NET_F_* bits above.  Each entry is an octal escape
 * holding the 1-based bit number (e.g. "\21" is 17, i.e. 1 << 16) followed
 * by the feature name.
 * NOTE(review): this has the shape of a cmn_err(9F) "%b" bit-field
 * description with "\020" selecting base 16; the consumer is not visible
 * here, so confirm before relying on that.
 */
#define	VIRTIO_NET_FEATURE_BITS \
	"\020" \
	"\1CSUM" \
	"\2GUEST_CSUM" \
	"\6MAC" \
	"\7GSO" \
	"\10GUEST_TSO4" \
	"\11GUEST_TSO6" \
	"\12GUEST_ECN" \
	"\13GUEST_UFO" \
	"\14HOST_TSO4" \
	"\15HOST_TSO6" \
	"\16HOST_ECN" \
	"\17HOST_UFO" \
	"\20MRG_RXBUF" \
	"\21STATUS" \
	"\22CTRL_VQ" \
	"\23CTRL_RX" \
	"\24CTRL_VLAN" \
	"\25CTRL_RX_EXTRA"

/* Status: bits of the VIRTIO_NET_CONFIG_STATUS register. */
#define	VIRTIO_NET_S_LINK_UP	1
125 
#pragma pack(1)
/*
 * Packet header structure.
 *
 * One of these precedes the packet data in every rx buffer, and the tx path
 * builds one at the start of the inline tx buffer.  The structure is packed
 * because its layout is shared with the device.
 */
struct virtio_net_hdr {
	uint8_t		flags;		/* VIRTIO_NET_HDR_F_* */
	uint8_t		gso_type;	/* VIRTIO_NET_HDR_GSO_* */
	uint16_t	hdr_len;
	uint16_t	gso_size;	/* tx: set from the LSO MSS */
	uint16_t	csum_start;	/* tx: L2 header size + csum start */
	uint16_t	csum_offset;	/* tx: csum stuff - csum start */
};
#pragma pack()

#define	VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
#define	VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
#define	VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
#define	VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
#define	VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
#define	VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
144 
145 
/* Control virtqueue */
#pragma pack(1)
/* Command header: a class and a command within that class. */
struct virtio_net_ctrl_cmd {
	uint8_t	class;
	uint8_t	command;
};
#pragma pack()

/*
 * Going by the naming, each group below is a class value followed by the
 * commands of that class.
 * NOTE(review): confirm the pairing against the virtio specification; none
 * of these are used in the code visible here.
 */
#define	VIRTIO_NET_CTRL_RX		0
#define	VIRTIO_NET_CTRL_RX_PROMISC	0
#define	VIRTIO_NET_CTRL_RX_ALLMULTI	1

#define	VIRTIO_NET_CTRL_MAC		1
#define	VIRTIO_NET_CTRL_MAC_TABLE_SET	0

#define	VIRTIO_NET_CTRL_VLAN		2
#define	VIRTIO_NET_CTRL_VLAN_ADD	0
#define	VIRTIO_NET_CTRL_VLAN_DEL	1

/* Packed payload/result layouts for the control commands. */
#pragma pack(1)
struct virtio_net_ctrl_status {
	uint8_t	ack;
};

struct virtio_net_ctrl_rx {
	uint8_t	onoff;
};

/* A count followed by that many ETHERADDRL-byte addresses. */
struct virtio_net_ctrl_mac_tbl {
	uint32_t nentries;
	uint8_t macs[][ETHERADDRL];
};

struct virtio_net_ctrl_vlan {
	uint16_t id;
};
#pragma pack()
183 
static int vioif_quiesce(dev_info_t *);
static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);

/*
 * Device operations for the driver.  The trailing comments name the
 * DDI_DEFINE_STREAM_OPS() macro parameter each argument is passed as.
 */
DDI_DEFINE_STREAM_OPS(vioif_ops,
    nulldev,		/* identify */
    nulldev,		/* probe */
    vioif_attach,	/* attach */
    vioif_detach,	/* detach */
    nodev,		/* reset */
    NULL,		/* getinfo */
    D_MP,		/* flag */
    NULL,		/* stream_tab */
    vioif_quiesce	/* quiesce */);
198 
199 static char vioif_ident[] = "VirtIO ethernet driver";
200 
201 /* Standard Module linkage initialization for a Streams driver */
202 extern struct mod_ops mod_driverops;
203 
204 static struct modldrv modldrv = {
205 	&mod_driverops,		/* Type of module.  This one is a driver */
206 	vioif_ident,		/* short description */
207 	&vioif_ops		/* driver specific ops */
208 };
209 
210 static struct modlinkage modlinkage = {
211 	MODREV_1,
212 	{
213 		(void *)&modldrv,
214 		NULL,
215 	},
216 };
217 
218 ddi_device_acc_attr_t vioif_attr = {
219 	DDI_DEVICE_ATTR_V0,
220 	DDI_NEVERSWAP_ACC,	/* virtio is always native byte order */
221 	DDI_STORECACHING_OK_ACC,
222 	DDI_DEFAULT_ACC
223 };
224 
/*
 * A mapping represents a binding for a single buffer that is contiguous in the
 * virtual address space.
 *
 * Pre-allocated mappings (rx buffers, inline tx buffers) fill in every
 * field.  The lazily allocated external tx mappings only keep the DMA
 * handle here; their cookies live on the stack of the tx path.
 */
struct vioif_buf_mapping {
	caddr_t			vbm_buf;	/* buffer address */
	ddi_dma_handle_t	vbm_dmah;	/* DMA handle for the buffer */
	ddi_acc_handle_t	vbm_acch;	/* from ddi_dma_mem_alloc() */
	ddi_dma_cookie_t	vbm_dmac;	/* first cookie of the binding */
	unsigned int		vbm_ncookies;	/* cookies in the binding */
};
236 
/*
 * Rx buffers can be loaned upstream, so the code has
 * to allocate them dynamically.
 *
 * They come from the softc's rx buffer kmem cache; the cache constructor
 * sets up the DMA mapping and the free routine.
 */
struct vioif_rx_buf {
	struct vioif_softc	*rb_sc;		/* owning device instance */
	frtn_t			rb_frtn;	/* passed to desballoc() on loan */

	struct vioif_buf_mapping rb_mapping;	/* header + packet data */
};
247 
/*
 * Tx buffers have two mapping types. One, "inline", is pre-allocated and is
 * used to hold the virtio_net_hdr. Small packets also get copied there, as
 * it's faster than mapping them. Bigger packets get mapped using the
 * "external" mapping array. An array is used, because a packet may consist of
 * multiple fragments, so each fragment gets bound to an entry. According to my
 * observations, the number of fragments does not exceed 2, but just in case,
 * a bigger, up to VIOIF_INDIRECT_MAX - 1 array is allocated. To save resources,
 * the dma handles are allocated lazily in the tx path.
 */
struct vioif_tx_buf {
	/* Message to free once sent; only set when external mappings are used */
	mblk_t			*tb_mp;

	/* inline buffer */
	struct vioif_buf_mapping tb_inline_mapping;

	/* External buffers */
	struct vioif_buf_mapping *tb_external_mapping;
	/* Number of external mappings bound for the packet in flight */
	unsigned int		tb_external_num;
};
268 
/* Per-instance driver state. */
struct vioif_softc {
	dev_info_t		*sc_dev; /* mirrors virtio_softc->sc_dev */
	struct virtio_softc	sc_virtio;

	mac_handle_t sc_mac_handle;
	mac_register_t *sc_macp;

	/* The receive, transmit and control virtqueues. */
	struct virtqueue	*sc_rx_vq;
	struct virtqueue	*sc_tx_vq;
	struct virtqueue	*sc_ctrl_vq;

	/*
	 * Cleared, followed by mac_tx_update(), once tx descriptors have
	 * been reclaimed.
	 */
	unsigned int		sc_tx_stopped:1;

	/* Feature bits. */
	unsigned int		sc_rx_csum:1;
	unsigned int		sc_tx_csum:1;
	unsigned int		sc_tx_tso4:1;

	int			sc_mtu;
	uint8_t			sc_mac[ETHERADDRL];
	/*
	 * For rx buffers, we keep a pointer array, because the buffers
	 * can be loaned upstream, and we have to repopulate the array with
	 * new members.  The array is indexed by virtqueue entry index; a
	 * NULL slot means the buffer was loaned (or never allocated).
	 */
	struct vioif_rx_buf	**sc_rxbufs;

	/*
	 * For tx, we just allocate an array of buffers. The packet can
	 * either be copied into the inline buffer, or the external mapping
	 * could be used to map the packet.  Indexed by virtqueue entry index.
	 */
	struct vioif_tx_buf	*sc_txbufs;

	kstat_t			*sc_intrstat;
	/*
	 * We "loan" rx buffers upstream and reuse them after they are
	 * freed. This lets us avoid allocations in the hot path.
	 * sc_rxloan counts the buffers currently on loan; it is updated
	 * with atomic operations.
	 */
	kmem_cache_t		*sc_rxbuf_cache;
	ulong_t			sc_rxloan;

	/* Copying small packets turns out to be faster than mapping them. */
	unsigned long		sc_rxcopy_thresh;
	unsigned long		sc_txcopy_thresh;
	/* Statistics counters. */
	uint64_t		sc_ipackets;
	uint64_t		sc_opackets;
	uint64_t		sc_rbytes;
	uint64_t		sc_obytes;
	uint64_t		sc_brdcstxmt;
	uint64_t		sc_brdcstrcv;
	uint64_t		sc_multixmt;
	uint64_t		sc_multircv;
	uint64_t		sc_norecvbuf;
	uint64_t		sc_notxbuf;
	uint64_t		sc_ierrors;
	uint64_t		sc_oerrors;
};
328 
#define	ETHER_HEADER_LEN		sizeof (struct ether_header)

/* MAX_PAYLOAD is the largest MTU plus the ethernet header. */
#define	MAX_PAYLOAD	65535
#define	MAX_MTU		(MAX_PAYLOAD - ETHER_HEADER_LEN)
#define	DEFAULT_MTU	ETHERMTU

/*
 * Yeah, we spend 8M per device. Turns out, there is no point
 * being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
 * because vhost does not support them, and we expect to be used with
 * vhost in production environment.
 */
/* The buffer keeps both the packet data and the virtio_net_hdr. */
#define	VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))

/*
 * We win a bit on header alignment, but the host wins a lot
 * more on moving aligned buffers. Might need more thought.
 */
#define	VIOIF_IP_ALIGN 0

/* Maximum number of indirect descriptors, somewhat arbitrary. */
#define	VIOIF_INDIRECT_MAX 128

/*
 * We pre-allocate a reasonably large buffer to copy small packets
 * there. Bigger packets are mapped, packets with multiple
 * cookies are mapped as indirect buffers.
 */
#define	VIOIF_TX_INLINE_SIZE 2048

/* Queue sizes; 0 requests the native queue size for all queues. */
#define	VIOIF_RX_QLEN 0
#define	VIOIF_TX_QLEN 0
#define	VIOIF_CTRL_QLEN 0
365 
/* The ethernet broadcast address; used to classify received frames. */
static uchar_t vioif_broadcast[ETHERADDRL] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/*
 * NOTE(review): presumably the upper bounds accepted for the tx/rx copy
 * thresholds; the code that uses them is not visible here.
 */
#define	VIOIF_TX_THRESH_MAX	640
#define	VIOIF_RX_THRESH_MAX	640

/* NOTE(review): presumably the size of the kmem cache name buffer. */
#define	CACHE_NAME_SIZE	32

/* Names of the driver-private properties. */
static char vioif_txcopy_thresh[] =
	"vioif_txcopy_thresh";
static char vioif_rxcopy_thresh[] =
	"vioif_rxcopy_thresh";

/* NULL-terminated list of the private property names. */
static char *vioif_priv_props[] = {
	vioif_txcopy_thresh,
	vioif_rxcopy_thresh,
	NULL
};
385 
386 /* Add up to ddi? */
387 static ddi_dma_cookie_t *
388 vioif_dma_curr_cookie(ddi_dma_handle_t dmah)
389 {
390 	ddi_dma_impl_t *dmah_impl = (void *) dmah;
391 	ASSERT(dmah_impl->dmai_cookie);
392 	return (dmah_impl->dmai_cookie);
393 }
394 
395 static void
396 vioif_dma_reset_cookie(ddi_dma_handle_t dmah, ddi_dma_cookie_t *dmac)
397 {
398 	ddi_dma_impl_t *dmah_impl = (void *) dmah;
399 	dmah_impl->dmai_cookie = dmac;
400 }
401 
402 static link_state_t
403 vioif_link_state(struct vioif_softc *sc)
404 {
405 	if (sc->sc_virtio.sc_features & VIRTIO_NET_F_STATUS) {
406 		if (virtio_read_device_config_2(&sc->sc_virtio,
407 		    VIRTIO_NET_CONFIG_STATUS) & VIRTIO_NET_S_LINK_UP) {
408 			return (LINK_STATE_UP);
409 		} else {
410 			return (LINK_STATE_DOWN);
411 		}
412 	}
413 
414 	return (LINK_STATE_UP);
415 }
416 
417 static ddi_dma_attr_t vioif_inline_buf_dma_attr = {
418 	DMA_ATTR_V0,		/* Version number */
419 	0,			/* low address */
420 	0xFFFFFFFFFFFFFFFF,	/* high address */
421 	0xFFFFFFFF,		/* counter register max */
422 	1,			/* page alignment */
423 	1,			/* burst sizes: 1 - 32 */
424 	1,			/* minimum transfer size */
425 	0xFFFFFFFF,		/* max transfer size */
426 	0xFFFFFFFFFFFFFFF,	/* address register max */
427 	1,			/* scatter-gather capacity */
428 	1,			/* device operates on bytes */
429 	0,			/* attr flag: set to 0 */
430 };
431 
432 static ddi_dma_attr_t vioif_mapped_buf_dma_attr = {
433 	DMA_ATTR_V0,		/* Version number */
434 	0,			/* low address */
435 	0xFFFFFFFFFFFFFFFF,	/* high address */
436 	0xFFFFFFFF,		/* counter register max */
437 	1,			/* page alignment */
438 	1,			/* burst sizes: 1 - 32 */
439 	1,			/* minimum transfer size */
440 	0xFFFFFFFF,		/* max transfer size */
441 	0xFFFFFFFFFFFFFFF,	/* address register max */
442 
443 	/* One entry is used for the virtio_net_hdr on the tx path */
444 	VIOIF_INDIRECT_MAX - 1,	/* scatter-gather capacity */
445 	1,			/* device operates on bytes */
446 	0,			/* attr flag: set to 0 */
447 };
448 
449 static ddi_device_acc_attr_t vioif_bufattr = {
450 	DDI_DEVICE_ATTR_V0,
451 	DDI_NEVERSWAP_ACC,
452 	DDI_STORECACHING_OK_ACC,
453 	DDI_DEFAULT_ACC
454 };
455 
456 static void
457 vioif_rx_free(caddr_t free_arg)
458 {
459 	struct vioif_rx_buf *buf = (void *) free_arg;
460 	struct vioif_softc *sc = buf->rb_sc;
461 
462 	kmem_cache_free(sc->sc_rxbuf_cache, buf);
463 	atomic_dec_ulong(&sc->sc_rxloan);
464 }
465 
466 static int
467 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
468 {
469 	_NOTE(ARGUNUSED(kmflags));
470 	struct vioif_softc *sc = user_arg;
471 	struct vioif_rx_buf *buf = buffer;
472 	size_t len;
473 
474 	if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
475 	    DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
476 		dev_err(sc->sc_dev, CE_WARN,
477 		    "Can't allocate dma handle for rx buffer");
478 		goto exit_handle;
479 	}
480 
481 	if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
482 	    VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
483 	    &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
484 	    NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
485 		dev_err(sc->sc_dev, CE_WARN,
486 		    "Can't allocate rx buffer");
487 		goto exit_alloc;
488 	}
489 	ASSERT(len >= VIOIF_RX_SIZE);
490 
491 	if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
492 	    buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
493 	    DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
494 	    &buf->rb_mapping.vbm_ncookies)) {
495 		dev_err(sc->sc_dev, CE_WARN, "Can't bind tx buffer");
496 
497 		goto exit_bind;
498 	}
499 
500 	ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
501 
502 	buf->rb_sc = sc;
503 	buf->rb_frtn.free_arg = (void *) buf;
504 	buf->rb_frtn.free_func = vioif_rx_free;
505 
506 	return (0);
507 exit_bind:
508 	ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
509 exit_alloc:
510 	ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
511 exit_handle:
512 
513 	return (ENOMEM);
514 }
515 
516 static void
517 vioif_rx_destruct(void *buffer, void *user_arg)
518 {
519 	_NOTE(ARGUNUSED(user_arg));
520 	struct vioif_rx_buf *buf = buffer;
521 
522 	ASSERT(buf->rb_mapping.vbm_acch);
523 	ASSERT(buf->rb_mapping.vbm_acch);
524 
525 	(void) ddi_dma_unbind_handle(buf->rb_mapping.vbm_dmah);
526 	ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
527 	ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
528 }
529 
530 static void
531 vioif_free_mems(struct vioif_softc *sc)
532 {
533 	int i;
534 
535 	for (i = 0; i < sc->sc_tx_vq->vq_num; i++) {
536 		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
537 		int j;
538 
539 		/* Tear down the internal mapping. */
540 
541 		ASSERT(buf->tb_inline_mapping.vbm_acch);
542 		ASSERT(buf->tb_inline_mapping.vbm_dmah);
543 
544 		(void) ddi_dma_unbind_handle(buf->tb_inline_mapping.vbm_dmah);
545 		ddi_dma_mem_free(&buf->tb_inline_mapping.vbm_acch);
546 		ddi_dma_free_handle(&buf->tb_inline_mapping.vbm_dmah);
547 
548 		/* We should not see any in-flight buffers at this point. */
549 		ASSERT(!buf->tb_mp);
550 
551 		/* Free all the dma hdnales we allocated lazily. */
552 		for (j = 0; buf->tb_external_mapping[j].vbm_dmah; j++)
553 			ddi_dma_free_handle(
554 			    &buf->tb_external_mapping[j].vbm_dmah);
555 		/* Free the external mapping array. */
556 		kmem_free(buf->tb_external_mapping,
557 		    sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1);
558 	}
559 
560 	kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) *
561 	    sc->sc_tx_vq->vq_num);
562 
563 	for (i = 0; i < sc->sc_rx_vq->vq_num; i++) {
564 		struct vioif_rx_buf *buf = sc->sc_rxbufs[i];
565 
566 		if (buf)
567 			kmem_cache_free(sc->sc_rxbuf_cache, buf);
568 	}
569 	kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) *
570 	    sc->sc_rx_vq->vq_num);
571 }
572 
573 static int
574 vioif_alloc_mems(struct vioif_softc *sc)
575 {
576 	int i, txqsize, rxqsize;
577 	size_t len;
578 	unsigned int nsegments;
579 
580 	txqsize = sc->sc_tx_vq->vq_num;
581 	rxqsize = sc->sc_rx_vq->vq_num;
582 
583 	sc->sc_txbufs = kmem_zalloc(sizeof (struct vioif_tx_buf) * txqsize,
584 	    KM_SLEEP);
585 	if (sc->sc_txbufs == NULL) {
586 		dev_err(sc->sc_dev, CE_WARN,
587 		    "Failed to allocate the tx buffers array");
588 		goto exit_txalloc;
589 	}
590 
591 	/*
592 	 * We don't allocate the rx vioif_bufs, just the pointers, as
593 	 * rx vioif_bufs can be loaned upstream, and we don't know the
594 	 * total number we need.
595 	 */
596 	sc->sc_rxbufs = kmem_zalloc(sizeof (struct vioif_rx_buf *) * rxqsize,
597 	    KM_SLEEP);
598 	if (sc->sc_rxbufs == NULL) {
599 		dev_err(sc->sc_dev, CE_WARN,
600 		    "Failed to allocate the rx buffers pointer array");
601 		goto exit_rxalloc;
602 	}
603 
604 	for (i = 0; i < txqsize; i++) {
605 		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
606 
607 		/* Allocate and bind an inline mapping. */
608 
609 		if (ddi_dma_alloc_handle(sc->sc_dev,
610 		    &vioif_inline_buf_dma_attr,
611 		    DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_dmah)) {
612 
613 			dev_err(sc->sc_dev, CE_WARN,
614 			    "Can't allocate dma handle for tx buffer %d", i);
615 			goto exit_tx;
616 		}
617 
618 		if (ddi_dma_mem_alloc(buf->tb_inline_mapping.vbm_dmah,
619 		    VIOIF_TX_INLINE_SIZE, &vioif_bufattr, DDI_DMA_STREAMING,
620 		    DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_buf,
621 		    &len, &buf->tb_inline_mapping.vbm_acch)) {
622 
623 			dev_err(sc->sc_dev, CE_WARN,
624 			    "Can't allocate tx buffer %d", i);
625 			goto exit_tx;
626 		}
627 		ASSERT(len >= VIOIF_TX_INLINE_SIZE);
628 
629 		if (ddi_dma_addr_bind_handle(buf->tb_inline_mapping.vbm_dmah,
630 		    NULL, buf->tb_inline_mapping.vbm_buf, len,
631 		    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
632 		    &buf->tb_inline_mapping.vbm_dmac, &nsegments)) {
633 
634 			dev_err(sc->sc_dev, CE_WARN,
635 			    "Can't bind tx buffer %d", i);
636 			goto exit_tx;
637 		}
638 
639 		/* We asked for a single segment */
640 		ASSERT(nsegments == 1);
641 
642 		/*
643 		 * We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
644 		 * In reality, I don't expect more then 2-3 used, but who
645 		 * knows.
646 		 */
647 		buf->tb_external_mapping = kmem_zalloc(
648 		    sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1,
649 		    KM_SLEEP);
650 
651 		/*
652 		 * The external mapping's dma handles are allocate lazily,
653 		 * as we don't expect most of them to be used..
654 		 */
655 	}
656 
657 	return (0);
658 
659 exit_tx:
660 	for (i = 0; i < txqsize; i++) {
661 		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
662 
663 		if (buf->tb_inline_mapping.vbm_dmah)
664 			(void) ddi_dma_unbind_handle(
665 			    buf->tb_inline_mapping.vbm_dmah);
666 
667 		if (buf->tb_inline_mapping.vbm_acch)
668 			ddi_dma_mem_free(
669 			    &buf->tb_inline_mapping.vbm_acch);
670 
671 		if (buf->tb_inline_mapping.vbm_dmah)
672 			ddi_dma_free_handle(
673 			    &buf->tb_inline_mapping.vbm_dmah);
674 
675 		if (buf->tb_external_mapping)
676 			kmem_free(buf->tb_external_mapping,
677 			    sizeof (struct vioif_tx_buf) *
678 			    VIOIF_INDIRECT_MAX - 1);
679 	}
680 
681 	kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf) * rxqsize);
682 
683 exit_rxalloc:
684 	kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) * txqsize);
685 exit_txalloc:
686 	return (ENOMEM);
687 }
688 
689 /* ARGSUSED */
690 int
691 vioif_multicst(void *arg, boolean_t add, const uint8_t *macaddr)
692 {
693 	return (DDI_SUCCESS);
694 }
695 
696 /* ARGSUSED */
697 int
698 vioif_promisc(void *arg, boolean_t on)
699 {
700 	return (DDI_SUCCESS);
701 }
702 
703 /* ARGSUSED */
704 int
705 vioif_unicst(void *arg, const uint8_t *macaddr)
706 {
707 	return (DDI_FAILURE);
708 }
709 
710 
711 static uint_t
712 vioif_add_rx(struct vioif_softc *sc, int kmflag)
713 {
714 	uint_t num_added = 0;
715 
716 	for (;;) {
717 		struct vq_entry *ve;
718 		struct vioif_rx_buf *buf;
719 
720 		ve = vq_alloc_entry(sc->sc_rx_vq);
721 		if (!ve) {
722 			/*
723 			 * Out of free descriptors - ring already full.
724 			 * It would be better to update sc_norxdescavail
725 			 * but MAC does not ask for this info, hence we
726 			 * update sc_norecvbuf.
727 			 */
728 			sc->sc_norecvbuf++;
729 			break;
730 		}
731 		buf = sc->sc_rxbufs[ve->qe_index];
732 
733 		if (!buf) {
734 			/* First run, allocate the buffer. */
735 			buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
736 			sc->sc_rxbufs[ve->qe_index] = buf;
737 		}
738 
739 		/* Still nothing? Bye. */
740 		if (!buf) {
741 			dev_err(sc->sc_dev, CE_WARN,
742 			    "Can't allocate rx buffer");
743 			sc->sc_norecvbuf++;
744 			vq_free_entry(sc->sc_rx_vq, ve);
745 			break;
746 		}
747 
748 		ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
749 
750 		/*
751 		 * For an unknown reason, the virtio_net_hdr must be placed
752 		 * as a separate virtio queue entry.
753 		 */
754 		virtio_ve_add_indirect_buf(ve,
755 		    buf->rb_mapping.vbm_dmac.dmac_laddress,
756 		    sizeof (struct virtio_net_hdr), B_FALSE);
757 
758 		/* Add the rest of the first cookie. */
759 		virtio_ve_add_indirect_buf(ve,
760 		    buf->rb_mapping.vbm_dmac.dmac_laddress +
761 		    sizeof (struct virtio_net_hdr),
762 		    buf->rb_mapping.vbm_dmac.dmac_size -
763 		    sizeof (struct virtio_net_hdr), B_FALSE);
764 
765 		/*
766 		 * If the buffer consists of a single cookie (unlikely for a
767 		 * 64-k buffer), we are done. Otherwise, add the rest of the
768 		 * cookies using indirect entries.
769 		 */
770 		if (buf->rb_mapping.vbm_ncookies > 1) {
771 			ddi_dma_cookie_t *first_extra_dmac;
772 			ddi_dma_cookie_t dmac;
773 			first_extra_dmac =
774 			    vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
775 
776 			ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
777 			virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
778 			    dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
779 			vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
780 			    first_extra_dmac);
781 		}
782 
783 		virtio_push_chain(ve, B_FALSE);
784 		num_added++;
785 	}
786 
787 	return (num_added);
788 }
789 
790 static uint_t
791 vioif_populate_rx(struct vioif_softc *sc, int kmflag)
792 {
793 	uint_t num_added = vioif_add_rx(sc, kmflag);
794 
795 	if (num_added > 0)
796 		virtio_sync_vq(sc->sc_rx_vq);
797 
798 	return (num_added);
799 }
800 
801 static uint_t
802 vioif_process_rx(struct vioif_softc *sc)
803 {
804 	struct vq_entry *ve;
805 	struct vioif_rx_buf *buf;
806 	mblk_t *mphead = NULL, *lastmp = NULL, *mp;
807 	uint32_t len;
808 	uint_t num_processed = 0;
809 
810 	while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
811 
812 		buf = sc->sc_rxbufs[ve->qe_index];
813 		ASSERT(buf);
814 
815 		if (len < sizeof (struct virtio_net_hdr)) {
816 			dev_err(sc->sc_dev, CE_WARN, "RX: Cnain too small: %u",
817 			    len - (uint32_t)sizeof (struct virtio_net_hdr));
818 			sc->sc_ierrors++;
819 			virtio_free_chain(ve);
820 			continue;
821 		}
822 
823 		len -= sizeof (struct virtio_net_hdr);
824 		/*
825 		 * We copy small packets that happen to fit into a single
826 		 * cookie and reuse the buffers. For bigger ones, we loan
827 		 * the buffers upstream.
828 		 */
829 		if (len < sc->sc_rxcopy_thresh) {
830 			mp = allocb(len, 0);
831 			if (!mp) {
832 				sc->sc_norecvbuf++;
833 				sc->sc_ierrors++;
834 
835 				virtio_free_chain(ve);
836 				break;
837 			}
838 
839 			bcopy((char *)buf->rb_mapping.vbm_buf +
840 			    sizeof (struct virtio_net_hdr), mp->b_rptr, len);
841 			mp->b_wptr = mp->b_rptr + len;
842 
843 		} else {
844 			mp = desballoc((unsigned char *)
845 			    buf->rb_mapping.vbm_buf +
846 			    sizeof (struct virtio_net_hdr) +
847 			    VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
848 			if (!mp) {
849 				sc->sc_norecvbuf++;
850 				sc->sc_ierrors++;
851 
852 				virtio_free_chain(ve);
853 				break;
854 			}
855 			mp->b_wptr = mp->b_rptr + len;
856 
857 			atomic_inc_ulong(&sc->sc_rxloan);
858 			/*
859 			 * Buffer loaned, we will have to allocate a new one
860 			 * for this slot.
861 			 */
862 			sc->sc_rxbufs[ve->qe_index] = NULL;
863 		}
864 
865 		/*
866 		 * virtio-net does not tell us if this packet is multicast
867 		 * or broadcast, so we have to check it.
868 		 */
869 		if (mp->b_rptr[0] & 0x1) {
870 			if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
871 				sc->sc_multircv++;
872 			else
873 				sc->sc_brdcstrcv++;
874 		}
875 
876 		sc->sc_rbytes += len;
877 		sc->sc_ipackets++;
878 
879 		virtio_free_chain(ve);
880 
881 		if (lastmp == NULL) {
882 			mphead = mp;
883 		} else {
884 			lastmp->b_next = mp;
885 		}
886 		lastmp = mp;
887 		num_processed++;
888 	}
889 
890 	if (mphead != NULL) {
891 		mac_rx(sc->sc_mac_handle, NULL, mphead);
892 	}
893 
894 	return (num_processed);
895 }
896 
897 static uint_t
898 vioif_reclaim_used_tx(struct vioif_softc *sc)
899 {
900 	struct vq_entry *ve;
901 	struct vioif_tx_buf *buf;
902 	uint32_t len;
903 	mblk_t *mp;
904 	uint_t num_reclaimed = 0;
905 
906 	while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
907 		/* We don't chain descriptors for tx, so don't expect any. */
908 		ASSERT(!ve->qe_next);
909 
910 		buf = &sc->sc_txbufs[ve->qe_index];
911 		mp = buf->tb_mp;
912 		buf->tb_mp = NULL;
913 
914 		if (mp) {
915 			for (int i = 0; i < buf->tb_external_num; i++)
916 				(void) ddi_dma_unbind_handle(
917 				    buf->tb_external_mapping[i].vbm_dmah);
918 		}
919 
920 		virtio_free_chain(ve);
921 
922 		/* External mapping used, mp was not freed in vioif_send() */
923 		if (mp)
924 			freemsg(mp);
925 		num_reclaimed++;
926 	}
927 
928 	if (sc->sc_tx_stopped && num_reclaimed > 0) {
929 		sc->sc_tx_stopped = 0;
930 		mac_tx_update(sc->sc_mac_handle);
931 	}
932 
933 	return (num_reclaimed);
934 }
935 
936 /* sc will be used to update stat counters. */
937 /* ARGSUSED */
938 static inline void
939 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
940     size_t msg_size)
941 {
942 	struct vioif_tx_buf *buf;
943 	buf = &sc->sc_txbufs[ve->qe_index];
944 
945 	ASSERT(buf);
946 
947 	/* Frees mp */
948 	mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
949 	    sizeof (struct virtio_net_hdr));
950 
951 	virtio_ve_add_indirect_buf(ve,
952 	    buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
953 	    sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
954 }
955 
956 static inline int
957 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
958     int i)
959 {
960 	int ret = DDI_SUCCESS;
961 
962 	if (!buf->tb_external_mapping[i].vbm_dmah) {
963 		ret = ddi_dma_alloc_handle(sc->sc_dev,
964 		    &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
965 		    &buf->tb_external_mapping[i].vbm_dmah);
966 		if (ret != DDI_SUCCESS) {
967 			dev_err(sc->sc_dev, CE_WARN,
968 			    "Can't allocate dma handle for external tx buffer");
969 		}
970 	}
971 
972 	return (ret);
973 }
974 
975 static inline int
976 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
977     size_t msg_size)
978 {
979 	_NOTE(ARGUNUSED(msg_size));
980 
981 	struct vioif_tx_buf *buf;
982 	mblk_t *nmp;
983 	int i, j;
984 	int ret = DDI_SUCCESS;
985 
986 	buf = &sc->sc_txbufs[ve->qe_index];
987 
988 	ASSERT(buf);
989 
990 	buf->tb_external_num = 0;
991 	i = 0;
992 	nmp = mp;
993 
994 	while (nmp) {
995 		size_t len;
996 		ddi_dma_cookie_t dmac;
997 		unsigned int ncookies;
998 
999 		len = MBLKL(nmp);
1000 		/*
1001 		 * For some reason, the network stack can
1002 		 * actually send us zero-length fragments.
1003 		 */
1004 		if (len == 0) {
1005 			nmp = nmp->b_cont;
1006 			continue;
1007 		}
1008 
1009 		ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
1010 		if (ret != DDI_SUCCESS) {
1011 			sc->sc_notxbuf++;
1012 			sc->sc_oerrors++;
1013 			goto exit_lazy_alloc;
1014 		}
1015 		ret = ddi_dma_addr_bind_handle(
1016 		    buf->tb_external_mapping[i].vbm_dmah, NULL,
1017 		    (caddr_t)nmp->b_rptr, len,
1018 		    DDI_DMA_WRITE | DDI_DMA_STREAMING,
1019 		    DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
1020 
1021 		if (ret != DDI_SUCCESS) {
1022 			sc->sc_oerrors++;
1023 			dev_err(sc->sc_dev, CE_NOTE,
1024 			    "TX: Failed to bind external handle");
1025 			goto exit_bind;
1026 		}
1027 
1028 		/* Check if we still fit into the indirect table. */
1029 		if (virtio_ve_indirect_available(ve) < ncookies) {
1030 			dev_err(sc->sc_dev, CE_NOTE,
1031 			    "TX: Indirect descriptor table limit reached."
1032 			    " It took %d fragments.", i);
1033 			sc->sc_notxbuf++;
1034 			sc->sc_oerrors++;
1035 
1036 			ret = DDI_FAILURE;
1037 			goto exit_limit;
1038 		}
1039 
1040 		virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1041 		    dmac, ncookies, B_TRUE);
1042 
1043 		nmp = nmp->b_cont;
1044 		i++;
1045 	}
1046 
1047 	buf->tb_external_num = i;
1048 	/* Save the mp to free it when the packet is sent. */
1049 	buf->tb_mp = mp;
1050 
1051 	return (DDI_SUCCESS);
1052 
1053 exit_limit:
1054 exit_bind:
1055 exit_lazy_alloc:
1056 
1057 	for (j = 0; j < i; j++) {
1058 		(void) ddi_dma_unbind_handle(
1059 		    buf->tb_external_mapping[j].vbm_dmah);
1060 	}
1061 
1062 	return (ret);
1063 }
1064 
1065 static boolean_t
1066 vioif_send(struct vioif_softc *sc, mblk_t *mp)
1067 {
1068 	struct vq_entry *ve;
1069 	struct vioif_tx_buf *buf;
1070 	struct virtio_net_hdr *net_header = NULL;
1071 	size_t msg_size = 0;
1072 	uint32_t csum_start;
1073 	uint32_t csum_stuff;
1074 	uint32_t csum_flags;
1075 	uint32_t lso_flags;
1076 	uint32_t lso_mss;
1077 	mblk_t *nmp;
1078 	int ret;
1079 	boolean_t lso_required = B_FALSE;
1080 
1081 	for (nmp = mp; nmp; nmp = nmp->b_cont)
1082 		msg_size += MBLKL(nmp);
1083 
1084 	if (sc->sc_tx_tso4) {
1085 		mac_lso_get(mp, &lso_mss, &lso_flags);
1086 		lso_required = (lso_flags & HW_LSO);
1087 	}
1088 
1089 	ve = vq_alloc_entry(sc->sc_tx_vq);
1090 
1091 	if (!ve) {
1092 		sc->sc_notxbuf++;
1093 		/* Out of free descriptors - try later. */
1094 		return (B_FALSE);
1095 	}
1096 	buf = &sc->sc_txbufs[ve->qe_index];
1097 
1098 	/* Use the inline buffer of the first entry for the virtio_net_hdr. */
1099 	(void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1100 	    sizeof (struct virtio_net_hdr));
1101 
1102 	net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
1103 
1104 	mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1105 	    NULL, &csum_flags);
1106 
1107 	/* They want us to do the TCP/UDP csum calculation. */
1108 	if (csum_flags & HCK_PARTIALCKSUM) {
1109 		struct ether_header *eth_header;
1110 		int eth_hsize;
1111 
1112 		/* Did we ask for it? */
1113 		ASSERT(sc->sc_tx_csum);
1114 
1115 		/* We only asked for partial csum packets. */
1116 		ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
1117 		ASSERT(!(csum_flags & HCK_FULLCKSUM));
1118 
1119 		eth_header = (void *) mp->b_rptr;
1120 		if (eth_header->ether_type == htons(ETHERTYPE_VLAN)) {
1121 			eth_hsize = sizeof (struct ether_vlan_header);
1122 		} else {
1123 			eth_hsize = sizeof (struct ether_header);
1124 		}
1125 		net_header->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1126 		net_header->csum_start = eth_hsize + csum_start;
1127 		net_header->csum_offset = csum_stuff - csum_start;
1128 	}
1129 
1130 	/* setup LSO fields if required */
1131 	if (lso_required) {
1132 		net_header->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1133 		net_header->gso_size = (uint16_t)lso_mss;
1134 	}
1135 
1136 	virtio_ve_add_indirect_buf(ve,
1137 	    buf->tb_inline_mapping.vbm_dmac.dmac_laddress,
1138 	    sizeof (struct virtio_net_hdr), B_TRUE);
1139 
1140 	/* meanwhile update the statistic */
1141 	if (mp->b_rptr[0] & 0x1) {
1142 		if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
1143 				sc->sc_multixmt++;
1144 			else
1145 				sc->sc_brdcstxmt++;
1146 	}
1147 
1148 	/*
1149 	 * We copy small packets into the inline buffer. The bigger ones
1150 	 * get mapped using the mapped buffer.
1151 	 */
1152 	if (msg_size < sc->sc_txcopy_thresh) {
1153 		vioif_tx_inline(sc, ve, mp, msg_size);
1154 	} else {
1155 		/* statistic gets updated by vioif_tx_external when fail */
1156 		ret = vioif_tx_external(sc, ve, mp, msg_size);
1157 		if (ret != DDI_SUCCESS)
1158 			goto exit_tx_external;
1159 	}
1160 
1161 	virtio_push_chain(ve, B_TRUE);
1162 
1163 	sc->sc_opackets++;
1164 	sc->sc_obytes += msg_size;
1165 
1166 	return (B_TRUE);
1167 
1168 exit_tx_external:
1169 
1170 	vq_free_entry(sc->sc_tx_vq, ve);
1171 	/*
1172 	 * vioif_tx_external can fail when the buffer does not fit into the
1173 	 * indirect descriptor table. Free the mp. I don't expect this ever
1174 	 * to happen.
1175 	 */
1176 	freemsg(mp);
1177 
1178 	return (B_TRUE);
1179 }
1180 
1181 mblk_t *
1182 vioif_tx(void *arg, mblk_t *mp)
1183 {
1184 	struct vioif_softc *sc = arg;
1185 	mblk_t	*nmp;
1186 
1187 	while (mp != NULL) {
1188 		nmp = mp->b_next;
1189 		mp->b_next = NULL;
1190 
1191 		if (!vioif_send(sc, mp)) {
1192 			sc->sc_tx_stopped = 1;
1193 			mp->b_next = nmp;
1194 			break;
1195 		}
1196 		mp = nmp;
1197 	}
1198 
1199 	return (mp);
1200 }
1201 
1202 int
1203 vioif_start(void *arg)
1204 {
1205 	struct vioif_softc *sc = arg;
1206 	struct vq_entry *ve;
1207 	uint32_t len;
1208 
1209 	mac_link_update(sc->sc_mac_handle,
1210 	    vioif_link_state(sc));
1211 
1212 	virtio_start_vq_intr(sc->sc_rx_vq);
1213 
1214 	/*
1215 	 * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
1216 	 * so the device will send a transmit interrupt when the queue is empty
1217 	 * and we can reclaim it in one sweep.
1218 	 */
1219 
1220 	/*
1221 	 * Clear any data that arrived early on the receive queue and populate
1222 	 * it with free buffers that the device can use moving forward.
1223 	 */
1224 	while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
1225 		virtio_free_chain(ve);
1226 	}
1227 	(void) vioif_populate_rx(sc, KM_SLEEP);
1228 
1229 	return (DDI_SUCCESS);
1230 }
1231 
1232 void
1233 vioif_stop(void *arg)
1234 {
1235 	struct vioif_softc *sc = arg;
1236 
1237 	virtio_stop_vq_intr(sc->sc_rx_vq);
1238 }
1239 
1240 /* ARGSUSED */
1241 static int
1242 vioif_stat(void *arg, uint_t stat, uint64_t *val)
1243 {
1244 	struct vioif_softc *sc = arg;
1245 
1246 	switch (stat) {
1247 	case MAC_STAT_IERRORS:
1248 		*val = sc->sc_ierrors;
1249 		break;
1250 	case MAC_STAT_OERRORS:
1251 		*val = sc->sc_oerrors;
1252 		break;
1253 	case MAC_STAT_MULTIRCV:
1254 		*val = sc->sc_multircv;
1255 		break;
1256 	case MAC_STAT_BRDCSTRCV:
1257 		*val = sc->sc_brdcstrcv;
1258 		break;
1259 	case MAC_STAT_MULTIXMT:
1260 		*val = sc->sc_multixmt;
1261 		break;
1262 	case MAC_STAT_BRDCSTXMT:
1263 		*val = sc->sc_brdcstxmt;
1264 		break;
1265 	case MAC_STAT_IPACKETS:
1266 		*val = sc->sc_ipackets;
1267 		break;
1268 	case MAC_STAT_RBYTES:
1269 		*val = sc->sc_rbytes;
1270 		break;
1271 	case MAC_STAT_OPACKETS:
1272 		*val = sc->sc_opackets;
1273 		break;
1274 	case MAC_STAT_OBYTES:
1275 		*val = sc->sc_obytes;
1276 		break;
1277 	case MAC_STAT_NORCVBUF:
1278 		*val = sc->sc_norecvbuf;
1279 		break;
1280 	case MAC_STAT_NOXMTBUF:
1281 		*val = sc->sc_notxbuf;
1282 		break;
1283 	case MAC_STAT_IFSPEED:
1284 		/* always 1 Gbit */
1285 		*val = 1000000000ULL;
1286 		break;
1287 	case ETHER_STAT_LINK_DUPLEX:
1288 		/* virtual device, always full-duplex */
1289 		*val = LINK_DUPLEX_FULL;
1290 		break;
1291 
1292 	default:
1293 		return (ENOTSUP);
1294 	}
1295 
1296 	return (DDI_SUCCESS);
1297 }
1298 
1299 static int
1300 vioif_set_prop_private(struct vioif_softc *sc, const char *pr_name,
1301     uint_t pr_valsize, const void *pr_val)
1302 {
1303 	_NOTE(ARGUNUSED(pr_valsize));
1304 
1305 	long result;
1306 
1307 	if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1308 
1309 		if (pr_val == NULL)
1310 			return (EINVAL);
1311 
1312 		(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1313 
1314 		if (result < 0 || result > VIOIF_TX_THRESH_MAX)
1315 			return (EINVAL);
1316 		sc->sc_txcopy_thresh = result;
1317 	}
1318 	if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1319 
1320 		if (pr_val == NULL)
1321 			return (EINVAL);
1322 
1323 		(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1324 
1325 		if (result < 0 || result > VIOIF_RX_THRESH_MAX)
1326 			return (EINVAL);
1327 		sc->sc_rxcopy_thresh = result;
1328 	}
1329 	return (0);
1330 }
1331 
1332 static int
1333 vioif_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1334     uint_t pr_valsize, const void *pr_val)
1335 {
1336 	struct vioif_softc *sc = arg;
1337 	const uint32_t *new_mtu;
1338 	int err;
1339 
1340 	switch (pr_num) {
1341 	case MAC_PROP_MTU:
1342 		new_mtu = pr_val;
1343 
1344 		if (*new_mtu > MAX_MTU) {
1345 			return (EINVAL);
1346 		}
1347 
1348 		err = mac_maxsdu_update(sc->sc_mac_handle, *new_mtu);
1349 		if (err) {
1350 			return (err);
1351 		}
1352 		break;
1353 	case MAC_PROP_PRIVATE:
1354 		err = vioif_set_prop_private(sc, pr_name,
1355 		    pr_valsize, pr_val);
1356 		if (err)
1357 			return (err);
1358 		break;
1359 	default:
1360 		return (ENOTSUP);
1361 	}
1362 
1363 	return (0);
1364 }
1365 
1366 static int
1367 vioif_get_prop_private(struct vioif_softc *sc, const char *pr_name,
1368     uint_t pr_valsize, void *pr_val)
1369 {
1370 	int err = ENOTSUP;
1371 	int value;
1372 
1373 	if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1374 
1375 		value = sc->sc_txcopy_thresh;
1376 		err = 0;
1377 		goto done;
1378 	}
1379 	if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1380 
1381 		value = sc->sc_rxcopy_thresh;
1382 		err = 0;
1383 		goto done;
1384 	}
1385 done:
1386 	if (err == 0) {
1387 		(void) snprintf(pr_val, pr_valsize, "%d", value);
1388 	}
1389 	return (err);
1390 }
1391 
1392 static int
1393 vioif_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1394     uint_t pr_valsize, void *pr_val)
1395 {
1396 	struct vioif_softc *sc = arg;
1397 	int err = ENOTSUP;
1398 
1399 	switch (pr_num) {
1400 	case MAC_PROP_PRIVATE:
1401 		err = vioif_get_prop_private(sc, pr_name,
1402 		    pr_valsize, pr_val);
1403 		break;
1404 	default:
1405 		break;
1406 	}
1407 	return (err);
1408 }
1409 
1410 static void
1411 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1412     mac_prop_info_handle_t prh)
1413 {
1414 	struct vioif_softc *sc = arg;
1415 	char valstr[64];
1416 	int value;
1417 
1418 	switch (pr_num) {
1419 	case MAC_PROP_MTU:
1420 		mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1421 		break;
1422 
1423 	case MAC_PROP_PRIVATE:
1424 		bzero(valstr, sizeof (valstr));
1425 		if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1426 
1427 			value = sc->sc_txcopy_thresh;
1428 		} else	if (strcmp(pr_name,
1429 		    vioif_rxcopy_thresh) == 0) {
1430 			value = sc->sc_rxcopy_thresh;
1431 		} else {
1432 			return;
1433 		}
1434 		(void) snprintf(valstr, sizeof (valstr), "%d", value);
1435 		break;
1436 
1437 	default:
1438 		break;
1439 	}
1440 }
1441 
1442 static boolean_t
1443 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1444 {
1445 	struct vioif_softc *sc = arg;
1446 
1447 	switch (cap) {
1448 	case MAC_CAPAB_HCKSUM:
1449 		if (sc->sc_tx_csum) {
1450 			uint32_t *txflags = cap_data;
1451 
1452 			*txflags = HCKSUM_INET_PARTIAL;
1453 			return (B_TRUE);
1454 		}
1455 		return (B_FALSE);
1456 	case MAC_CAPAB_LSO:
1457 		if (sc->sc_tx_tso4) {
1458 			mac_capab_lso_t *cap_lso = cap_data;
1459 
1460 			cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1461 			cap_lso->lso_basic_tcp_ipv4.lso_max = MAX_MTU;
1462 			return (B_TRUE);
1463 		}
1464 		return (B_FALSE);
1465 	default:
1466 		break;
1467 	}
1468 	return (B_FALSE);
1469 }
1470 
1471 static mac_callbacks_t vioif_m_callbacks = {
1472 	.mc_callbacks	= (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO),
1473 	.mc_getstat	= vioif_stat,
1474 	.mc_start	= vioif_start,
1475 	.mc_stop	= vioif_stop,
1476 	.mc_setpromisc	= vioif_promisc,
1477 	.mc_multicst	= vioif_multicst,
1478 	.mc_unicst	= vioif_unicst,
1479 	.mc_tx		= vioif_tx,
1480 	/* Optional callbacks */
1481 	.mc_reserved	= NULL,		/* reserved */
1482 	.mc_ioctl	= NULL,		/* mc_ioctl */
1483 	.mc_getcapab	= vioif_getcapab,		/* mc_getcapab */
1484 	.mc_open	= NULL,		/* mc_open */
1485 	.mc_close	= NULL,		/* mc_close */
1486 	.mc_setprop	= vioif_setprop,
1487 	.mc_getprop	= vioif_getprop,
1488 	.mc_propinfo	= vioif_propinfo,
1489 };
1490 
1491 static void
1492 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1493     uint32_t features)
1494 {
1495 	char buf[512];
1496 	char *bufp = buf;
1497 	char *bufend = buf + sizeof (buf);
1498 
1499 	/* LINTED E_PTRDIFF_OVERFLOW */
1500 	bufp += snprintf(bufp, bufend - bufp, prefix);
1501 	/* LINTED E_PTRDIFF_OVERFLOW */
1502 	bufp += virtio_show_features(features, bufp, bufend - bufp);
1503 	*bufp = '\0';
1504 
1505 
1506 	/* Using '!' to only CE_NOTE this to the system log. */
1507 	dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
1508 	    VIRTIO_NET_FEATURE_BITS);
1509 }
1510 
1511 /*
1512  * Find out which features are supported by the device and
1513  * choose which ones we wish to use.
1514  */
1515 static int
1516 vioif_dev_features(struct vioif_softc *sc)
1517 {
1518 	uint32_t host_features;
1519 
1520 	host_features = virtio_negotiate_features(&sc->sc_virtio,
1521 	    VIRTIO_NET_F_CSUM |
1522 	    VIRTIO_NET_F_HOST_TSO4 |
1523 	    VIRTIO_NET_F_HOST_ECN |
1524 	    VIRTIO_NET_F_MAC |
1525 	    VIRTIO_NET_F_STATUS |
1526 	    VIRTIO_F_RING_INDIRECT_DESC |
1527 	    VIRTIO_F_NOTIFY_ON_EMPTY);
1528 
1529 	vioif_show_features(sc, "Host features: ", host_features);
1530 	vioif_show_features(sc, "Negotiated features: ",
1531 	    sc->sc_virtio.sc_features);
1532 
1533 	if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1534 		dev_err(sc->sc_dev, CE_NOTE,
1535 		    "Host does not support RING_INDIRECT_DESC, bye.");
1536 		return (DDI_FAILURE);
1537 	}
1538 
1539 	return (DDI_SUCCESS);
1540 }
1541 
1542 static int
1543 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1544 {
1545 	return (virtio_has_feature(&sc->sc_virtio, feature));
1546 }
1547 
1548 static void
1549 vioif_set_mac(struct vioif_softc *sc)
1550 {
1551 	int i;
1552 
1553 	for (i = 0; i < ETHERADDRL; i++) {
1554 		virtio_write_device_config_1(&sc->sc_virtio,
1555 		    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1556 	}
1557 }
1558 
1559 /* Get the mac address out of the hardware, or make up one. */
1560 static void
1561 vioif_get_mac(struct vioif_softc *sc)
1562 {
1563 	int i;
1564 	if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1565 		for (i = 0; i < ETHERADDRL; i++) {
1566 			sc->sc_mac[i] = virtio_read_device_config_1(
1567 			    &sc->sc_virtio,
1568 			    VIRTIO_NET_CONFIG_MAC + i);
1569 		}
1570 		dev_err(sc->sc_dev, CE_NOTE, "Got MAC address from host: %s",
1571 		    ether_sprintf((struct ether_addr *)sc->sc_mac));
1572 	} else {
1573 		/* Get a few random bytes */
1574 		(void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1575 		/* Make sure it's a unicast MAC */
1576 		sc->sc_mac[0] &= ~1;
1577 		/* Set the "locally administered" bit */
1578 		sc->sc_mac[1] |= 2;
1579 
1580 		vioif_set_mac(sc);
1581 
1582 		dev_err(sc->sc_dev, CE_NOTE,
1583 		    "Generated a random MAC address: %s",
1584 		    ether_sprintf((struct ether_addr *)sc->sc_mac));
1585 	}
1586 }
1587 
1588 /*
1589  * Virtqueue interrupt handlers
1590  */
1591 /* ARGSUSED */
1592 uint_t
1593 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1594 {
1595 	struct virtio_softc *vsc = (void *) arg1;
1596 	struct vioif_softc *sc = container_of(vsc,
1597 	    struct vioif_softc, sc_virtio);
1598 
1599 	/*
1600 	 * The return values of these functions are not needed but they make
1601 	 * debugging interrupts simpler because you can use them to detect when
1602 	 * stuff was processed and repopulated in this handler.
1603 	 */
1604 	(void) vioif_process_rx(sc);
1605 	(void) vioif_populate_rx(sc, KM_NOSLEEP);
1606 
1607 	return (DDI_INTR_CLAIMED);
1608 }
1609 
1610 /* ARGSUSED */
1611 uint_t
1612 vioif_tx_handler(caddr_t arg1, caddr_t arg2)
1613 {
1614 	struct virtio_softc *vsc = (void *)arg1;
1615 	struct vioif_softc *sc = container_of(vsc,
1616 	    struct vioif_softc, sc_virtio);
1617 
1618 	/*
1619 	 * The return value of this function is not needed but makes debugging
1620 	 * interrupts simpler because you can use it to detect if anything was
1621 	 * reclaimed in this handler.
1622 	 */
1623 	(void) vioif_reclaim_used_tx(sc);
1624 
1625 	return (DDI_INTR_CLAIMED);
1626 }
1627 
1628 static int
1629 vioif_register_ints(struct vioif_softc *sc)
1630 {
1631 	int ret;
1632 
1633 	struct virtio_int_handler vioif_vq_h[] = {
1634 		{ vioif_rx_handler },
1635 		{ vioif_tx_handler },
1636 		{ NULL }
1637 	};
1638 
1639 	ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1640 
1641 	return (ret);
1642 }
1643 
1644 
1645 static void
1646 vioif_check_features(struct vioif_softc *sc)
1647 {
1648 	if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
1649 		/* The GSO/GRO featured depend on CSUM, check them here. */
1650 		sc->sc_tx_csum = 1;
1651 		sc->sc_rx_csum = 1;
1652 
1653 		if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1654 			sc->sc_rx_csum = 0;
1655 		}
1656 		cmn_err(CE_NOTE, "Csum enabled.");
1657 
1658 		if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1659 
1660 			sc->sc_tx_tso4 = 1;
1661 			/*
1662 			 * We don't seem to have a way to ask the system
1663 			 * not to send us LSO packets with Explicit
1664 			 * Congestion Notification bit set, so we require
1665 			 * the device to support it in order to do
1666 			 * LSO.
1667 			 */
1668 			if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1669 				dev_err(sc->sc_dev, CE_NOTE,
1670 				    "TSO4 supported, but not ECN. "
1671 				    "Not using LSO.");
1672 				sc->sc_tx_tso4 = 0;
1673 			} else {
1674 				cmn_err(CE_NOTE, "LSO enabled");
1675 			}
1676 		}
1677 	}
1678 }
1679 
1680 static int
1681 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1682 {
1683 	int ret, instance;
1684 	struct vioif_softc *sc;
1685 	struct virtio_softc *vsc;
1686 	mac_register_t *macp;
1687 	char cache_name[CACHE_NAME_SIZE];
1688 
1689 	instance = ddi_get_instance(devinfo);
1690 
1691 	switch (cmd) {
1692 	case DDI_ATTACH:
1693 		break;
1694 
1695 	case DDI_RESUME:
1696 	case DDI_PM_RESUME:
1697 		/* We do not support suspend/resume for vioif. */
1698 		goto exit;
1699 
1700 	default:
1701 		goto exit;
1702 	}
1703 
1704 	sc = kmem_zalloc(sizeof (struct vioif_softc), KM_SLEEP);
1705 	ddi_set_driver_private(devinfo, sc);
1706 
1707 	vsc = &sc->sc_virtio;
1708 
1709 	/* Duplicate for less typing */
1710 	sc->sc_dev = devinfo;
1711 	vsc->sc_dev = devinfo;
1712 
1713 	/*
1714 	 * Initialize interrupt kstat.
1715 	 */
1716 	sc->sc_intrstat = kstat_create("vioif", instance, "intr", "controller",
1717 	    KSTAT_TYPE_INTR, 1, 0);
1718 	if (sc->sc_intrstat == NULL) {
1719 		dev_err(devinfo, CE_WARN, "kstat_create failed");
1720 		goto exit_intrstat;
1721 	}
1722 	kstat_install(sc->sc_intrstat);
1723 
1724 	/* map BAR 0 */
1725 	ret = ddi_regs_map_setup(devinfo, 1,
1726 	    (caddr_t *)&sc->sc_virtio.sc_io_addr,
1727 	    0, 0, &vioif_attr, &sc->sc_virtio.sc_ioh);
1728 	if (ret != DDI_SUCCESS) {
1729 		dev_err(devinfo, CE_WARN, "unable to map bar 0: %d", ret);
1730 		goto exit_map;
1731 	}
1732 
1733 	virtio_device_reset(&sc->sc_virtio);
1734 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
1735 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
1736 
1737 	ret = vioif_dev_features(sc);
1738 	if (ret)
1739 		goto exit_features;
1740 
1741 	vsc->sc_nvqs = vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
1742 
1743 	(void) snprintf(cache_name, CACHE_NAME_SIZE, "vioif%d_rx", instance);
1744 	sc->sc_rxbuf_cache = kmem_cache_create(cache_name,
1745 	    sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
1746 	    vioif_rx_destruct, NULL, sc, NULL, KM_SLEEP);
1747 	if (sc->sc_rxbuf_cache == NULL) {
1748 		dev_err(sc->sc_dev, CE_WARN, "Can't allocate the buffer cache");
1749 		goto exit_cache;
1750 	}
1751 
1752 	ret = vioif_register_ints(sc);
1753 	if (ret) {
1754 		dev_err(sc->sc_dev, CE_WARN,
1755 		    "Failed to allocate interrupt(s)!");
1756 		goto exit_ints;
1757 	}
1758 
1759 	/*
1760 	 * Register layout determined, can now access the
1761 	 * device-specific bits
1762 	 */
1763 	vioif_get_mac(sc);
1764 
1765 	sc->sc_rx_vq = virtio_alloc_vq(&sc->sc_virtio, 0,
1766 	    VIOIF_RX_QLEN, VIOIF_INDIRECT_MAX, "rx");
1767 	if (!sc->sc_rx_vq)
1768 		goto exit_alloc1;
1769 	virtio_stop_vq_intr(sc->sc_rx_vq);
1770 
1771 	sc->sc_tx_vq = virtio_alloc_vq(&sc->sc_virtio, 1,
1772 	    VIOIF_TX_QLEN, VIOIF_INDIRECT_MAX, "tx");
1773 	if (!sc->sc_rx_vq)
1774 		goto exit_alloc2;
1775 	virtio_stop_vq_intr(sc->sc_tx_vq);
1776 
1777 	if (vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ)) {
1778 		sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1779 		    VIOIF_CTRL_QLEN, 0, "ctrl");
1780 		if (!sc->sc_ctrl_vq) {
1781 			goto exit_alloc3;
1782 		}
1783 		virtio_stop_vq_intr(sc->sc_ctrl_vq);
1784 	}
1785 
1786 	virtio_set_status(&sc->sc_virtio,
1787 	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1788 
1789 	sc->sc_rxloan = 0;
1790 
1791 	/* set some reasonable-small default values */
1792 	sc->sc_rxcopy_thresh = 300;
1793 	sc->sc_txcopy_thresh = 300;
1794 	sc->sc_mtu = ETHERMTU;
1795 
1796 	vioif_check_features(sc);
1797 
1798 	if (vioif_alloc_mems(sc))
1799 		goto exit_alloc_mems;
1800 
1801 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1802 		dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1803 		goto exit_macalloc;
1804 	}
1805 
1806 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1807 	macp->m_driver = sc;
1808 	macp->m_dip = devinfo;
1809 	macp->m_src_addr = sc->sc_mac;
1810 	macp->m_callbacks = &vioif_m_callbacks;
1811 	macp->m_min_sdu = 0;
1812 	macp->m_max_sdu = sc->sc_mtu;
1813 	macp->m_margin = VLAN_TAGSZ;
1814 	macp->m_priv_props = vioif_priv_props;
1815 
1816 	sc->sc_macp = macp;
1817 
1818 	/* Pre-fill the rx ring. */
1819 	(void) vioif_populate_rx(sc, KM_SLEEP);
1820 
1821 	ret = mac_register(macp, &sc->sc_mac_handle);
1822 	if (ret != 0) {
1823 		dev_err(devinfo, CE_WARN, "vioif_attach: "
1824 		    "mac_register() failed, ret=%d", ret);
1825 		goto exit_register;
1826 	}
1827 
1828 	ret = virtio_enable_ints(&sc->sc_virtio);
1829 	if (ret) {
1830 		dev_err(devinfo, CE_WARN, "Failed to enable interrupts");
1831 		goto exit_enable_ints;
1832 	}
1833 
1834 	mac_link_update(sc->sc_mac_handle, LINK_STATE_UP);
1835 	return (DDI_SUCCESS);
1836 
1837 exit_enable_ints:
1838 	(void) mac_unregister(sc->sc_mac_handle);
1839 exit_register:
1840 	mac_free(macp);
1841 exit_macalloc:
1842 	vioif_free_mems(sc);
1843 exit_alloc_mems:
1844 	virtio_release_ints(&sc->sc_virtio);
1845 	if (sc->sc_ctrl_vq)
1846 		virtio_free_vq(sc->sc_ctrl_vq);
1847 exit_alloc3:
1848 	virtio_free_vq(sc->sc_tx_vq);
1849 exit_alloc2:
1850 	virtio_free_vq(sc->sc_rx_vq);
1851 exit_alloc1:
1852 exit_ints:
1853 	kmem_cache_destroy(sc->sc_rxbuf_cache);
1854 exit_cache:
1855 exit_features:
1856 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1857 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1858 exit_intrstat:
1859 exit_map:
1860 	kstat_delete(sc->sc_intrstat);
1861 	kmem_free(sc, sizeof (struct vioif_softc));
1862 exit:
1863 	return (DDI_FAILURE);
1864 }
1865 
1866 static int
1867 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1868 {
1869 	struct vioif_softc *sc;
1870 
1871 	if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1872 		return (DDI_FAILURE);
1873 
1874 	switch (cmd) {
1875 	case DDI_DETACH:
1876 		break;
1877 
1878 	case DDI_PM_SUSPEND:
1879 		/* We do not support suspend/resume for vioif. */
1880 		return (DDI_FAILURE);
1881 
1882 	default:
1883 		return (DDI_FAILURE);
1884 	}
1885 
1886 	if (sc->sc_rxloan) {
1887 		dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
1888 		    " not detaching.");
1889 		return (DDI_FAILURE);
1890 	}
1891 
1892 	virtio_stop_vq_intr(sc->sc_rx_vq);
1893 	virtio_stop_vq_intr(sc->sc_tx_vq);
1894 
1895 	virtio_release_ints(&sc->sc_virtio);
1896 
1897 	if (mac_unregister(sc->sc_mac_handle)) {
1898 		return (DDI_FAILURE);
1899 	}
1900 
1901 	mac_free(sc->sc_macp);
1902 
1903 	vioif_free_mems(sc);
1904 	virtio_free_vq(sc->sc_rx_vq);
1905 	virtio_free_vq(sc->sc_tx_vq);
1906 
1907 	virtio_device_reset(&sc->sc_virtio);
1908 
1909 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1910 
1911 	kmem_cache_destroy(sc->sc_rxbuf_cache);
1912 	kstat_delete(sc->sc_intrstat);
1913 	kmem_free(sc, sizeof (struct vioif_softc));
1914 
1915 	return (DDI_SUCCESS);
1916 }
1917 
1918 static int
1919 vioif_quiesce(dev_info_t *devinfo)
1920 {
1921 	struct vioif_softc *sc;
1922 
1923 	if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1924 		return (DDI_FAILURE);
1925 
1926 	virtio_stop_vq_intr(sc->sc_rx_vq);
1927 	virtio_stop_vq_intr(sc->sc_tx_vq);
1928 	virtio_device_reset(&sc->sc_virtio);
1929 
1930 	return (DDI_SUCCESS);
1931 }
1932 
1933 int
1934 _init(void)
1935 {
1936 	int ret = 0;
1937 
1938 	mac_init_ops(&vioif_ops, "vioif");
1939 
1940 	ret = mod_install(&modlinkage);
1941 	if (ret != DDI_SUCCESS) {
1942 		mac_fini_ops(&vioif_ops);
1943 		return (ret);
1944 	}
1945 
1946 	return (0);
1947 }
1948 
1949 int
1950 _fini(void)
1951 {
1952 	int ret;
1953 
1954 	ret = mod_remove(&modlinkage);
1955 	if (ret == DDI_SUCCESS) {
1956 		mac_fini_ops(&vioif_ops);
1957 	}
1958 
1959 	return (ret);
1960 }
1961 
1962 int
1963 _info(struct modinfo *pModinfo)
1964 {
1965 	return (mod_info(&modlinkage, pModinfo));
1966 }
1967