xref: /illumos-gate/usr/src/uts/common/io/vioif/vioif.c (revision c5749750a3e052f1194f65a303456224c51dea63)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2013 Nexenta Inc.  All rights reserved.
14  * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
15  * Copyright 2015 Joyent, Inc.
16  */
17 
18 /* Based on the NetBSD virtio driver by Minoura Makoto. */
19 /*
20  * Copyright (c) 2010 Minoura Makoto.
21  * All rights reserved.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the above copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  *
32  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
33  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
34  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
35  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
36  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
41  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42  */
43 
44 #include <sys/types.h>
45 #include <sys/errno.h>
46 #include <sys/param.h>
47 #include <sys/stropts.h>
48 #include <sys/stream.h>
49 #include <sys/strsubr.h>
50 #include <sys/kmem.h>
51 #include <sys/conf.h>
52 #include <sys/devops.h>
53 #include <sys/ksynch.h>
54 #include <sys/stat.h>
55 #include <sys/modctl.h>
56 #include <sys/debug.h>
57 #include <sys/pci.h>
58 #include <sys/ethernet.h>
59 #include <sys/vlan.h>
60 
61 #include <sys/dlpi.h>
62 #include <sys/taskq.h>
63 #include <sys/cyclic.h>
64 
65 #include <sys/pattr.h>
66 #include <sys/strsun.h>
67 
68 #include <sys/random.h>
69 #include <sys/containerof.h>
70 #include <sys/stream.h>
71 
72 #include <sys/mac.h>
73 #include <sys/mac_provider.h>
74 #include <sys/mac_ether.h>
75 
76 #include "virtiovar.h"
77 #include "virtioreg.h"
78 
79 /* Configuration registers */
80 #define	VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
81 #define	VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */
82 
83 /* Feature bits */
84 #define	VIRTIO_NET_F_CSUM	(1 << 0) /* Host handles pkts w/ partial csum */
85 #define	VIRTIO_NET_F_GUEST_CSUM	(1 << 1) /* Guest handles pkts w/ part csum */
86 #define	VIRTIO_NET_F_MAC	(1 << 5) /* Host has given MAC address. */
87 #define	VIRTIO_NET_F_GSO	(1 << 6) /* Host handles pkts w/ any GSO type */
88 #define	VIRTIO_NET_F_GUEST_TSO4	(1 << 7) /* Guest can handle TSOv4 in. */
89 #define	VIRTIO_NET_F_GUEST_TSO6	(1 << 8) /* Guest can handle TSOv6 in. */
90 #define	VIRTIO_NET_F_GUEST_ECN	(1 << 9) /* Guest can handle TSO[6] w/ ECN in */
91 #define	VIRTIO_NET_F_GUEST_UFO	(1 << 10) /* Guest can handle UFO in. */
92 #define	VIRTIO_NET_F_HOST_TSO4	(1 << 11) /* Host can handle TSOv4 in. */
93 #define	VIRTIO_NET_F_HOST_TSO6	(1 << 12) /* Host can handle TSOv6 in. */
94 #define	VIRTIO_NET_F_HOST_ECN	(1 << 13) /* Host can handle TSO[6] w/ ECN in */
95 #define	VIRTIO_NET_F_HOST_UFO	(1 << 14) /* Host can handle UFO in. */
96 #define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* Host can merge receive buffers. */
97 #define	VIRTIO_NET_F_STATUS	(1 << 16) /* Config.status available */
98 #define	VIRTIO_NET_F_CTRL_VQ	(1 << 17) /* Control channel available */
99 #define	VIRTIO_NET_F_CTRL_RX	(1 << 18) /* Control channel RX mode support */
100 #define	VIRTIO_NET_F_CTRL_VLAN	(1 << 19) /* Control channel VLAN filtering */
101 #define	VIRTIO_NET_F_CTRL_RX_EXTRA (1 << 20) /* Extra RX mode control support */
102 
103 #define	VIRTIO_NET_FEATURE_BITS \
104 	"\020" \
105 	"\1CSUM" \
106 	"\2GUEST_CSUM" \
107 	"\6MAC" \
108 	"\7GSO" \
109 	"\10GUEST_TSO4" \
110 	"\11GUEST_TSO6" \
111 	"\12GUEST_ECN" \
112 	"\13GUEST_UFO" \
113 	"\14HOST_TSO4" \
114 	"\15HOST_TSO6" \
115 	"\16HOST_ECN" \
116 	"\17HOST_UFO" \
117 	"\20MRG_RXBUF" \
118 	"\21STATUS" \
119 	"\22CTRL_VQ" \
120 	"\23CTRL_RX" \
121 	"\24CTRL_VLAN" \
122 	"\25CTRL_RX_EXTRA"
123 
124 /* Status */
125 #define	VIRTIO_NET_S_LINK_UP	1
126 
/*
 * Packet header structure. The structure is packed so that no padding is
 * inserted between its members.
 */
#pragma pack(1)
struct virtio_net_hdr {
	uint8_t		flags;		/* VIRTIO_NET_HDR_F_* */
	uint8_t		gso_type;	/* VIRTIO_NET_HDR_GSO_* */
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;
};
#pragma pack()
138 
139 #define	VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
140 #define	VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
141 #define	VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
142 #define	VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
143 #define	VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
144 #define	VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
145 
146 
147 /* Control virtqueue */
/*
 * Command header for the control virtqueue: a class (VIRTIO_NET_CTRL_*)
 * and a command within that class. Packed, so it is exactly two bytes.
 */
#pragma pack(1)
struct virtio_net_ctrl_cmd {
	uint8_t	class;		/* VIRTIO_NET_CTRL_RX, _MAC or _VLAN */
	uint8_t	command;	/* command code within the class */
};
#pragma pack()
154 
155 #define	VIRTIO_NET_CTRL_RX		0
156 #define	VIRTIO_NET_CTRL_RX_PROMISC	0
157 #define	VIRTIO_NET_CTRL_RX_ALLMULTI	1
158 
159 #define	VIRTIO_NET_CTRL_MAC		1
160 #define	VIRTIO_NET_CTRL_MAC_TABLE_SET	0
161 
162 #define	VIRTIO_NET_CTRL_VLAN		2
163 #define	VIRTIO_NET_CTRL_VLAN_ADD	0
164 #define	VIRTIO_NET_CTRL_VLAN_DEL	1
165 
166 #pragma pack(1)
167 struct virtio_net_ctrl_status {
168 	uint8_t	ack;
169 };
170 
171 struct virtio_net_ctrl_rx {
172 	uint8_t	onoff;
173 };
174 
175 struct virtio_net_ctrl_mac_tbl {
176 	uint32_t nentries;
177 	uint8_t macs[][ETHERADDRL];
178 };
179 
180 struct virtio_net_ctrl_vlan {
181 	uint16_t id;
182 };
183 #pragma pack()
184 
185 static int vioif_quiesce(dev_info_t *);
186 static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
187 static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
188 
189 DDI_DEFINE_STREAM_OPS(vioif_ops,
190     nulldev,		/* identify */
191     nulldev,		/* probe */
192     vioif_attach,	/* attach */
193     vioif_detach,	/* detach */
194     nodev,		/* reset */
195     NULL,		/* cb_ops */
196     D_MP,		/* bus_ops */
197     NULL,		/* power */
198     vioif_quiesce	/* quiesce */);
199 
200 static char vioif_ident[] = "VirtIO ethernet driver";
201 
202 /* Standard Module linkage initialization for a Streams driver */
203 extern struct mod_ops mod_driverops;
204 
205 static struct modldrv modldrv = {
206 	&mod_driverops,		/* Type of module.  This one is a driver */
207 	vioif_ident,		/* short description */
208 	&vioif_ops		/* driver specific ops */
209 };
210 
211 static struct modlinkage modlinkage = {
212 	MODREV_1,
213 	{
214 		(void *)&modldrv,
215 		NULL,
216 	},
217 };
218 
219 ddi_device_acc_attr_t vioif_attr = {
220 	DDI_DEVICE_ATTR_V0,
221 	DDI_NEVERSWAP_ACC,	/* virtio is always native byte order */
222 	DDI_STORECACHING_OK_ACC,
223 	DDI_DEFAULT_ACC
224 };
225 
226 /*
227  * A mapping represents a binding for a single buffer that is contiguous in the
228  * virtual address space.
229  */
230 struct vioif_buf_mapping {
231 	caddr_t			vbm_buf;
232 	ddi_dma_handle_t	vbm_dmah;
233 	ddi_acc_handle_t	vbm_acch;
234 	ddi_dma_cookie_t	vbm_dmac;
235 	unsigned int		vbm_ncookies;
236 };
237 
238 /*
239  * Rx buffers can be loaned upstream, so the code has
240  * to allocate them dynamically.
241  */
242 struct vioif_rx_buf {
243 	struct vioif_softc	*rb_sc;
244 	frtn_t			rb_frtn;
245 
246 	struct vioif_buf_mapping rb_mapping;
247 };
248 
/*
 * Tx buffers have two mapping types. One, "inline", is pre-allocated and is
 * used to hold the virtio_net_header. Small packets also get copied there, as
 * it's faster than mapping them. Bigger packets get mapped using the "external"
 * mapping array. An array is used, because a packet may consist of multiple
 * fragments, so each fragment gets bound to an entry. According to my
 * observations, the number of fragments does not exceed 2, but just in case,
 * a bigger array of up to VIOIF_INDIRECT_MAX - 1 entries is allocated. To save
 * resources, the dma handles are allocated lazily in the tx path.
 */
259 struct vioif_tx_buf {
260 	mblk_t			*tb_mp;
261 
262 	/* inline buffer */
263 	struct vioif_buf_mapping tb_inline_mapping;
264 
265 	/* External buffers */
266 	struct vioif_buf_mapping *tb_external_mapping;
267 	unsigned int		tb_external_num;
268 };
269 
270 struct vioif_softc {
271 	dev_info_t		*sc_dev; /* mirrors virtio_softc->sc_dev */
272 	struct virtio_softc	sc_virtio;
273 
274 	mac_handle_t sc_mac_handle;
275 	mac_register_t *sc_macp;
276 
277 	struct virtqueue	*sc_rx_vq;
278 	struct virtqueue	*sc_tx_vq;
279 	struct virtqueue	*sc_ctrl_vq;
280 
281 	unsigned int		sc_tx_stopped:1;
282 
283 	/* Feature bits. */
284 	unsigned int		sc_rx_csum:1;
285 	unsigned int		sc_tx_csum:1;
286 	unsigned int		sc_tx_tso4:1;
287 
288 	/*
289 	 * For debugging, it is useful to know whether the MAC address we
290 	 * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
291 	 * was otherwise generated or set from within the guest.
292 	 */
293 	unsigned int		sc_mac_from_host:1;
294 
295 	int			sc_mtu;
296 	uint8_t			sc_mac[ETHERADDRL];
297 	/*
298 	 * For rx buffers, we keep a pointer array, because the buffers
299 	 * can be loaned upstream, and we have to repopulate the array with
300 	 * new members.
301 	 */
302 	struct vioif_rx_buf	**sc_rxbufs;
303 
304 	/*
305 	 * For tx, we just allocate an array of buffers. The packet can
306 	 * either be copied into the inline buffer, or the external mapping
307 	 * could be used to map the packet
308 	 */
309 	struct vioif_tx_buf	*sc_txbufs;
310 
311 	kstat_t			*sc_intrstat;
312 	/*
313 	 * We "loan" rx buffers upstream and reuse them after they are
314 	 * freed. This lets us avoid allocations in the hot path.
315 	 */
316 	kmem_cache_t		*sc_rxbuf_cache;
317 	ulong_t			sc_rxloan;
318 
319 	/* Copying small packets turns out to be faster then mapping them. */
320 	unsigned long		sc_rxcopy_thresh;
321 	unsigned long		sc_txcopy_thresh;
322 
323 	/*
324 	 * Statistics visible through mac:
325 	 */
326 	uint64_t		sc_ipackets;
327 	uint64_t		sc_opackets;
328 	uint64_t		sc_rbytes;
329 	uint64_t		sc_obytes;
330 	uint64_t		sc_brdcstxmt;
331 	uint64_t		sc_brdcstrcv;
332 	uint64_t		sc_multixmt;
333 	uint64_t		sc_multircv;
334 	uint64_t		sc_norecvbuf;
335 	uint64_t		sc_notxbuf;
336 	uint64_t		sc_ierrors;
337 	uint64_t		sc_oerrors;
338 
339 	/*
340 	 * Internal debugging statistics:
341 	 */
342 	uint64_t		sc_rxfail_dma_handle;
343 	uint64_t		sc_rxfail_dma_buffer;
344 	uint64_t		sc_rxfail_dma_bind;
345 	uint64_t		sc_rxfail_chain_undersize;
346 	uint64_t		sc_rxfail_no_descriptors;
347 	uint64_t		sc_txfail_dma_handle;
348 	uint64_t		sc_txfail_dma_bind;
349 	uint64_t		sc_txfail_indirect_limit;
350 };
351 
352 #define	ETHER_HEADER_LEN		sizeof (struct ether_header)
353 
354 /* MTU + the ethernet header. */
355 #define	MAX_PAYLOAD	65535
356 #define	MAX_MTU		(MAX_PAYLOAD - ETHER_HEADER_LEN)
357 #define	DEFAULT_MTU	ETHERMTU
358 
359 /*
360  * Yeah, we spend 8M per device. Turns out, there is no point
361  * being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
362  * because vhost does not support them, and we expect to be used with
363  * vhost in production environment.
364  */
365 /* The buffer keeps both the packet data and the virtio_net_header. */
366 #define	VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))
367 
368 /*
369  * We win a bit on header alignment, but the host wins a lot
370  * more on moving aligned buffers. Might need more thought.
371  */
372 #define	VIOIF_IP_ALIGN 0
373 
374 /* Maximum number of indirect descriptors, somewhat arbitrary. */
375 #define	VIOIF_INDIRECT_MAX 128
376 
377 /*
378  * We pre-allocate a reasonably large buffer to copy small packets
379  * there. Bigger packets are mapped, packets with multiple
380  * cookies are mapped as indirect buffers.
381  */
382 #define	VIOIF_TX_INLINE_SIZE 2048
383 
384 /* Native queue size for all queues */
385 #define	VIOIF_RX_QLEN 0
386 #define	VIOIF_TX_QLEN 0
387 #define	VIOIF_CTRL_QLEN 0
388 
389 static uchar_t vioif_broadcast[ETHERADDRL] = {
390 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
391 };
392 
393 #define	VIOIF_TX_THRESH_MAX	640
394 #define	VIOIF_RX_THRESH_MAX	640
395 
396 #define	CACHE_NAME_SIZE	32
397 
398 static char vioif_txcopy_thresh[] =
399 	"vioif_txcopy_thresh";
400 static char vioif_rxcopy_thresh[] =
401 	"vioif_rxcopy_thresh";
402 
403 static char *vioif_priv_props[] = {
404 	vioif_txcopy_thresh,
405 	vioif_rxcopy_thresh,
406 	NULL
407 };
408 
409 /* Add up to ddi? */
410 static ddi_dma_cookie_t *
411 vioif_dma_curr_cookie(ddi_dma_handle_t dmah)
412 {
413 	ddi_dma_impl_t *dmah_impl = (void *) dmah;
414 	ASSERT(dmah_impl->dmai_cookie);
415 	return (dmah_impl->dmai_cookie);
416 }
417 
418 static void
419 vioif_dma_reset_cookie(ddi_dma_handle_t dmah, ddi_dma_cookie_t *dmac)
420 {
421 	ddi_dma_impl_t *dmah_impl = (void *) dmah;
422 	dmah_impl->dmai_cookie = dmac;
423 }
424 
425 static link_state_t
426 vioif_link_state(struct vioif_softc *sc)
427 {
428 	if (sc->sc_virtio.sc_features & VIRTIO_NET_F_STATUS) {
429 		if (virtio_read_device_config_2(&sc->sc_virtio,
430 		    VIRTIO_NET_CONFIG_STATUS) & VIRTIO_NET_S_LINK_UP) {
431 			return (LINK_STATE_UP);
432 		} else {
433 			return (LINK_STATE_DOWN);
434 		}
435 	}
436 
437 	return (LINK_STATE_UP);
438 }
439 
440 static ddi_dma_attr_t vioif_inline_buf_dma_attr = {
441 	DMA_ATTR_V0,		/* Version number */
442 	0,			/* low address */
443 	0xFFFFFFFFFFFFFFFF,	/* high address */
444 	0xFFFFFFFF,		/* counter register max */
445 	1,			/* page alignment */
446 	1,			/* burst sizes: 1 - 32 */
447 	1,			/* minimum transfer size */
448 	0xFFFFFFFF,		/* max transfer size */
449 	0xFFFFFFFFFFFFFFF,	/* address register max */
450 	1,			/* scatter-gather capacity */
451 	1,			/* device operates on bytes */
452 	0,			/* attr flag: set to 0 */
453 };
454 
455 static ddi_dma_attr_t vioif_mapped_buf_dma_attr = {
456 	DMA_ATTR_V0,		/* Version number */
457 	0,			/* low address */
458 	0xFFFFFFFFFFFFFFFF,	/* high address */
459 	0xFFFFFFFF,		/* counter register max */
460 	1,			/* page alignment */
461 	1,			/* burst sizes: 1 - 32 */
462 	1,			/* minimum transfer size */
463 	0xFFFFFFFF,		/* max transfer size */
464 	0xFFFFFFFFFFFFFFF,	/* address register max */
465 
466 	/* One entry is used for the virtio_net_hdr on the tx path */
467 	VIOIF_INDIRECT_MAX - 1,	/* scatter-gather capacity */
468 	1,			/* device operates on bytes */
469 	0,			/* attr flag: set to 0 */
470 };
471 
472 static ddi_device_acc_attr_t vioif_bufattr = {
473 	DDI_DEVICE_ATTR_V0,
474 	DDI_NEVERSWAP_ACC,
475 	DDI_STORECACHING_OK_ACC,
476 	DDI_DEFAULT_ACC
477 };
478 
479 static void
480 vioif_rx_free(caddr_t free_arg)
481 {
482 	struct vioif_rx_buf *buf = (void *) free_arg;
483 	struct vioif_softc *sc = buf->rb_sc;
484 
485 	kmem_cache_free(sc->sc_rxbuf_cache, buf);
486 	atomic_dec_ulong(&sc->sc_rxloan);
487 }
488 
489 static int
490 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
491 {
492 	_NOTE(ARGUNUSED(kmflags));
493 	struct vioif_softc *sc = user_arg;
494 	struct vioif_rx_buf *buf = buffer;
495 	size_t len;
496 
497 	if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
498 	    DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
499 		sc->sc_rxfail_dma_handle++;
500 		goto exit_handle;
501 	}
502 
503 	if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
504 	    VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
505 	    &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
506 	    NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
507 		sc->sc_rxfail_dma_buffer++;
508 		goto exit_alloc;
509 	}
510 	ASSERT(len >= VIOIF_RX_SIZE);
511 
512 	if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
513 	    buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
514 	    DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
515 	    &buf->rb_mapping.vbm_ncookies)) {
516 		sc->sc_rxfail_dma_bind++;
517 		goto exit_bind;
518 	}
519 
520 	ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
521 
522 	buf->rb_sc = sc;
523 	buf->rb_frtn.free_arg = (void *) buf;
524 	buf->rb_frtn.free_func = vioif_rx_free;
525 
526 	return (0);
527 exit_bind:
528 	ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
529 exit_alloc:
530 	ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
531 exit_handle:
532 
533 	return (ENOMEM);
534 }
535 
536 static void
537 vioif_rx_destruct(void *buffer, void *user_arg)
538 {
539 	_NOTE(ARGUNUSED(user_arg));
540 	struct vioif_rx_buf *buf = buffer;
541 
542 	ASSERT(buf->rb_mapping.vbm_acch);
543 	ASSERT(buf->rb_mapping.vbm_acch);
544 
545 	(void) ddi_dma_unbind_handle(buf->rb_mapping.vbm_dmah);
546 	ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
547 	ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
548 }
549 
550 static void
551 vioif_free_mems(struct vioif_softc *sc)
552 {
553 	int i;
554 
555 	for (i = 0; i < sc->sc_tx_vq->vq_num; i++) {
556 		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
557 		int j;
558 
559 		/* Tear down the internal mapping. */
560 
561 		ASSERT(buf->tb_inline_mapping.vbm_acch);
562 		ASSERT(buf->tb_inline_mapping.vbm_dmah);
563 
564 		(void) ddi_dma_unbind_handle(buf->tb_inline_mapping.vbm_dmah);
565 		ddi_dma_mem_free(&buf->tb_inline_mapping.vbm_acch);
566 		ddi_dma_free_handle(&buf->tb_inline_mapping.vbm_dmah);
567 
568 		/* We should not see any in-flight buffers at this point. */
569 		ASSERT(!buf->tb_mp);
570 
571 		/* Free all the dma hdnales we allocated lazily. */
572 		for (j = 0; buf->tb_external_mapping[j].vbm_dmah; j++)
573 			ddi_dma_free_handle(
574 			    &buf->tb_external_mapping[j].vbm_dmah);
575 		/* Free the external mapping array. */
576 		kmem_free(buf->tb_external_mapping,
577 		    sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1);
578 	}
579 
580 	kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) *
581 	    sc->sc_tx_vq->vq_num);
582 
583 	for (i = 0; i < sc->sc_rx_vq->vq_num; i++) {
584 		struct vioif_rx_buf *buf = sc->sc_rxbufs[i];
585 
586 		if (buf)
587 			kmem_cache_free(sc->sc_rxbuf_cache, buf);
588 	}
589 	kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) *
590 	    sc->sc_rx_vq->vq_num);
591 }
592 
593 static int
594 vioif_alloc_mems(struct vioif_softc *sc)
595 {
596 	int i, txqsize, rxqsize;
597 	size_t len;
598 	unsigned int nsegments;
599 
600 	txqsize = sc->sc_tx_vq->vq_num;
601 	rxqsize = sc->sc_rx_vq->vq_num;
602 
603 	sc->sc_txbufs = kmem_zalloc(sizeof (struct vioif_tx_buf) * txqsize,
604 	    KM_SLEEP);
605 	if (sc->sc_txbufs == NULL) {
606 		dev_err(sc->sc_dev, CE_WARN,
607 		    "Failed to allocate the tx buffers array");
608 		goto exit_txalloc;
609 	}
610 
611 	/*
612 	 * We don't allocate the rx vioif_bufs, just the pointers, as
613 	 * rx vioif_bufs can be loaned upstream, and we don't know the
614 	 * total number we need.
615 	 */
616 	sc->sc_rxbufs = kmem_zalloc(sizeof (struct vioif_rx_buf *) * rxqsize,
617 	    KM_SLEEP);
618 	if (sc->sc_rxbufs == NULL) {
619 		dev_err(sc->sc_dev, CE_WARN,
620 		    "Failed to allocate the rx buffers pointer array");
621 		goto exit_rxalloc;
622 	}
623 
624 	for (i = 0; i < txqsize; i++) {
625 		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
626 
627 		/* Allocate and bind an inline mapping. */
628 
629 		if (ddi_dma_alloc_handle(sc->sc_dev,
630 		    &vioif_inline_buf_dma_attr,
631 		    DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_dmah)) {
632 
633 			dev_err(sc->sc_dev, CE_WARN,
634 			    "Can't allocate dma handle for tx buffer %d", i);
635 			goto exit_tx;
636 		}
637 
638 		if (ddi_dma_mem_alloc(buf->tb_inline_mapping.vbm_dmah,
639 		    VIOIF_TX_INLINE_SIZE, &vioif_bufattr, DDI_DMA_STREAMING,
640 		    DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_buf,
641 		    &len, &buf->tb_inline_mapping.vbm_acch)) {
642 
643 			dev_err(sc->sc_dev, CE_WARN,
644 			    "Can't allocate tx buffer %d", i);
645 			goto exit_tx;
646 		}
647 		ASSERT(len >= VIOIF_TX_INLINE_SIZE);
648 
649 		if (ddi_dma_addr_bind_handle(buf->tb_inline_mapping.vbm_dmah,
650 		    NULL, buf->tb_inline_mapping.vbm_buf, len,
651 		    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
652 		    &buf->tb_inline_mapping.vbm_dmac, &nsegments)) {
653 
654 			dev_err(sc->sc_dev, CE_WARN,
655 			    "Can't bind tx buffer %d", i);
656 			goto exit_tx;
657 		}
658 
659 		/* We asked for a single segment */
660 		ASSERT(nsegments == 1);
661 
662 		/*
663 		 * We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
664 		 * In reality, I don't expect more then 2-3 used, but who
665 		 * knows.
666 		 */
667 		buf->tb_external_mapping = kmem_zalloc(
668 		    sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1,
669 		    KM_SLEEP);
670 
671 		/*
672 		 * The external mapping's dma handles are allocate lazily,
673 		 * as we don't expect most of them to be used..
674 		 */
675 	}
676 
677 	return (0);
678 
679 exit_tx:
680 	for (i = 0; i < txqsize; i++) {
681 		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
682 
683 		if (buf->tb_inline_mapping.vbm_dmah)
684 			(void) ddi_dma_unbind_handle(
685 			    buf->tb_inline_mapping.vbm_dmah);
686 
687 		if (buf->tb_inline_mapping.vbm_acch)
688 			ddi_dma_mem_free(
689 			    &buf->tb_inline_mapping.vbm_acch);
690 
691 		if (buf->tb_inline_mapping.vbm_dmah)
692 			ddi_dma_free_handle(
693 			    &buf->tb_inline_mapping.vbm_dmah);
694 
695 		if (buf->tb_external_mapping)
696 			kmem_free(buf->tb_external_mapping,
697 			    sizeof (struct vioif_tx_buf) *
698 			    VIOIF_INDIRECT_MAX - 1);
699 	}
700 
701 	kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf) * rxqsize);
702 
703 exit_rxalloc:
704 	kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) * txqsize);
705 exit_txalloc:
706 	return (ENOMEM);
707 }
708 
709 /* ARGSUSED */
710 int
711 vioif_multicst(void *arg, boolean_t add, const uint8_t *macaddr)
712 {
713 	return (DDI_SUCCESS);
714 }
715 
716 /* ARGSUSED */
717 int
718 vioif_promisc(void *arg, boolean_t on)
719 {
720 	return (DDI_SUCCESS);
721 }
722 
723 /* ARGSUSED */
724 int
725 vioif_unicst(void *arg, const uint8_t *macaddr)
726 {
727 	return (DDI_FAILURE);
728 }
729 
730 
731 static uint_t
732 vioif_add_rx(struct vioif_softc *sc, int kmflag)
733 {
734 	uint_t num_added = 0;
735 	struct vq_entry *ve;
736 
737 	while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
738 		struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
739 
740 		if (buf == NULL) {
741 			/* First run, allocate the buffer. */
742 			buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
743 			sc->sc_rxbufs[ve->qe_index] = buf;
744 		}
745 
746 		/* Still nothing? Bye. */
747 		if (buf == NULL) {
748 			sc->sc_norecvbuf++;
749 			vq_free_entry(sc->sc_rx_vq, ve);
750 			break;
751 		}
752 
753 		ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
754 
755 		/*
756 		 * For an unknown reason, the virtio_net_hdr must be placed
757 		 * as a separate virtio queue entry.
758 		 */
759 		virtio_ve_add_indirect_buf(ve,
760 		    buf->rb_mapping.vbm_dmac.dmac_laddress,
761 		    sizeof (struct virtio_net_hdr), B_FALSE);
762 
763 		/* Add the rest of the first cookie. */
764 		virtio_ve_add_indirect_buf(ve,
765 		    buf->rb_mapping.vbm_dmac.dmac_laddress +
766 		    sizeof (struct virtio_net_hdr),
767 		    buf->rb_mapping.vbm_dmac.dmac_size -
768 		    sizeof (struct virtio_net_hdr), B_FALSE);
769 
770 		/*
771 		 * If the buffer consists of a single cookie (unlikely for a
772 		 * 64-k buffer), we are done. Otherwise, add the rest of the
773 		 * cookies using indirect entries.
774 		 */
775 		if (buf->rb_mapping.vbm_ncookies > 1) {
776 			ddi_dma_cookie_t *first_extra_dmac;
777 			ddi_dma_cookie_t dmac;
778 			first_extra_dmac =
779 			    vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
780 
781 			ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
782 			virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
783 			    dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
784 			vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
785 			    first_extra_dmac);
786 		}
787 
788 		virtio_push_chain(ve, B_FALSE);
789 		num_added++;
790 	}
791 
792 	return (num_added);
793 }
794 
795 static uint_t
796 vioif_populate_rx(struct vioif_softc *sc, int kmflag)
797 {
798 	uint_t num_added = vioif_add_rx(sc, kmflag);
799 
800 	if (num_added > 0)
801 		virtio_sync_vq(sc->sc_rx_vq);
802 
803 	return (num_added);
804 }
805 
806 static uint_t
807 vioif_process_rx(struct vioif_softc *sc)
808 {
809 	struct vq_entry *ve;
810 	struct vioif_rx_buf *buf;
811 	mblk_t *mphead = NULL, *lastmp = NULL, *mp;
812 	uint32_t len;
813 	uint_t num_processed = 0;
814 
815 	while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
816 
817 		buf = sc->sc_rxbufs[ve->qe_index];
818 		ASSERT(buf);
819 
820 		if (len < sizeof (struct virtio_net_hdr)) {
821 			sc->sc_rxfail_chain_undersize++;
822 			sc->sc_ierrors++;
823 			virtio_free_chain(ve);
824 			continue;
825 		}
826 
827 		len -= sizeof (struct virtio_net_hdr);
828 		/*
829 		 * We copy small packets that happen to fit into a single
830 		 * cookie and reuse the buffers. For bigger ones, we loan
831 		 * the buffers upstream.
832 		 */
833 		if (len < sc->sc_rxcopy_thresh) {
834 			mp = allocb(len, 0);
835 			if (mp == NULL) {
836 				sc->sc_norecvbuf++;
837 				sc->sc_ierrors++;
838 
839 				virtio_free_chain(ve);
840 				break;
841 			}
842 
843 			bcopy((char *)buf->rb_mapping.vbm_buf +
844 			    sizeof (struct virtio_net_hdr), mp->b_rptr, len);
845 			mp->b_wptr = mp->b_rptr + len;
846 
847 		} else {
848 			mp = desballoc((unsigned char *)
849 			    buf->rb_mapping.vbm_buf +
850 			    sizeof (struct virtio_net_hdr) +
851 			    VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
852 			if (mp == NULL) {
853 				sc->sc_norecvbuf++;
854 				sc->sc_ierrors++;
855 
856 				virtio_free_chain(ve);
857 				break;
858 			}
859 			mp->b_wptr = mp->b_rptr + len;
860 
861 			atomic_inc_ulong(&sc->sc_rxloan);
862 			/*
863 			 * Buffer loaned, we will have to allocate a new one
864 			 * for this slot.
865 			 */
866 			sc->sc_rxbufs[ve->qe_index] = NULL;
867 		}
868 
869 		/*
870 		 * virtio-net does not tell us if this packet is multicast
871 		 * or broadcast, so we have to check it.
872 		 */
873 		if (mp->b_rptr[0] & 0x1) {
874 			if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
875 				sc->sc_multircv++;
876 			else
877 				sc->sc_brdcstrcv++;
878 		}
879 
880 		sc->sc_rbytes += len;
881 		sc->sc_ipackets++;
882 
883 		virtio_free_chain(ve);
884 
885 		if (lastmp == NULL) {
886 			mphead = mp;
887 		} else {
888 			lastmp->b_next = mp;
889 		}
890 		lastmp = mp;
891 		num_processed++;
892 	}
893 
894 	if (mphead != NULL) {
895 		mac_rx(sc->sc_mac_handle, NULL, mphead);
896 	}
897 
898 	return (num_processed);
899 }
900 
901 static uint_t
902 vioif_reclaim_used_tx(struct vioif_softc *sc)
903 {
904 	struct vq_entry *ve;
905 	struct vioif_tx_buf *buf;
906 	uint32_t len;
907 	mblk_t *mp;
908 	uint_t num_reclaimed = 0;
909 
910 	while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
911 		/* We don't chain descriptors for tx, so don't expect any. */
912 		ASSERT(!ve->qe_next);
913 
914 		buf = &sc->sc_txbufs[ve->qe_index];
915 		mp = buf->tb_mp;
916 		buf->tb_mp = NULL;
917 
918 		if (mp != NULL) {
919 			for (int i = 0; i < buf->tb_external_num; i++)
920 				(void) ddi_dma_unbind_handle(
921 				    buf->tb_external_mapping[i].vbm_dmah);
922 		}
923 
924 		virtio_free_chain(ve);
925 
926 		/* External mapping used, mp was not freed in vioif_send() */
927 		if (mp != NULL)
928 			freemsg(mp);
929 		num_reclaimed++;
930 	}
931 
932 	if (sc->sc_tx_stopped && num_reclaimed > 0) {
933 		sc->sc_tx_stopped = 0;
934 		mac_tx_update(sc->sc_mac_handle);
935 	}
936 
937 	return (num_reclaimed);
938 }
939 
940 /* sc will be used to update stat counters. */
941 /* ARGSUSED */
942 static inline void
943 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
944     size_t msg_size)
945 {
946 	struct vioif_tx_buf *buf;
947 	buf = &sc->sc_txbufs[ve->qe_index];
948 
949 	ASSERT(buf);
950 
951 	/* Frees mp */
952 	mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
953 	    sizeof (struct virtio_net_hdr));
954 
955 	virtio_ve_add_indirect_buf(ve,
956 	    buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
957 	    sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
958 }
959 
960 static inline int
961 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
962     int i)
963 {
964 	int ret = DDI_SUCCESS;
965 
966 	if (!buf->tb_external_mapping[i].vbm_dmah) {
967 		ret = ddi_dma_alloc_handle(sc->sc_dev,
968 		    &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
969 		    &buf->tb_external_mapping[i].vbm_dmah);
970 		if (ret != DDI_SUCCESS) {
971 			sc->sc_txfail_dma_handle++;
972 		}
973 	}
974 
975 	return (ret);
976 }
977 
978 static inline int
979 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
980     size_t msg_size)
981 {
982 	_NOTE(ARGUNUSED(msg_size));
983 
984 	struct vioif_tx_buf *buf;
985 	mblk_t *nmp;
986 	int i, j;
987 	int ret = DDI_SUCCESS;
988 
989 	buf = &sc->sc_txbufs[ve->qe_index];
990 
991 	ASSERT(buf);
992 
993 	buf->tb_external_num = 0;
994 	i = 0;
995 	nmp = mp;
996 
997 	while (nmp) {
998 		size_t len;
999 		ddi_dma_cookie_t dmac;
1000 		unsigned int ncookies;
1001 
1002 		len = MBLKL(nmp);
1003 		/*
1004 		 * For some reason, the network stack can
1005 		 * actually send us zero-length fragments.
1006 		 */
1007 		if (len == 0) {
1008 			nmp = nmp->b_cont;
1009 			continue;
1010 		}
1011 
1012 		ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
1013 		if (ret != DDI_SUCCESS) {
1014 			sc->sc_notxbuf++;
1015 			sc->sc_oerrors++;
1016 			goto exit_lazy_alloc;
1017 		}
1018 		ret = ddi_dma_addr_bind_handle(
1019 		    buf->tb_external_mapping[i].vbm_dmah, NULL,
1020 		    (caddr_t)nmp->b_rptr, len,
1021 		    DDI_DMA_WRITE | DDI_DMA_STREAMING,
1022 		    DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
1023 
1024 		if (ret != DDI_SUCCESS) {
1025 			sc->sc_txfail_dma_bind++;
1026 			sc->sc_oerrors++;
1027 			goto exit_bind;
1028 		}
1029 
1030 		/* Check if we still fit into the indirect table. */
1031 		if (virtio_ve_indirect_available(ve) < ncookies) {
1032 			sc->sc_txfail_indirect_limit++;
1033 			sc->sc_notxbuf++;
1034 			sc->sc_oerrors++;
1035 
1036 			ret = DDI_FAILURE;
1037 			goto exit_limit;
1038 		}
1039 
1040 		virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1041 		    dmac, ncookies, B_TRUE);
1042 
1043 		nmp = nmp->b_cont;
1044 		i++;
1045 	}
1046 
1047 	buf->tb_external_num = i;
1048 	/* Save the mp to free it when the packet is sent. */
1049 	buf->tb_mp = mp;
1050 
1051 	return (DDI_SUCCESS);
1052 
1053 exit_limit:
1054 exit_bind:
1055 exit_lazy_alloc:
1056 
1057 	for (j = 0; j < i; j++) {
1058 		(void) ddi_dma_unbind_handle(
1059 		    buf->tb_external_mapping[j].vbm_dmah);
1060 	}
1061 
1062 	return (ret);
1063 }
1064 
1065 static boolean_t
1066 vioif_send(struct vioif_softc *sc, mblk_t *mp)
1067 {
1068 	struct vq_entry *ve;
1069 	struct vioif_tx_buf *buf;
1070 	struct virtio_net_hdr *net_header = NULL;
1071 	size_t msg_size = 0;
1072 	uint32_t csum_start;
1073 	uint32_t csum_stuff;
1074 	uint32_t csum_flags;
1075 	uint32_t lso_flags;
1076 	uint32_t lso_mss;
1077 	mblk_t *nmp;
1078 	int ret;
1079 	boolean_t lso_required = B_FALSE;
1080 
1081 	for (nmp = mp; nmp; nmp = nmp->b_cont)
1082 		msg_size += MBLKL(nmp);
1083 
1084 	if (sc->sc_tx_tso4) {
1085 		mac_lso_get(mp, &lso_mss, &lso_flags);
1086 		lso_required = (lso_flags & HW_LSO);
1087 	}
1088 
1089 	ve = vq_alloc_entry(sc->sc_tx_vq);
1090 
1091 	if (ve == NULL) {
1092 		sc->sc_notxbuf++;
1093 		/* Out of free descriptors - try later. */
1094 		return (B_FALSE);
1095 	}
1096 	buf = &sc->sc_txbufs[ve->qe_index];
1097 
1098 	/* Use the inline buffer of the first entry for the virtio_net_hdr. */
1099 	(void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1100 	    sizeof (struct virtio_net_hdr));
1101 
1102 	net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
1103 
1104 	mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1105 	    NULL, &csum_flags);
1106 
1107 	/* They want us to do the TCP/UDP csum calculation. */
1108 	if (csum_flags & HCK_PARTIALCKSUM) {
1109 		struct ether_header *eth_header;
1110 		int eth_hsize;
1111 
1112 		/* Did we ask for it? */
1113 		ASSERT(sc->sc_tx_csum);
1114 
1115 		/* We only asked for partial csum packets. */
1116 		ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
1117 		ASSERT(!(csum_flags & HCK_FULLCKSUM));
1118 
1119 		eth_header = (void *) mp->b_rptr;
1120 		if (eth_header->ether_type == htons(ETHERTYPE_VLAN)) {
1121 			eth_hsize = sizeof (struct ether_vlan_header);
1122 		} else {
1123 			eth_hsize = sizeof (struct ether_header);
1124 		}
1125 		net_header->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1126 		net_header->csum_start = eth_hsize + csum_start;
1127 		net_header->csum_offset = csum_stuff - csum_start;
1128 	}
1129 
1130 	/* setup LSO fields if required */
1131 	if (lso_required) {
1132 		net_header->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1133 		net_header->gso_size = (uint16_t)lso_mss;
1134 	}
1135 
1136 	virtio_ve_add_indirect_buf(ve,
1137 	    buf->tb_inline_mapping.vbm_dmac.dmac_laddress,
1138 	    sizeof (struct virtio_net_hdr), B_TRUE);
1139 
1140 	/* meanwhile update the statistic */
1141 	if (mp->b_rptr[0] & 0x1) {
1142 		if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
1143 			sc->sc_multixmt++;
1144 		else
1145 			sc->sc_brdcstxmt++;
1146 	}
1147 
1148 	/*
1149 	 * We copy small packets into the inline buffer. The bigger ones
1150 	 * get mapped using the mapped buffer.
1151 	 */
1152 	if (msg_size < sc->sc_txcopy_thresh) {
1153 		vioif_tx_inline(sc, ve, mp, msg_size);
1154 	} else {
1155 		/* statistic gets updated by vioif_tx_external when fail */
1156 		ret = vioif_tx_external(sc, ve, mp, msg_size);
1157 		if (ret != DDI_SUCCESS)
1158 			goto exit_tx_external;
1159 	}
1160 
1161 	virtio_push_chain(ve, B_TRUE);
1162 
1163 	sc->sc_opackets++;
1164 	sc->sc_obytes += msg_size;
1165 
1166 	return (B_TRUE);
1167 
1168 exit_tx_external:
1169 
1170 	vq_free_entry(sc->sc_tx_vq, ve);
1171 	/*
1172 	 * vioif_tx_external can fail when the buffer does not fit into the
1173 	 * indirect descriptor table. Free the mp. I don't expect this ever
1174 	 * to happen.
1175 	 */
1176 	freemsg(mp);
1177 
1178 	return (B_TRUE);
1179 }
1180 
1181 mblk_t *
1182 vioif_tx(void *arg, mblk_t *mp)
1183 {
1184 	struct vioif_softc *sc = arg;
1185 	mblk_t	*nmp;
1186 
1187 	while (mp != NULL) {
1188 		nmp = mp->b_next;
1189 		mp->b_next = NULL;
1190 
1191 		if (!vioif_send(sc, mp)) {
1192 			sc->sc_tx_stopped = 1;
1193 			mp->b_next = nmp;
1194 			break;
1195 		}
1196 		mp = nmp;
1197 	}
1198 
1199 	return (mp);
1200 }
1201 
1202 int
1203 vioif_start(void *arg)
1204 {
1205 	struct vioif_softc *sc = arg;
1206 	struct vq_entry *ve;
1207 	uint32_t len;
1208 
1209 	mac_link_update(sc->sc_mac_handle, vioif_link_state(sc));
1210 
1211 	virtio_start_vq_intr(sc->sc_rx_vq);
1212 
1213 	/*
1214 	 * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
1215 	 * so the device will send a transmit interrupt when the queue is empty
1216 	 * and we can reclaim it in one sweep.
1217 	 */
1218 
1219 	/*
1220 	 * Clear any data that arrived early on the receive queue and populate
1221 	 * it with free buffers that the device can use moving forward.
1222 	 */
1223 	while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
1224 		virtio_free_chain(ve);
1225 	}
1226 	(void) vioif_populate_rx(sc, KM_SLEEP);
1227 
1228 	return (DDI_SUCCESS);
1229 }
1230 
1231 void
1232 vioif_stop(void *arg)
1233 {
1234 	struct vioif_softc *sc = arg;
1235 
1236 	virtio_stop_vq_intr(sc->sc_rx_vq);
1237 }
1238 
1239 /* ARGSUSED */
1240 static int
1241 vioif_stat(void *arg, uint_t stat, uint64_t *val)
1242 {
1243 	struct vioif_softc *sc = arg;
1244 
1245 	switch (stat) {
1246 	case MAC_STAT_IERRORS:
1247 		*val = sc->sc_ierrors;
1248 		break;
1249 	case MAC_STAT_OERRORS:
1250 		*val = sc->sc_oerrors;
1251 		break;
1252 	case MAC_STAT_MULTIRCV:
1253 		*val = sc->sc_multircv;
1254 		break;
1255 	case MAC_STAT_BRDCSTRCV:
1256 		*val = sc->sc_brdcstrcv;
1257 		break;
1258 	case MAC_STAT_MULTIXMT:
1259 		*val = sc->sc_multixmt;
1260 		break;
1261 	case MAC_STAT_BRDCSTXMT:
1262 		*val = sc->sc_brdcstxmt;
1263 		break;
1264 	case MAC_STAT_IPACKETS:
1265 		*val = sc->sc_ipackets;
1266 		break;
1267 	case MAC_STAT_RBYTES:
1268 		*val = sc->sc_rbytes;
1269 		break;
1270 	case MAC_STAT_OPACKETS:
1271 		*val = sc->sc_opackets;
1272 		break;
1273 	case MAC_STAT_OBYTES:
1274 		*val = sc->sc_obytes;
1275 		break;
1276 	case MAC_STAT_NORCVBUF:
1277 		*val = sc->sc_norecvbuf;
1278 		break;
1279 	case MAC_STAT_NOXMTBUF:
1280 		*val = sc->sc_notxbuf;
1281 		break;
1282 	case MAC_STAT_IFSPEED:
1283 		/* always 1 Gbit */
1284 		*val = 1000000000ULL;
1285 		break;
1286 	case ETHER_STAT_LINK_DUPLEX:
1287 		/* virtual device, always full-duplex */
1288 		*val = LINK_DUPLEX_FULL;
1289 		break;
1290 
1291 	default:
1292 		return (ENOTSUP);
1293 	}
1294 
1295 	return (DDI_SUCCESS);
1296 }
1297 
1298 static int
1299 vioif_set_prop_private(struct vioif_softc *sc, const char *pr_name,
1300     uint_t pr_valsize, const void *pr_val)
1301 {
1302 	_NOTE(ARGUNUSED(pr_valsize));
1303 
1304 	long result;
1305 
1306 	if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1307 
1308 		if (pr_val == NULL)
1309 			return (EINVAL);
1310 
1311 		(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1312 
1313 		if (result < 0 || result > VIOIF_TX_THRESH_MAX)
1314 			return (EINVAL);
1315 		sc->sc_txcopy_thresh = result;
1316 	}
1317 	if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1318 
1319 		if (pr_val == NULL)
1320 			return (EINVAL);
1321 
1322 		(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1323 
1324 		if (result < 0 || result > VIOIF_RX_THRESH_MAX)
1325 			return (EINVAL);
1326 		sc->sc_rxcopy_thresh = result;
1327 	}
1328 	return (0);
1329 }
1330 
1331 static int
1332 vioif_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1333     uint_t pr_valsize, const void *pr_val)
1334 {
1335 	struct vioif_softc *sc = arg;
1336 	const uint32_t *new_mtu;
1337 	int err;
1338 
1339 	switch (pr_num) {
1340 	case MAC_PROP_MTU:
1341 		new_mtu = pr_val;
1342 
1343 		if (*new_mtu > MAX_MTU) {
1344 			return (EINVAL);
1345 		}
1346 
1347 		err = mac_maxsdu_update(sc->sc_mac_handle, *new_mtu);
1348 		if (err) {
1349 			return (err);
1350 		}
1351 		break;
1352 	case MAC_PROP_PRIVATE:
1353 		err = vioif_set_prop_private(sc, pr_name,
1354 		    pr_valsize, pr_val);
1355 		if (err)
1356 			return (err);
1357 		break;
1358 	default:
1359 		return (ENOTSUP);
1360 	}
1361 
1362 	return (0);
1363 }
1364 
1365 static int
1366 vioif_get_prop_private(struct vioif_softc *sc, const char *pr_name,
1367     uint_t pr_valsize, void *pr_val)
1368 {
1369 	int err = ENOTSUP;
1370 	int value;
1371 
1372 	if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1373 
1374 		value = sc->sc_txcopy_thresh;
1375 		err = 0;
1376 		goto done;
1377 	}
1378 	if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1379 
1380 		value = sc->sc_rxcopy_thresh;
1381 		err = 0;
1382 		goto done;
1383 	}
1384 done:
1385 	if (err == 0) {
1386 		(void) snprintf(pr_val, pr_valsize, "%d", value);
1387 	}
1388 	return (err);
1389 }
1390 
1391 static int
1392 vioif_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1393     uint_t pr_valsize, void *pr_val)
1394 {
1395 	struct vioif_softc *sc = arg;
1396 	int err = ENOTSUP;
1397 
1398 	switch (pr_num) {
1399 	case MAC_PROP_PRIVATE:
1400 		err = vioif_get_prop_private(sc, pr_name,
1401 		    pr_valsize, pr_val);
1402 		break;
1403 	default:
1404 		break;
1405 	}
1406 	return (err);
1407 }
1408 
1409 static void
1410 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1411     mac_prop_info_handle_t prh)
1412 {
1413 	struct vioif_softc *sc = arg;
1414 	char valstr[64];
1415 	int value;
1416 
1417 	switch (pr_num) {
1418 	case MAC_PROP_MTU:
1419 		mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1420 		break;
1421 
1422 	case MAC_PROP_PRIVATE:
1423 		bzero(valstr, sizeof (valstr));
1424 		if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1425 			value = sc->sc_txcopy_thresh;
1426 		} else if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1427 			value = sc->sc_rxcopy_thresh;
1428 		} else {
1429 			return;
1430 		}
1431 		(void) snprintf(valstr, sizeof (valstr), "%d", value);
1432 		break;
1433 
1434 	default:
1435 		break;
1436 	}
1437 }
1438 
1439 static boolean_t
1440 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1441 {
1442 	struct vioif_softc *sc = arg;
1443 
1444 	switch (cap) {
1445 	case MAC_CAPAB_HCKSUM:
1446 		if (sc->sc_tx_csum) {
1447 			uint32_t *txflags = cap_data;
1448 
1449 			*txflags = HCKSUM_INET_PARTIAL;
1450 			return (B_TRUE);
1451 		}
1452 		return (B_FALSE);
1453 	case MAC_CAPAB_LSO:
1454 		if (sc->sc_tx_tso4) {
1455 			mac_capab_lso_t *cap_lso = cap_data;
1456 
1457 			cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1458 			cap_lso->lso_basic_tcp_ipv4.lso_max = MAX_MTU;
1459 			return (B_TRUE);
1460 		}
1461 		return (B_FALSE);
1462 	default:
1463 		break;
1464 	}
1465 	return (B_FALSE);
1466 }
1467 
1468 static mac_callbacks_t vioif_m_callbacks = {
1469 	.mc_callbacks	= (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO),
1470 	.mc_getstat	= vioif_stat,
1471 	.mc_start	= vioif_start,
1472 	.mc_stop	= vioif_stop,
1473 	.mc_setpromisc	= vioif_promisc,
1474 	.mc_multicst	= vioif_multicst,
1475 	.mc_unicst	= vioif_unicst,
1476 	.mc_tx		= vioif_tx,
1477 	/* Optional callbacks */
1478 	.mc_reserved	= NULL,		/* reserved */
1479 	.mc_ioctl	= NULL,		/* mc_ioctl */
1480 	.mc_getcapab	= vioif_getcapab,		/* mc_getcapab */
1481 	.mc_open	= NULL,		/* mc_open */
1482 	.mc_close	= NULL,		/* mc_close */
1483 	.mc_setprop	= vioif_setprop,
1484 	.mc_getprop	= vioif_getprop,
1485 	.mc_propinfo	= vioif_propinfo,
1486 };
1487 
1488 static void
1489 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1490     uint32_t features)
1491 {
1492 	char buf[512];
1493 	char *bufp = buf;
1494 	char *bufend = buf + sizeof (buf);
1495 
1496 	/* LINTED E_PTRDIFF_OVERFLOW */
1497 	bufp += snprintf(bufp, bufend - bufp, prefix);
1498 	/* LINTED E_PTRDIFF_OVERFLOW */
1499 	bufp += virtio_show_features(features, bufp, bufend - bufp);
1500 	*bufp = '\0';
1501 
1502 	/* Using '!' to only CE_NOTE this to the system log. */
1503 	dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
1504 	    VIRTIO_NET_FEATURE_BITS);
1505 }
1506 
1507 /*
1508  * Find out which features are supported by the device and
1509  * choose which ones we wish to use.
1510  */
1511 static int
1512 vioif_dev_features(struct vioif_softc *sc)
1513 {
1514 	uint32_t host_features;
1515 
1516 	host_features = virtio_negotiate_features(&sc->sc_virtio,
1517 	    VIRTIO_NET_F_CSUM |
1518 	    VIRTIO_NET_F_HOST_TSO4 |
1519 	    VIRTIO_NET_F_HOST_ECN |
1520 	    VIRTIO_NET_F_MAC |
1521 	    VIRTIO_NET_F_STATUS |
1522 	    VIRTIO_F_RING_INDIRECT_DESC |
1523 	    VIRTIO_F_NOTIFY_ON_EMPTY);
1524 
1525 	vioif_show_features(sc, "Host features: ", host_features);
1526 	vioif_show_features(sc, "Negotiated features: ",
1527 	    sc->sc_virtio.sc_features);
1528 
1529 	if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1530 		dev_err(sc->sc_dev, CE_WARN,
1531 		    "Host does not support RING_INDIRECT_DESC. Cannot attach.");
1532 		return (DDI_FAILURE);
1533 	}
1534 
1535 	return (DDI_SUCCESS);
1536 }
1537 
1538 static int
1539 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1540 {
1541 	return (virtio_has_feature(&sc->sc_virtio, feature));
1542 }
1543 
1544 static void
1545 vioif_set_mac(struct vioif_softc *sc)
1546 {
1547 	int i;
1548 
1549 	for (i = 0; i < ETHERADDRL; i++) {
1550 		virtio_write_device_config_1(&sc->sc_virtio,
1551 		    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1552 	}
1553 	sc->sc_mac_from_host = 0;
1554 }
1555 
1556 /* Get the mac address out of the hardware, or make up one. */
1557 static void
1558 vioif_get_mac(struct vioif_softc *sc)
1559 {
1560 	int i;
1561 	if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1562 		for (i = 0; i < ETHERADDRL; i++) {
1563 			sc->sc_mac[i] = virtio_read_device_config_1(
1564 			    &sc->sc_virtio,
1565 			    VIRTIO_NET_CONFIG_MAC + i);
1566 		}
1567 		sc->sc_mac_from_host = 1;
1568 	} else {
1569 		/* Get a few random bytes */
1570 		(void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1571 		/* Make sure it's a unicast MAC */
1572 		sc->sc_mac[0] &= ~1;
1573 		/* Set the "locally administered" bit */
1574 		sc->sc_mac[1] |= 2;
1575 
1576 		vioif_set_mac(sc);
1577 
1578 		dev_err(sc->sc_dev, CE_NOTE,
1579 		    "!Generated a random MAC address: %s",
1580 		    ether_sprintf((struct ether_addr *)sc->sc_mac));
1581 	}
1582 }
1583 
1584 /*
1585  * Virtqueue interrupt handlers
1586  */
1587 /* ARGSUSED */
1588 uint_t
1589 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1590 {
1591 	struct virtio_softc *vsc = (void *) arg1;
1592 	struct vioif_softc *sc = __containerof(vsc,
1593 	    struct vioif_softc, sc_virtio);
1594 
1595 	/*
1596 	 * The return values of these functions are not needed but they make
1597 	 * debugging interrupts simpler because you can use them to detect when
1598 	 * stuff was processed and repopulated in this handler.
1599 	 */
1600 	(void) vioif_process_rx(sc);
1601 	(void) vioif_populate_rx(sc, KM_NOSLEEP);
1602 
1603 	return (DDI_INTR_CLAIMED);
1604 }
1605 
1606 /* ARGSUSED */
1607 uint_t
1608 vioif_tx_handler(caddr_t arg1, caddr_t arg2)
1609 {
1610 	struct virtio_softc *vsc = (void *)arg1;
1611 	struct vioif_softc *sc = __containerof(vsc,
1612 	    struct vioif_softc, sc_virtio);
1613 
1614 	/*
1615 	 * The return value of this function is not needed but makes debugging
1616 	 * interrupts simpler because you can use it to detect if anything was
1617 	 * reclaimed in this handler.
1618 	 */
1619 	(void) vioif_reclaim_used_tx(sc);
1620 
1621 	return (DDI_INTR_CLAIMED);
1622 }
1623 
1624 static int
1625 vioif_register_ints(struct vioif_softc *sc)
1626 {
1627 	int ret;
1628 
1629 	struct virtio_int_handler vioif_vq_h[] = {
1630 		{ vioif_rx_handler },
1631 		{ vioif_tx_handler },
1632 		{ NULL }
1633 	};
1634 
1635 	ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1636 
1637 	return (ret);
1638 }
1639 
1640 
1641 static void
1642 vioif_check_features(struct vioif_softc *sc)
1643 {
1644 	if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
1645 		/* The GSO/GRO featured depend on CSUM, check them here. */
1646 		sc->sc_tx_csum = 1;
1647 		sc->sc_rx_csum = 1;
1648 
1649 		if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1650 			sc->sc_rx_csum = 0;
1651 		}
1652 		dev_err(sc->sc_dev, CE_NOTE, "!Csum enabled.");
1653 
1654 		if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1655 
1656 			sc->sc_tx_tso4 = 1;
1657 			/*
1658 			 * We don't seem to have a way to ask the system
1659 			 * not to send us LSO packets with Explicit
1660 			 * Congestion Notification bit set, so we require
1661 			 * the device to support it in order to do
1662 			 * LSO.
1663 			 */
1664 			if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1665 				dev_err(sc->sc_dev, CE_NOTE,
1666 				    "!TSO4 supported, but not ECN. "
1667 				    "Not using LSO.");
1668 				sc->sc_tx_tso4 = 0;
1669 			} else {
1670 				dev_err(sc->sc_dev, CE_NOTE, "!LSO enabled");
1671 			}
1672 		}
1673 	}
1674 }
1675 
1676 static int
1677 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1678 {
1679 	int ret, instance;
1680 	struct vioif_softc *sc;
1681 	struct virtio_softc *vsc;
1682 	mac_register_t *macp;
1683 	char cache_name[CACHE_NAME_SIZE];
1684 
1685 	instance = ddi_get_instance(devinfo);
1686 
1687 	switch (cmd) {
1688 	case DDI_ATTACH:
1689 		break;
1690 
1691 	case DDI_RESUME:
1692 	case DDI_PM_RESUME:
1693 		/* We do not support suspend/resume for vioif. */
1694 		goto exit;
1695 
1696 	default:
1697 		goto exit;
1698 	}
1699 
1700 	sc = kmem_zalloc(sizeof (struct vioif_softc), KM_SLEEP);
1701 	ddi_set_driver_private(devinfo, sc);
1702 
1703 	vsc = &sc->sc_virtio;
1704 
1705 	/* Duplicate for less typing */
1706 	sc->sc_dev = devinfo;
1707 	vsc->sc_dev = devinfo;
1708 
1709 	/*
1710 	 * Initialize interrupt kstat.
1711 	 */
1712 	sc->sc_intrstat = kstat_create("vioif", instance, "intr", "controller",
1713 	    KSTAT_TYPE_INTR, 1, 0);
1714 	if (sc->sc_intrstat == NULL) {
1715 		dev_err(devinfo, CE_WARN, "kstat_create failed");
1716 		goto exit_intrstat;
1717 	}
1718 	kstat_install(sc->sc_intrstat);
1719 
1720 	/* map BAR 0 */
1721 	ret = ddi_regs_map_setup(devinfo, 1,
1722 	    (caddr_t *)&sc->sc_virtio.sc_io_addr,
1723 	    0, 0, &vioif_attr, &sc->sc_virtio.sc_ioh);
1724 	if (ret != DDI_SUCCESS) {
1725 		dev_err(devinfo, CE_WARN, "unable to map bar 0: %d", ret);
1726 		goto exit_map;
1727 	}
1728 
1729 	virtio_device_reset(&sc->sc_virtio);
1730 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
1731 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
1732 
1733 	ret = vioif_dev_features(sc);
1734 	if (ret)
1735 		goto exit_features;
1736 
1737 	vsc->sc_nvqs = vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
1738 
1739 	(void) snprintf(cache_name, CACHE_NAME_SIZE, "vioif%d_rx", instance);
1740 	sc->sc_rxbuf_cache = kmem_cache_create(cache_name,
1741 	    sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
1742 	    vioif_rx_destruct, NULL, sc, NULL, KM_SLEEP);
1743 	if (sc->sc_rxbuf_cache == NULL) {
1744 		dev_err(sc->sc_dev, CE_WARN, "Can't allocate the buffer cache");
1745 		goto exit_cache;
1746 	}
1747 
1748 	ret = vioif_register_ints(sc);
1749 	if (ret) {
1750 		dev_err(sc->sc_dev, CE_WARN,
1751 		    "Failed to allocate interrupt(s)!");
1752 		goto exit_ints;
1753 	}
1754 
1755 	/*
1756 	 * Register layout determined, can now access the
1757 	 * device-specific bits
1758 	 */
1759 	vioif_get_mac(sc);
1760 
1761 	sc->sc_rx_vq = virtio_alloc_vq(&sc->sc_virtio, 0,
1762 	    VIOIF_RX_QLEN, VIOIF_INDIRECT_MAX, "rx");
1763 	if (!sc->sc_rx_vq)
1764 		goto exit_alloc1;
1765 	virtio_stop_vq_intr(sc->sc_rx_vq);
1766 
1767 	sc->sc_tx_vq = virtio_alloc_vq(&sc->sc_virtio, 1,
1768 	    VIOIF_TX_QLEN, VIOIF_INDIRECT_MAX, "tx");
1769 	if (!sc->sc_tx_vq)
1770 		goto exit_alloc2;
1771 	virtio_stop_vq_intr(sc->sc_tx_vq);
1772 
1773 	if (vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ)) {
1774 		sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1775 		    VIOIF_CTRL_QLEN, 0, "ctrl");
1776 		if (!sc->sc_ctrl_vq) {
1777 			goto exit_alloc3;
1778 		}
1779 		virtio_stop_vq_intr(sc->sc_ctrl_vq);
1780 	}
1781 
1782 	virtio_set_status(&sc->sc_virtio,
1783 	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1784 
1785 	sc->sc_rxloan = 0;
1786 
1787 	/* set some reasonable-small default values */
1788 	sc->sc_rxcopy_thresh = 300;
1789 	sc->sc_txcopy_thresh = 300;
1790 	sc->sc_mtu = ETHERMTU;
1791 
1792 	vioif_check_features(sc);
1793 
1794 	if (vioif_alloc_mems(sc) != 0)
1795 		goto exit_alloc_mems;
1796 
1797 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1798 		dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1799 		goto exit_macalloc;
1800 	}
1801 
1802 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1803 	macp->m_driver = sc;
1804 	macp->m_dip = devinfo;
1805 	macp->m_src_addr = sc->sc_mac;
1806 	macp->m_callbacks = &vioif_m_callbacks;
1807 	macp->m_min_sdu = 0;
1808 	macp->m_max_sdu = sc->sc_mtu;
1809 	macp->m_margin = VLAN_TAGSZ;
1810 	macp->m_priv_props = vioif_priv_props;
1811 
1812 	sc->sc_macp = macp;
1813 
1814 	/* Pre-fill the rx ring. */
1815 	(void) vioif_populate_rx(sc, KM_SLEEP);
1816 
1817 	ret = mac_register(macp, &sc->sc_mac_handle);
1818 	if (ret != 0) {
1819 		dev_err(devinfo, CE_WARN, "vioif_attach: "
1820 		    "mac_register() failed, ret=%d", ret);
1821 		goto exit_register;
1822 	}
1823 
1824 	ret = virtio_enable_ints(&sc->sc_virtio);
1825 	if (ret) {
1826 		dev_err(devinfo, CE_WARN, "Failed to enable interrupts");
1827 		goto exit_enable_ints;
1828 	}
1829 
1830 	mac_link_update(sc->sc_mac_handle, LINK_STATE_UP);
1831 	return (DDI_SUCCESS);
1832 
1833 exit_enable_ints:
1834 	(void) mac_unregister(sc->sc_mac_handle);
1835 exit_register:
1836 	mac_free(macp);
1837 exit_macalloc:
1838 	vioif_free_mems(sc);
1839 exit_alloc_mems:
1840 	virtio_release_ints(&sc->sc_virtio);
1841 	if (sc->sc_ctrl_vq)
1842 		virtio_free_vq(sc->sc_ctrl_vq);
1843 exit_alloc3:
1844 	virtio_free_vq(sc->sc_tx_vq);
1845 exit_alloc2:
1846 	virtio_free_vq(sc->sc_rx_vq);
1847 exit_alloc1:
1848 exit_ints:
1849 	kmem_cache_destroy(sc->sc_rxbuf_cache);
1850 exit_cache:
1851 exit_features:
1852 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1853 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1854 exit_intrstat:
1855 exit_map:
1856 	kstat_delete(sc->sc_intrstat);
1857 	kmem_free(sc, sizeof (struct vioif_softc));
1858 exit:
1859 	return (DDI_FAILURE);
1860 }
1861 
1862 static int
1863 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1864 {
1865 	struct vioif_softc *sc;
1866 
1867 	if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1868 		return (DDI_FAILURE);
1869 
1870 	switch (cmd) {
1871 	case DDI_DETACH:
1872 		break;
1873 
1874 	case DDI_PM_SUSPEND:
1875 		/* We do not support suspend/resume for vioif. */
1876 		return (DDI_FAILURE);
1877 
1878 	default:
1879 		return (DDI_FAILURE);
1880 	}
1881 
1882 	if (sc->sc_rxloan > 0) {
1883 		dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
1884 		    " not detaching.");
1885 		return (DDI_FAILURE);
1886 	}
1887 
1888 	virtio_stop_vq_intr(sc->sc_rx_vq);
1889 	virtio_stop_vq_intr(sc->sc_tx_vq);
1890 
1891 	virtio_release_ints(&sc->sc_virtio);
1892 
1893 	if (mac_unregister(sc->sc_mac_handle)) {
1894 		return (DDI_FAILURE);
1895 	}
1896 
1897 	mac_free(sc->sc_macp);
1898 
1899 	vioif_free_mems(sc);
1900 	virtio_free_vq(sc->sc_rx_vq);
1901 	virtio_free_vq(sc->sc_tx_vq);
1902 
1903 	virtio_device_reset(&sc->sc_virtio);
1904 
1905 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1906 
1907 	kmem_cache_destroy(sc->sc_rxbuf_cache);
1908 	kstat_delete(sc->sc_intrstat);
1909 	kmem_free(sc, sizeof (struct vioif_softc));
1910 
1911 	return (DDI_SUCCESS);
1912 }
1913 
1914 static int
1915 vioif_quiesce(dev_info_t *devinfo)
1916 {
1917 	struct vioif_softc *sc;
1918 
1919 	if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1920 		return (DDI_FAILURE);
1921 
1922 	virtio_stop_vq_intr(sc->sc_rx_vq);
1923 	virtio_stop_vq_intr(sc->sc_tx_vq);
1924 	virtio_device_reset(&sc->sc_virtio);
1925 
1926 	return (DDI_SUCCESS);
1927 }
1928 
1929 int
1930 _init(void)
1931 {
1932 	int ret = 0;
1933 
1934 	mac_init_ops(&vioif_ops, "vioif");
1935 
1936 	ret = mod_install(&modlinkage);
1937 	if (ret != DDI_SUCCESS) {
1938 		mac_fini_ops(&vioif_ops);
1939 		return (ret);
1940 	}
1941 
1942 	return (0);
1943 }
1944 
1945 int
1946 _fini(void)
1947 {
1948 	int ret;
1949 
1950 	ret = mod_remove(&modlinkage);
1951 	if (ret == DDI_SUCCESS) {
1952 		mac_fini_ops(&vioif_ops);
1953 	}
1954 
1955 	return (ret);
1956 }
1957 
1958 int
1959 _info(struct modinfo *pModinfo)
1960 {
1961 	return (mod_info(&modlinkage, pModinfo));
1962 }
1963