xref: /titanic_50/usr/src/uts/common/io/vioif/vioif.c (revision a0f9c00cd82d49a710cee8aeb83ce42b5fb293a9)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2013 Nexenta Inc.  All rights reserved.
14  * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
15  */
16 
17 /* Based on the NetBSD virtio driver by Minoura Makoto. */
18 /*
19  * Copyright (c) 2010 Minoura Makoto.
20  * All rights reserved.
21  *
22  * Redistribution and use in source and binary forms, with or without
23  * modification, are permitted provided that the following conditions
24  * are met:
25  * 1. Redistributions of source code must retain the above copyright
26  *    notice, this list of conditions and the following disclaimer.
27  * 2. Redistributions in binary form must reproduce the above copyright
28  *    notice, this list of conditions and the following disclaimer in the
29  *    documentation and/or other materials provided with the distribution.
30  *
31  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
32  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
34  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
35  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
40  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41  */
42 
43 #include <sys/types.h>
44 #include <sys/errno.h>
45 #include <sys/param.h>
46 #include <sys/stropts.h>
47 #include <sys/stream.h>
48 #include <sys/strsubr.h>
49 #include <sys/kmem.h>
50 #include <sys/conf.h>
51 #include <sys/devops.h>
52 #include <sys/ksynch.h>
53 #include <sys/stat.h>
54 #include <sys/modctl.h>
55 #include <sys/debug.h>
56 #include <sys/pci.h>
57 #include <sys/ethernet.h>
58 #include <sys/vlan.h>
59 
60 #include <sys/dlpi.h>
61 #include <sys/taskq.h>
62 #include <sys/cyclic.h>
63 
64 #include <sys/pattr.h>
65 #include <sys/strsun.h>
66 
67 #include <sys/random.h>
68 #include <sys/sysmacros.h>
69 #include <sys/stream.h>
70 
71 #include <sys/mac.h>
72 #include <sys/mac_provider.h>
73 #include <sys/mac_ether.h>
74 
75 #include "virtiovar.h"
76 #include "virtioreg.h"
77 
/*
 * Configuration registers: byte offsets into the device-specific
 * configuration space (vioif_link_state() reads the status register).
 */
#define	VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
#define	VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */

/*
 * Feature bits. Each macro is a single-bit mask; vioif_link_state() tests
 * VIRTIO_NET_F_STATUS against sc_virtio.sc_features.
 */
#define	VIRTIO_NET_F_CSUM	(1 << 0) /* Host handles pkts w/ partial csum */
#define	VIRTIO_NET_F_GUEST_CSUM	(1 << 1) /* Guest handles pkts w/ part csum */
#define	VIRTIO_NET_F_MAC	(1 << 5) /* Host has given MAC address. */
#define	VIRTIO_NET_F_GSO	(1 << 6) /* Host handles pkts w/ any GSO type */
#define	VIRTIO_NET_F_GUEST_TSO4	(1 << 7) /* Guest can handle TSOv4 in. */
#define	VIRTIO_NET_F_GUEST_TSO6	(1 << 8) /* Guest can handle TSOv6 in. */
#define	VIRTIO_NET_F_GUEST_ECN	(1 << 9) /* Guest can handle TSO[6] w/ ECN in */
#define	VIRTIO_NET_F_GUEST_UFO	(1 << 10) /* Guest can handle UFO in. */
#define	VIRTIO_NET_F_HOST_TSO4	(1 << 11) /* Host can handle TSOv4 in. */
#define	VIRTIO_NET_F_HOST_TSO6	(1 << 12) /* Host can handle TSOv6 in. */
#define	VIRTIO_NET_F_HOST_ECN	(1 << 13) /* Host can handle TSO[6] w/ ECN in */
#define	VIRTIO_NET_F_HOST_UFO	(1 << 14) /* Host can handle UFO in. */
#define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* Host can merge receive buffers. */
#define	VIRTIO_NET_F_STATUS	(1 << 16) /* Config.status available */
#define	VIRTIO_NET_F_CTRL_VQ	(1 << 17) /* Control channel available */
#define	VIRTIO_NET_F_CTRL_RX	(1 << 18) /* Control channel RX mode support */
#define	VIRTIO_NET_F_CTRL_VLAN	(1 << 19) /* Control channel VLAN filtering */
#define	VIRTIO_NET_F_CTRL_RX_EXTRA (1 << 20) /* Extra RX mode control support */
101 
/*
 * Names for the feature bits above. The leading "\020" is the value 16
 * written in octal; every following entry is an octal, 1-based bit number
 * followed by the name of that bit (e.g. "\6MAC" names 1 << 5).
 * NOTE(review): presumably consumed by a %b-style format when the
 * negotiated features are printed -- confirm at the call site.
 */
#define	VIRTIO_NET_FEATURE_BITS \
	"\020" \
	"\1CSUM" \
	"\2GUEST_CSUM" \
	"\6MAC" \
	"\7GSO" \
	"\10GUEST_TSO4" \
	"\11GUEST_TSO6" \
	"\12GUEST_ECN" \
	"\13GUEST_UFO" \
	"\14HOST_TSO4" \
	"\15HOST_TSO6" \
	"\16HOST_ECN" \
	"\17HOST_UFO" \
	"\20MRG_RXBUF" \
	"\21STATUS" \
	"\22CTRL_VQ" \
	"\23CTRL_RX" \
	"\24CTRL_VLAN" \
	"\25CTRL_RX_EXTRA"

/* Status: bit tested in the VIRTIO_NET_CONFIG_STATUS register. */
#define	VIRTIO_NET_S_LINK_UP	1
125 
#pragma pack(1)
/*
 * Packet header structure. Every rx buffer reserves
 * sizeof (struct virtio_net_hdr) bytes in front of the packet data (see
 * vioif_add_rx()), and vioif_send() builds one at the start of the tx
 * inline buffer. The structure is packed, so it has no padding.
 */
struct virtio_net_hdr {
	uint8_t		flags;		/* VIRTIO_NET_HDR_F_* */
	uint8_t		gso_type;	/* VIRTIO_NET_HDR_GSO_* */
	uint16_t	hdr_len;
	uint16_t	gso_size;	/* vioif_send() stores the LSO MSS */
	uint16_t	csum_start;	/* set for partial-csum packets */
	uint16_t	csum_offset;	/* set for partial-csum packets */
};
#pragma pack()

#define	VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
#define	VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
#define	VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
#define	VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
#define	VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
#define	VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
144 
145 
/*
 * Control virtqueue.
 *
 * A command is identified by a (class, command) pair; the macros below
 * list the classes and, indented under each class, its commands.
 * NOTE(review): nothing in this part of the file issues control commands.
 */
#pragma pack(1)
struct virtio_net_ctrl_cmd {
	uint8_t	class;		/* VIRTIO_NET_CTRL_{RX,MAC,VLAN} */
	uint8_t	command;	/* command within the class */
};
#pragma pack()

/* Class: rx mode. */
#define	VIRTIO_NET_CTRL_RX		0
#define	VIRTIO_NET_CTRL_RX_PROMISC	0
#define	VIRTIO_NET_CTRL_RX_ALLMULTI	1

/* Class: MAC table. */
#define	VIRTIO_NET_CTRL_MAC		1
#define	VIRTIO_NET_CTRL_MAC_TABLE_SET	0

/* Class: VLAN filter. */
#define	VIRTIO_NET_CTRL_VLAN		2
#define	VIRTIO_NET_CTRL_VLAN_ADD	0
#define	VIRTIO_NET_CTRL_VLAN_DEL	1
164 
/*
 * Packed payload layouts used with the control virtqueue commands.
 * NOTE(review): which direction each structure travels in is not visible
 * in this part of the file -- confirm against the virtio specification.
 */
#pragma pack(1)
/* One-byte acknowledgement. */
struct virtio_net_ctrl_status {
	uint8_t	ack;
};

/* One-byte on/off switch for an rx mode command. */
struct virtio_net_ctrl_rx {
	uint8_t	onoff;
};

/* An entry count followed by that many ETHERADDRL-byte MAC addresses. */
struct virtio_net_ctrl_mac_tbl {
	uint32_t nentries;
	uint8_t macs[][ETHERADDRL];
};

/* A single 16-bit VLAN id. */
struct virtio_net_ctrl_vlan {
	uint16_t id;
};
#pragma pack()
183 
/* Driver entry points referenced by the dev_ops table below. */
static int vioif_quiesce(dev_info_t *);
static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);

/*
 * dev_ops for the driver. The slot names follow the parameter list of
 * DDI_DEFINE_STREAM_OPS (name, identify, probe, attach, detach, reset,
 * getinfo, flag, stream_tab, quiesce).
 */
DDI_DEFINE_STREAM_OPS(vioif_ops,
	nulldev,		/* identify */
	nulldev,		/* probe */
	vioif_attach,		/* attach */
	vioif_detach,		/* detach */
	nodev,			/* reset */
	NULL,			/* getinfo */
	D_MP,			/* cb_ops flag */
	NULL,			/* stream_tab */
	vioif_quiesce		/* quiesce */
);
199 
/* Description string stored in the modldrv structure below. */
static char vioif_ident[] = "VirtIO ethernet driver";

/* Standard Module linkage initialization for a Streams driver */
extern struct mod_ops mod_driverops;

/* Ties the module type, its description and vioif_ops together. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	vioif_ident,		/* short description */
	&vioif_ops		/* driver specific ops */
};

/* NULL-terminated list of linkage structures for this module. */
static struct modlinkage modlinkage = {
	MODREV_1,
	{
		(void *)&modldrv,
		NULL,
	},
};
218 
/*
 * Device access attributes: no byte swapping, store caching allowed.
 * NOTE(review): not referenced in this part of the file and not static;
 * presumably used when the device registers are mapped -- confirm.
 */
ddi_device_acc_attr_t vioif_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,	/* virtio is always native byte order */
	DDI_STORECACHING_OK_ACC,
	DDI_DEFAULT_ACC
};
225 
/*
 * A mapping represents a binding for a single buffer that is contiguous in the
 * virtual address space.
 */
struct vioif_buf_mapping {
	caddr_t			vbm_buf;	/* kernel virtual address */
	ddi_dma_handle_t	vbm_dmah;	/* DMA handle */
	ddi_acc_handle_t	vbm_acch;	/* from ddi_dma_mem_alloc() */
	ddi_dma_cookie_t	vbm_dmac;	/* first cookie of the binding */
	unsigned int		vbm_ncookies;	/* cookies in the binding */
};
237 
/*
 * Rx buffers can be loaned upstream, so the code has
 * to allocate them dynamically.
 */
struct vioif_rx_buf {
	struct vioif_softc	*rb_sc;		/* owner, used by vioif_rx_free() */
	frtn_t			rb_frtn;	/* free routine for desballoc() */

	struct vioif_buf_mapping rb_mapping;	/* header + packet data */
};
248 
/*
 * Tx buffers have two mapping types. One, "inline", is pre-allocated and is
 * used to hold the virtio_net_header. Small packets also get copied there, as
 * it's faster than mapping them. Bigger packets get mapped using the "external"
 * mapping array. An array is used, because a packet may consist of multiple
 * fragments, so each fragment gets bound to an entry. According to my
 * observations, the number of fragments does not exceed 2, but just in case,
 * a bigger, up to VIOIF_INDIRECT_MAX - 1 array is allocated. To save resources,
 * the dma handles are allocated lazily in the tx path.
 */
struct vioif_tx_buf {
	/* Message to free once the chain is reclaimed; NULL if copied. */
	mblk_t			*tb_mp;

	/* inline buffer */
	struct vioif_buf_mapping tb_inline_mapping;

	/* External buffers */
	struct vioif_buf_mapping *tb_external_mapping;
	/* Number of external mappings currently bound for tb_mp. */
	unsigned int		tb_external_num;
};
269 
/* Per-instance driver state. */
struct vioif_softc {
	dev_info_t		*sc_dev; /* mirrors virtio_softc->sc_dev */
	struct virtio_softc	sc_virtio;

	mac_handle_t sc_mac_handle;
	mac_register_t *sc_macp;

	struct virtqueue	*sc_rx_vq;
	struct virtqueue	*sc_tx_vq;
	struct virtqueue	*sc_ctrl_vq;

	/* Set while tx is stalled; cleared in vioif_reclaim_used_tx(). */
	unsigned int		sc_tx_stopped:1;

	/* Feature bits. */
	unsigned int		sc_rx_csum:1;
	unsigned int		sc_tx_csum:1;
	unsigned int		sc_tx_tso4:1;

	int			sc_mtu;
	uint8_t			sc_mac[ETHERADDRL];
	/*
	 * For rx buffers, we keep a pointer array, because the buffers
	 * can be loaned upstream, and we have to repopulate the array with
	 * new members.
	 */
	struct vioif_rx_buf	**sc_rxbufs;

	/*
	 * For tx, we just allocate an array of buffers. The packet can
	 * either be copied into the inline buffer, or the external mapping
	 * could be used to map the packet
	 */
	struct vioif_tx_buf	*sc_txbufs;

	kstat_t			*sc_intrstat;
	/*
	 * We "loan" rx buffers upstream and reuse them after they are
	 * freed. This lets us avoid allocations in the hot path.
	 */
	kmem_cache_t		*sc_rxbuf_cache;
	/* Number of rx buffers currently loaned upstream (atomic). */
	ulong_t			sc_rxloan;

	/* Copying small packets turns out to be faster than mapping them. */
	unsigned long		sc_rxcopy_thresh;
	unsigned long		sc_txcopy_thresh;
	/* Statistics counters. */
	uint64_t		sc_ipackets;
	uint64_t		sc_opackets;
	uint64_t		sc_rbytes;
	uint64_t		sc_obytes;
	uint64_t		sc_brdcstxmt;
	uint64_t		sc_brdcstrcv;
	uint64_t		sc_multixmt;
	uint64_t		sc_multircv;
	uint64_t		sc_norecvbuf;
	uint64_t		sc_notxbuf;
	uint64_t		sc_ierrors;
	uint64_t		sc_oerrors;
};
329 
#define	ETHER_HEADER_LEN		sizeof (struct ether_header)

/* MAX_PAYLOAD is the largest frame: MTU + the ethernet header. */
#define	MAX_PAYLOAD	65535
#define	MAX_MTU		(MAX_PAYLOAD - ETHER_HEADER_LEN)
#define	DEFAULT_MTU	ETHERMTU

/*
 * Yeah, we spend 8M per device. Turns out, there is no point
 * being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
 * because vhost does not support them, and we expect to be used with
 * vhost in production environment.
 */
/* The buffer keeps both the packet data and the virtio_net_header. */
#define	VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))

/*
 * We win a bit on header alignment, but the host wins a lot
 * more on moving aligned buffers. Might need more thought.
 */
#define	VIOIF_IP_ALIGN 0

/* Maximum number of indirect descriptors, somewhat arbitrary. */
#define	VIOIF_INDIRECT_MAX 128

/*
 * We pre-allocate a reasonably large buffer to copy small packets
 * there. Bigger packets are mapped, packets with multiple
 * cookies are mapped as indirect buffers.
 */
#define	VIOIF_TX_INLINE_SIZE 2048

/* Native queue size for all queues */
#define	VIOIF_RX_QLEN 0
#define	VIOIF_TX_QLEN 0
#define	VIOIF_CTRL_QLEN 0
366 
/* Ethernet broadcast address, compared against packet destinations. */
static uchar_t vioif_broadcast[ETHERADDRL] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/*
 * NOTE(review): presumably the upper bounds accepted for the copy
 * threshold properties below -- confirm in the property handlers.
 */
#define	VIOIF_TX_THRESH_MAX	640
#define	VIOIF_RX_THRESH_MAX	640

#define	CACHE_NAME_SIZE	32

/* Names of the driver-private properties. */
static char vioif_txcopy_thresh[] =
	"vioif_txcopy_thresh";
static char vioif_rxcopy_thresh[] =
	"vioif_rxcopy_thresh";

/* NULL-terminated list of the private property names. */
static char *vioif_priv_props[] = {
	vioif_txcopy_thresh,
	vioif_rxcopy_thresh,
	NULL
};
386 
387 /* Add up to ddi? */
388 static ddi_dma_cookie_t *
389 vioif_dma_curr_cookie(ddi_dma_handle_t dmah)
390 {
391 	ddi_dma_impl_t *dmah_impl = (void *) dmah;
392 	ASSERT(dmah_impl->dmai_cookie);
393 	return (dmah_impl->dmai_cookie);
394 }
395 
396 static void
397 vioif_dma_reset_cookie(ddi_dma_handle_t dmah, ddi_dma_cookie_t *dmac)
398 {
399 	ddi_dma_impl_t *dmah_impl = (void *) dmah;
400 	dmah_impl->dmai_cookie = dmac;
401 }
402 
403 static link_state_t
404 vioif_link_state(struct vioif_softc *sc)
405 {
406 	if (sc->sc_virtio.sc_features & VIRTIO_NET_F_STATUS) {
407 		if (virtio_read_device_config_2(&sc->sc_virtio,
408 		    VIRTIO_NET_CONFIG_STATUS) & VIRTIO_NET_S_LINK_UP) {
409 			return (LINK_STATE_UP);
410 		} else {
411 			return (LINK_STATE_DOWN);
412 		}
413 	}
414 
415 	return (LINK_STATE_UP);
416 }
417 
/*
 * DMA attributes for the pre-allocated tx inline buffers: byte aligned,
 * and limited to a single cookie (vioif_alloc_mems() asserts this).
 * Slot comments name the ddi_dma_attr_t members in declaration order.
 */
static ddi_dma_attr_t vioif_inline_buf_dma_attr = {
	DMA_ATTR_V0,		/* dma_attr_version */
	0,			/* dma_attr_addr_lo: low address */
	0xFFFFFFFFFFFFFFFF,	/* dma_attr_addr_hi: high address */
	0xFFFFFFFF,		/* dma_attr_count_max: counter register max */
	1,			/* dma_attr_align: byte alignment */
	1,			/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer: minimum transfer size */
	0xFFFFFFFF,		/* dma_attr_maxxfer: max transfer size */
	/* NOTE(review): 15 hex digits here vs. 16 for addr_hi -- typo? */
	0xFFFFFFFFFFFFFFF,	/* dma_attr_seg: address register max */
	1,			/* dma_attr_sgllen: scatter-gather capacity */
	1,			/* dma_attr_granular: operates on bytes */
	0,			/* dma_attr_flags: set to 0 */
};
432 
/*
 * DMA attributes for rx buffers and for externally mapped tx fragments:
 * same as the inline attributes, but a binding may use up to
 * VIOIF_INDIRECT_MAX - 1 cookies.
 */
static ddi_dma_attr_t vioif_mapped_buf_dma_attr = {
	DMA_ATTR_V0,		/* dma_attr_version */
	0,			/* dma_attr_addr_lo: low address */
	0xFFFFFFFFFFFFFFFF,	/* dma_attr_addr_hi: high address */
	0xFFFFFFFF,		/* dma_attr_count_max: counter register max */
	1,			/* dma_attr_align: byte alignment */
	1,			/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer: minimum transfer size */
	0xFFFFFFFF,		/* dma_attr_maxxfer: max transfer size */
	/* NOTE(review): 15 hex digits here vs. 16 for addr_hi -- typo? */
	0xFFFFFFFFFFFFFFF,	/* dma_attr_seg: address register max */

	/* One entry is used for the virtio_net_hdr on the tx path */
	VIOIF_INDIRECT_MAX - 1,	/* dma_attr_sgllen: scatter-gather capacity */
	1,			/* dma_attr_granular: operates on bytes */
	0,			/* dma_attr_flags: set to 0 */
};
449 
/*
 * Access attributes passed to ddi_dma_mem_alloc() for the rx and the tx
 * inline buffers: no byte swapping, store caching allowed.
 */
static ddi_device_acc_attr_t vioif_bufattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STORECACHING_OK_ACC,
	DDI_DEFAULT_ACC
};
456 
457 static void
458 vioif_rx_free(caddr_t free_arg)
459 {
460 	struct vioif_rx_buf *buf = (void *) free_arg;
461 	struct vioif_softc *sc = buf->rb_sc;
462 
463 	kmem_cache_free(sc->sc_rxbuf_cache, buf);
464 	atomic_dec_ulong(&sc->sc_rxloan);
465 }
466 
467 static int
468 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
469 {
470 	_NOTE(ARGUNUSED(kmflags));
471 	struct vioif_softc *sc = user_arg;
472 	struct vioif_rx_buf *buf = buffer;
473 	size_t len;
474 
475 	if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
476 	    DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
477 		dev_err(sc->sc_dev, CE_WARN,
478 		    "Can't allocate dma handle for rx buffer");
479 		goto exit_handle;
480 	}
481 
482 	if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
483 	    VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
484 	    &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
485 	    NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
486 		dev_err(sc->sc_dev, CE_WARN,
487 		    "Can't allocate rx buffer");
488 		goto exit_alloc;
489 	}
490 	ASSERT(len >= VIOIF_RX_SIZE);
491 
492 	if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
493 	    buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
494 	    DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
495 	    &buf->rb_mapping.vbm_ncookies)) {
496 		dev_err(sc->sc_dev, CE_WARN, "Can't bind tx buffer");
497 
498 		goto exit_bind;
499 	}
500 
501 	ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
502 
503 	buf->rb_sc = sc;
504 	buf->rb_frtn.free_arg = (void *) buf;
505 	buf->rb_frtn.free_func = vioif_rx_free;
506 
507 	return (0);
508 exit_bind:
509 	ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
510 exit_alloc:
511 	ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
512 exit_handle:
513 
514 	return (ENOMEM);
515 }
516 
517 static void
518 vioif_rx_destruct(void *buffer, void *user_arg)
519 {
520 	_NOTE(ARGUNUSED(user_arg));
521 	struct vioif_rx_buf *buf = buffer;
522 
523 	ASSERT(buf->rb_mapping.vbm_acch);
524 	ASSERT(buf->rb_mapping.vbm_acch);
525 
526 	(void) ddi_dma_unbind_handle(buf->rb_mapping.vbm_dmah);
527 	ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
528 	ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
529 }
530 
531 static void
532 vioif_free_mems(struct vioif_softc *sc)
533 {
534 	int i;
535 
536 	for (i = 0; i < sc->sc_tx_vq->vq_num; i++) {
537 		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
538 		int j;
539 
540 		/* Tear down the internal mapping. */
541 
542 		ASSERT(buf->tb_inline_mapping.vbm_acch);
543 		ASSERT(buf->tb_inline_mapping.vbm_dmah);
544 
545 		(void) ddi_dma_unbind_handle(buf->tb_inline_mapping.vbm_dmah);
546 		ddi_dma_mem_free(&buf->tb_inline_mapping.vbm_acch);
547 		ddi_dma_free_handle(&buf->tb_inline_mapping.vbm_dmah);
548 
549 		/* We should not see any in-flight buffers at this point. */
550 		ASSERT(!buf->tb_mp);
551 
552 		/* Free all the dma hdnales we allocated lazily. */
553 		for (j = 0; buf->tb_external_mapping[j].vbm_dmah; j++)
554 			ddi_dma_free_handle(
555 			    &buf->tb_external_mapping[j].vbm_dmah);
556 		/* Free the external mapping array. */
557 		kmem_free(buf->tb_external_mapping,
558 		    sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1);
559 	}
560 
561 	kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) *
562 	    sc->sc_tx_vq->vq_num);
563 
564 	for (i = 0; i < sc->sc_rx_vq->vq_num; i++) {
565 		struct vioif_rx_buf *buf = sc->sc_rxbufs[i];
566 
567 		if (buf)
568 			kmem_cache_free(sc->sc_rxbuf_cache, buf);
569 	}
570 	kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) *
571 	    sc->sc_rx_vq->vq_num);
572 }
573 
574 static int
575 vioif_alloc_mems(struct vioif_softc *sc)
576 {
577 	int i, txqsize, rxqsize;
578 	size_t len;
579 	unsigned int nsegments;
580 
581 	txqsize = sc->sc_tx_vq->vq_num;
582 	rxqsize = sc->sc_rx_vq->vq_num;
583 
584 	sc->sc_txbufs = kmem_zalloc(sizeof (struct vioif_tx_buf) * txqsize,
585 	    KM_SLEEP);
586 	if (sc->sc_txbufs == NULL) {
587 		dev_err(sc->sc_dev, CE_WARN,
588 		    "Failed to allocate the tx buffers array");
589 		goto exit_txalloc;
590 	}
591 
592 	/*
593 	 * We don't allocate the rx vioif_bufs, just the pointers, as
594 	 * rx vioif_bufs can be loaned upstream, and we don't know the
595 	 * total number we need.
596 	 */
597 	sc->sc_rxbufs = kmem_zalloc(sizeof (struct vioif_rx_buf *) * rxqsize,
598 	    KM_SLEEP);
599 	if (sc->sc_rxbufs == NULL) {
600 		dev_err(sc->sc_dev, CE_WARN,
601 		    "Failed to allocate the rx buffers pointer array");
602 		goto exit_rxalloc;
603 	}
604 
605 	for (i = 0; i < txqsize; i++) {
606 		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
607 
608 		/* Allocate and bind an inline mapping. */
609 
610 		if (ddi_dma_alloc_handle(sc->sc_dev,
611 		    &vioif_inline_buf_dma_attr,
612 		    DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_dmah)) {
613 
614 			dev_err(sc->sc_dev, CE_WARN,
615 			    "Can't allocate dma handle for tx buffer %d", i);
616 			goto exit_tx;
617 		}
618 
619 		if (ddi_dma_mem_alloc(buf->tb_inline_mapping.vbm_dmah,
620 		    VIOIF_TX_INLINE_SIZE, &vioif_bufattr, DDI_DMA_STREAMING,
621 		    DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_buf,
622 		    &len, &buf->tb_inline_mapping.vbm_acch)) {
623 
624 			dev_err(sc->sc_dev, CE_WARN,
625 			    "Can't allocate tx buffer %d", i);
626 			goto exit_tx;
627 		}
628 		ASSERT(len >= VIOIF_TX_INLINE_SIZE);
629 
630 		if (ddi_dma_addr_bind_handle(buf->tb_inline_mapping.vbm_dmah,
631 		    NULL, buf->tb_inline_mapping.vbm_buf, len,
632 		    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
633 		    &buf->tb_inline_mapping.vbm_dmac, &nsegments)) {
634 
635 			dev_err(sc->sc_dev, CE_WARN,
636 			    "Can't bind tx buffer %d", i);
637 			goto exit_tx;
638 		}
639 
640 		/* We asked for a single segment */
641 		ASSERT(nsegments == 1);
642 
643 		/*
644 		 * We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
645 		 * In reality, I don't expect more then 2-3 used, but who
646 		 * knows.
647 		 */
648 		buf->tb_external_mapping = kmem_zalloc(
649 		    sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1,
650 		    KM_SLEEP);
651 
652 		/*
653 		 * The external mapping's dma handles are allocate lazily,
654 		 * as we don't expect most of them to be used..
655 		 */
656 	}
657 
658 	return (0);
659 
660 exit_tx:
661 	for (i = 0; i < txqsize; i++) {
662 		struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
663 
664 		if (buf->tb_inline_mapping.vbm_dmah)
665 			(void) ddi_dma_unbind_handle(
666 			    buf->tb_inline_mapping.vbm_dmah);
667 
668 		if (buf->tb_inline_mapping.vbm_acch)
669 			ddi_dma_mem_free(
670 			    &buf->tb_inline_mapping.vbm_acch);
671 
672 		if (buf->tb_inline_mapping.vbm_dmah)
673 			ddi_dma_free_handle(
674 			    &buf->tb_inline_mapping.vbm_dmah);
675 
676 		if (buf->tb_external_mapping)
677 			kmem_free(buf->tb_external_mapping,
678 			    sizeof (struct vioif_tx_buf) *
679 			    VIOIF_INDIRECT_MAX - 1);
680 	}
681 
682 	kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf) * rxqsize);
683 
684 exit_rxalloc:
685 	kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) * txqsize);
686 exit_txalloc:
687 	return (ENOMEM);
688 }
689 
690 /* ARGSUSED */
691 int
692 vioif_multicst(void *arg, boolean_t add, const uint8_t *macaddr)
693 {
694 	return (DDI_SUCCESS);
695 }
696 
697 /* ARGSUSED */
698 int
699 vioif_promisc(void *arg, boolean_t on)
700 {
701 	return (DDI_SUCCESS);
702 }
703 
704 /* ARGSUSED */
705 int
706 vioif_unicst(void *arg, const uint8_t *macaddr)
707 {
708 	return (DDI_FAILURE);
709 }
710 
711 
712 static int
713 vioif_add_rx(struct vioif_softc *sc, int kmflag)
714 {
715 	struct vq_entry *ve;
716 	struct vioif_rx_buf *buf;
717 
718 	ve = vq_alloc_entry(sc->sc_rx_vq);
719 	if (!ve) {
720 		/*
721 		 * Out of free descriptors - ring already full.
722 		 * It would be better to update sc_norxdescavail
723 		 * but MAC does not ask for this info, hence we
724 		 * update sc_norecvbuf.
725 		 */
726 		sc->sc_norecvbuf++;
727 		goto exit_vq;
728 	}
729 	buf = sc->sc_rxbufs[ve->qe_index];
730 
731 	if (!buf) {
732 		/* First run, allocate the buffer. */
733 		buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
734 		sc->sc_rxbufs[ve->qe_index] = buf;
735 	}
736 
737 	/* Still nothing? Bye. */
738 	if (!buf) {
739 		dev_err(sc->sc_dev, CE_WARN, "Can't allocate rx buffer");
740 		sc->sc_norecvbuf++;
741 		goto exit_buf;
742 	}
743 
744 	ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
745 
746 	/*
747 	 * For an unknown reason, the virtio_net_hdr must be placed
748 	 * as a separate virtio queue entry.
749 	 */
750 	virtio_ve_add_indirect_buf(ve, buf->rb_mapping.vbm_dmac.dmac_laddress,
751 	    sizeof (struct virtio_net_hdr), B_FALSE);
752 
753 	/* Add the rest of the first cookie. */
754 	virtio_ve_add_indirect_buf(ve,
755 	    buf->rb_mapping.vbm_dmac.dmac_laddress +
756 	    sizeof (struct virtio_net_hdr),
757 	    buf->rb_mapping.vbm_dmac.dmac_size -
758 	    sizeof (struct virtio_net_hdr), B_FALSE);
759 
760 	/*
761 	 * If the buffer consists of a single cookie (unlikely for a
762 	 * 64-k buffer), we are done. Otherwise, add the rest of the cookies
763 	 * using indirect entries.
764 	 */
765 	if (buf->rb_mapping.vbm_ncookies > 1) {
766 		ddi_dma_cookie_t *first_extra_dmac;
767 		ddi_dma_cookie_t dmac;
768 		first_extra_dmac =
769 		    vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
770 
771 		ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
772 		virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
773 		    dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
774 		vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
775 		    first_extra_dmac);
776 	}
777 
778 	virtio_push_chain(ve, B_FALSE);
779 
780 	return (DDI_SUCCESS);
781 
782 exit_buf:
783 	vq_free_entry(sc->sc_rx_vq, ve);
784 exit_vq:
785 	return (DDI_FAILURE);
786 }
787 
788 static int
789 vioif_populate_rx(struct vioif_softc *sc, int kmflag)
790 {
791 	int i = 0;
792 	int ret;
793 
794 	for (;;) {
795 		ret = vioif_add_rx(sc, kmflag);
796 		if (ret)
797 			/*
798 			 * We could not allocate some memory. Try to work with
799 			 * what we've got.
800 			 */
801 			break;
802 		i++;
803 	}
804 
805 	if (i)
806 		virtio_sync_vq(sc->sc_rx_vq);
807 
808 	return (i);
809 }
810 
811 static int
812 vioif_process_rx(struct vioif_softc *sc)
813 {
814 	struct vq_entry *ve;
815 	struct vioif_rx_buf *buf;
816 	mblk_t *mp;
817 	uint32_t len;
818 	int i = 0;
819 
820 	while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
821 
822 		buf = sc->sc_rxbufs[ve->qe_index];
823 		ASSERT(buf);
824 
825 		if (len < sizeof (struct virtio_net_hdr)) {
826 			dev_err(sc->sc_dev, CE_WARN, "RX: Cnain too small: %u",
827 			    len - (uint32_t)sizeof (struct virtio_net_hdr));
828 			sc->sc_ierrors++;
829 			virtio_free_chain(ve);
830 			continue;
831 		}
832 
833 		len -= sizeof (struct virtio_net_hdr);
834 		/*
835 		 * We copy small packets that happenned to fit into a single
836 		 * cookie and reuse the buffers. For bigger ones, we loan
837 		 * the buffers upstream.
838 		 */
839 		if (len < sc->sc_rxcopy_thresh) {
840 			mp = allocb(len, 0);
841 			if (!mp) {
842 				sc->sc_norecvbuf++;
843 				sc->sc_ierrors++;
844 
845 				virtio_free_chain(ve);
846 				break;
847 			}
848 
849 			bcopy((char *)buf->rb_mapping.vbm_buf +
850 			    sizeof (struct virtio_net_hdr), mp->b_rptr, len);
851 			mp->b_wptr = mp->b_rptr + len;
852 
853 		} else {
854 			mp = desballoc((unsigned char *)
855 			    buf->rb_mapping.vbm_buf +
856 			    sizeof (struct virtio_net_hdr) +
857 			    VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
858 			if (!mp) {
859 				sc->sc_norecvbuf++;
860 				sc->sc_ierrors++;
861 
862 				virtio_free_chain(ve);
863 				break;
864 			}
865 			mp->b_wptr = mp->b_rptr + len;
866 
867 			atomic_inc_ulong(&sc->sc_rxloan);
868 			/*
869 			 * Buffer loaned, we will have to allocate a new one
870 			 * for this slot.
871 			 */
872 			sc->sc_rxbufs[ve->qe_index] = NULL;
873 		}
874 
875 		/*
876 		 * virtio-net does not tell us if this packet is multicast
877 		 * or broadcast, so we have to check it.
878 		 */
879 		if (mp->b_rptr[0] & 0x1) {
880 			if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
881 				sc->sc_multircv++;
882 			else
883 				sc->sc_brdcstrcv++;
884 		}
885 
886 		sc->sc_rbytes += len;
887 		sc->sc_ipackets++;
888 
889 		virtio_free_chain(ve);
890 		mac_rx(sc->sc_mac_handle, NULL, mp);
891 		i++;
892 	}
893 
894 	return (i);
895 }
896 
897 static void
898 vioif_reclaim_used_tx(struct vioif_softc *sc)
899 {
900 	struct vq_entry *ve;
901 	struct vioif_tx_buf *buf;
902 	uint32_t len;
903 	mblk_t *mp;
904 	int i = 0;
905 
906 	while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
907 		/* We don't chain descriptors for tx, so don't expect any. */
908 		ASSERT(!ve->qe_next);
909 
910 		buf = &sc->sc_txbufs[ve->qe_index];
911 		mp = buf->tb_mp;
912 		buf->tb_mp = NULL;
913 
914 		if (mp) {
915 			for (i = 0; i < buf->tb_external_num; i++)
916 				(void) ddi_dma_unbind_handle(
917 				    buf->tb_external_mapping[i].vbm_dmah);
918 		}
919 
920 		virtio_free_chain(ve);
921 
922 		/* External mapping used, mp was not freed in vioif_send() */
923 		if (mp)
924 			freemsg(mp);
925 		i++;
926 	}
927 
928 	if (sc->sc_tx_stopped && i) {
929 		sc->sc_tx_stopped = 0;
930 		mac_tx_update(sc->sc_mac_handle);
931 	}
932 }
933 
934 /* sc will be used to update stat counters. */
935 /* ARGSUSED */
936 static inline void
937 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
938     size_t msg_size)
939 {
940 	struct vioif_tx_buf *buf;
941 	buf = &sc->sc_txbufs[ve->qe_index];
942 
943 	ASSERT(buf);
944 
945 	/* Frees mp */
946 	mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
947 	    sizeof (struct virtio_net_hdr));
948 
949 	virtio_ve_add_indirect_buf(ve,
950 	    buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
951 	    sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
952 }
953 
954 static inline int
955 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
956     int i)
957 {
958 	int ret = DDI_SUCCESS;
959 
960 	if (!buf->tb_external_mapping[i].vbm_dmah) {
961 		ret = ddi_dma_alloc_handle(sc->sc_dev,
962 		    &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
963 		    &buf->tb_external_mapping[i].vbm_dmah);
964 		if (ret != DDI_SUCCESS) {
965 			dev_err(sc->sc_dev, CE_WARN,
966 			    "Can't allocate dma handle for external tx buffer");
967 		}
968 	}
969 
970 	return (ret);
971 }
972 
973 static inline int
974 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
975     size_t msg_size)
976 {
977 	_NOTE(ARGUNUSED(msg_size));
978 
979 	struct vioif_tx_buf *buf;
980 	mblk_t *nmp;
981 	int i, j;
982 	int ret = DDI_SUCCESS;
983 
984 	buf = &sc->sc_txbufs[ve->qe_index];
985 
986 	ASSERT(buf);
987 
988 	buf->tb_external_num = 0;
989 	i = 0;
990 	nmp = mp;
991 
992 	while (nmp) {
993 		size_t len;
994 		ddi_dma_cookie_t dmac;
995 		unsigned int ncookies;
996 
997 		len = MBLKL(nmp);
998 		/*
999 		 * For some reason, the network stack can
1000 		 * actually send us zero-length fragments.
1001 		 */
1002 		if (len == 0) {
1003 			nmp = nmp->b_cont;
1004 			continue;
1005 		}
1006 
1007 		ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
1008 		if (ret != DDI_SUCCESS) {
1009 			sc->sc_notxbuf++;
1010 			sc->sc_oerrors++;
1011 			goto exit_lazy_alloc;
1012 		}
1013 		ret = ddi_dma_addr_bind_handle(
1014 		    buf->tb_external_mapping[i].vbm_dmah, NULL,
1015 		    (caddr_t)nmp->b_rptr, len,
1016 		    DDI_DMA_WRITE | DDI_DMA_STREAMING,
1017 		    DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
1018 
1019 		if (ret != DDI_SUCCESS) {
1020 			sc->sc_oerrors++;
1021 			dev_err(sc->sc_dev, CE_NOTE,
1022 			    "TX: Failed to bind external handle");
1023 			goto exit_bind;
1024 		}
1025 
1026 		/* Check if we still fit into the indirect table. */
1027 		if (virtio_ve_indirect_available(ve) < ncookies) {
1028 			dev_err(sc->sc_dev, CE_NOTE,
1029 			    "TX: Indirect descriptor table limit reached."
1030 			    " It took %d fragments.", i);
1031 			sc->sc_notxbuf++;
1032 			sc->sc_oerrors++;
1033 
1034 			ret = DDI_FAILURE;
1035 			goto exit_limit;
1036 		}
1037 
1038 		virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1039 		    dmac, ncookies, B_TRUE);
1040 
1041 		nmp = nmp->b_cont;
1042 		i++;
1043 	}
1044 
1045 	buf->tb_external_num = i;
1046 	/* Save the mp to free it when the packet is sent. */
1047 	buf->tb_mp = mp;
1048 
1049 	return (DDI_SUCCESS);
1050 
1051 exit_limit:
1052 exit_bind:
1053 exit_lazy_alloc:
1054 
1055 	for (j = 0; j < i; j++) {
1056 		(void) ddi_dma_unbind_handle(
1057 		    buf->tb_external_mapping[j].vbm_dmah);
1058 	}
1059 
1060 	return (ret);
1061 }
1062 
1063 static boolean_t
1064 vioif_send(struct vioif_softc *sc, mblk_t *mp)
1065 {
1066 	struct vq_entry *ve;
1067 	struct vioif_tx_buf *buf;
1068 	struct virtio_net_hdr *net_header = NULL;
1069 	size_t msg_size = 0;
1070 	uint32_t csum_start;
1071 	uint32_t csum_stuff;
1072 	uint32_t csum_flags;
1073 	uint32_t lso_flags;
1074 	uint32_t lso_mss;
1075 	mblk_t *nmp;
1076 	int ret;
1077 	boolean_t lso_required = B_FALSE;
1078 
1079 	for (nmp = mp; nmp; nmp = nmp->b_cont)
1080 		msg_size += MBLKL(nmp);
1081 
1082 	if (sc->sc_tx_tso4) {
1083 		mac_lso_get(mp, &lso_mss, &lso_flags);
1084 		lso_required = (lso_flags & HW_LSO);
1085 	}
1086 
1087 	ve = vq_alloc_entry(sc->sc_tx_vq);
1088 
1089 	if (!ve) {
1090 		sc->sc_notxbuf++;
1091 		/* Out of free descriptors - try later. */
1092 		return (B_FALSE);
1093 	}
1094 	buf = &sc->sc_txbufs[ve->qe_index];
1095 
1096 	/* Use the inline buffer of the first entry for the virtio_net_hdr. */
1097 	(void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1098 	    sizeof (struct virtio_net_hdr));
1099 
1100 	net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
1101 
1102 	mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1103 	    NULL, &csum_flags);
1104 
1105 	/* They want us to do the TCP/UDP csum calculation. */
1106 	if (csum_flags & HCK_PARTIALCKSUM) {
1107 		struct ether_header *eth_header;
1108 		int eth_hsize;
1109 
1110 		/* Did we ask for it? */
1111 		ASSERT(sc->sc_tx_csum);
1112 
1113 		/* We only asked for partial csum packets. */
1114 		ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
1115 		ASSERT(!(csum_flags & HCK_FULLCKSUM));
1116 
1117 		eth_header = (void *) mp->b_rptr;
1118 		if (eth_header->ether_type == htons(ETHERTYPE_VLAN)) {
1119 			eth_hsize = sizeof (struct ether_vlan_header);
1120 		} else {
1121 			eth_hsize = sizeof (struct ether_header);
1122 		}
1123 		net_header->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1124 		net_header->csum_start = eth_hsize + csum_start;
1125 		net_header->csum_offset = csum_stuff - csum_start;
1126 	}
1127 
1128 	/* setup LSO fields if required */
1129 	if (lso_required) {
1130 		net_header->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1131 		net_header->gso_size = (uint16_t)lso_mss;
1132 	}
1133 
1134 	virtio_ve_add_indirect_buf(ve,
1135 	    buf->tb_inline_mapping.vbm_dmac.dmac_laddress,
1136 	    sizeof (struct virtio_net_hdr), B_TRUE);
1137 
1138 	/* meanwhile update the statistic */
1139 	if (mp->b_rptr[0] & 0x1) {
1140 		if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
1141 				sc->sc_multixmt++;
1142 			else
1143 				sc->sc_brdcstxmt++;
1144 	}
1145 
1146 	/*
1147 	 * We copy small packets into the inline buffer. The bigger ones
1148 	 * get mapped using the mapped buffer.
1149 	 */
1150 	if (msg_size < sc->sc_txcopy_thresh) {
1151 		vioif_tx_inline(sc, ve, mp, msg_size);
1152 	} else {
1153 		/* statistic gets updated by vioif_tx_external when fail */
1154 		ret = vioif_tx_external(sc, ve, mp, msg_size);
1155 		if (ret != DDI_SUCCESS)
1156 			goto exit_tx_external;
1157 	}
1158 
1159 	virtio_push_chain(ve, B_TRUE);
1160 
1161 	sc->sc_opackets++;
1162 	sc->sc_obytes += msg_size;
1163 
1164 	return (B_TRUE);
1165 
1166 exit_tx_external:
1167 
1168 	vq_free_entry(sc->sc_tx_vq, ve);
1169 	/*
1170 	 * vioif_tx_external can fail when the buffer does not fit into the
1171 	 * indirect descriptor table. Free the mp. I don't expect this ever
1172 	 * to happen.
1173 	 */
1174 	freemsg(mp);
1175 
1176 	return (B_TRUE);
1177 }
1178 
1179 mblk_t *
1180 vioif_tx(void *arg, mblk_t *mp)
1181 {
1182 	struct vioif_softc *sc = arg;
1183 	mblk_t	*nmp;
1184 
1185 	while (mp != NULL) {
1186 		nmp = mp->b_next;
1187 		mp->b_next = NULL;
1188 
1189 		if (!vioif_send(sc, mp)) {
1190 			sc->sc_tx_stopped = 1;
1191 			mp->b_next = nmp;
1192 			break;
1193 		}
1194 		mp = nmp;
1195 	}
1196 
1197 	return (mp);
1198 }
1199 
1200 int
1201 vioif_start(void *arg)
1202 {
1203 	struct vioif_softc *sc = arg;
1204 
1205 	mac_link_update(sc->sc_mac_handle,
1206 	    vioif_link_state(sc));
1207 
1208 	virtio_start_vq_intr(sc->sc_rx_vq);
1209 
1210 	return (DDI_SUCCESS);
1211 }
1212 
1213 void
1214 vioif_stop(void *arg)
1215 {
1216 	struct vioif_softc *sc = arg;
1217 
1218 	virtio_stop_vq_intr(sc->sc_rx_vq);
1219 }
1220 
1221 /* ARGSUSED */
1222 static int
1223 vioif_stat(void *arg, uint_t stat, uint64_t *val)
1224 {
1225 	struct vioif_softc *sc = arg;
1226 
1227 	switch (stat) {
1228 	case MAC_STAT_IERRORS:
1229 		*val = sc->sc_ierrors;
1230 		break;
1231 	case MAC_STAT_OERRORS:
1232 		*val = sc->sc_oerrors;
1233 		break;
1234 	case MAC_STAT_MULTIRCV:
1235 		*val = sc->sc_multircv;
1236 		break;
1237 	case MAC_STAT_BRDCSTRCV:
1238 		*val = sc->sc_brdcstrcv;
1239 		break;
1240 	case MAC_STAT_MULTIXMT:
1241 		*val = sc->sc_multixmt;
1242 		break;
1243 	case MAC_STAT_BRDCSTXMT:
1244 		*val = sc->sc_brdcstxmt;
1245 		break;
1246 	case MAC_STAT_IPACKETS:
1247 		*val = sc->sc_ipackets;
1248 		break;
1249 	case MAC_STAT_RBYTES:
1250 		*val = sc->sc_rbytes;
1251 		break;
1252 	case MAC_STAT_OPACKETS:
1253 		*val = sc->sc_opackets;
1254 		break;
1255 	case MAC_STAT_OBYTES:
1256 		*val = sc->sc_obytes;
1257 		break;
1258 	case MAC_STAT_NORCVBUF:
1259 		*val = sc->sc_norecvbuf;
1260 		break;
1261 	case MAC_STAT_NOXMTBUF:
1262 		*val = sc->sc_notxbuf;
1263 		break;
1264 	case MAC_STAT_IFSPEED:
1265 		/* always 1 Gbit */
1266 		*val = 1000000000ULL;
1267 		break;
1268 	case ETHER_STAT_LINK_DUPLEX:
1269 		/* virtual device, always full-duplex */
1270 		*val = LINK_DUPLEX_FULL;
1271 		break;
1272 
1273 	default:
1274 		return (ENOTSUP);
1275 	}
1276 
1277 	return (DDI_SUCCESS);
1278 }
1279 
1280 static int
1281 vioif_set_prop_private(struct vioif_softc *sc, const char *pr_name,
1282     uint_t pr_valsize, const void *pr_val)
1283 {
1284 	_NOTE(ARGUNUSED(pr_valsize));
1285 
1286 	long result;
1287 
1288 	if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1289 
1290 		if (pr_val == NULL)
1291 			return (EINVAL);
1292 
1293 		(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1294 
1295 		if (result < 0 || result > VIOIF_TX_THRESH_MAX)
1296 			return (EINVAL);
1297 		sc->sc_txcopy_thresh = result;
1298 	}
1299 	if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1300 
1301 		if (pr_val == NULL)
1302 			return (EINVAL);
1303 
1304 		(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1305 
1306 		if (result < 0 || result > VIOIF_RX_THRESH_MAX)
1307 			return (EINVAL);
1308 		sc->sc_rxcopy_thresh = result;
1309 	}
1310 	return (0);
1311 }
1312 
1313 static int
1314 vioif_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1315     uint_t pr_valsize, const void *pr_val)
1316 {
1317 	struct vioif_softc *sc = arg;
1318 	const uint32_t *new_mtu;
1319 	int err;
1320 
1321 	switch (pr_num) {
1322 	case MAC_PROP_MTU:
1323 		new_mtu = pr_val;
1324 
1325 		if (*new_mtu > MAX_MTU) {
1326 			return (EINVAL);
1327 		}
1328 
1329 		err = mac_maxsdu_update(sc->sc_mac_handle, *new_mtu);
1330 		if (err) {
1331 			return (err);
1332 		}
1333 		break;
1334 	case MAC_PROP_PRIVATE:
1335 		err = vioif_set_prop_private(sc, pr_name,
1336 		    pr_valsize, pr_val);
1337 		if (err)
1338 			return (err);
1339 		break;
1340 	default:
1341 		return (ENOTSUP);
1342 	}
1343 
1344 	return (0);
1345 }
1346 
1347 static int
1348 vioif_get_prop_private(struct vioif_softc *sc, const char *pr_name,
1349     uint_t pr_valsize, void *pr_val)
1350 {
1351 	int err = ENOTSUP;
1352 	int value;
1353 
1354 	if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1355 
1356 		value = sc->sc_txcopy_thresh;
1357 		err = 0;
1358 		goto done;
1359 	}
1360 	if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1361 
1362 		value = sc->sc_rxcopy_thresh;
1363 		err = 0;
1364 		goto done;
1365 	}
1366 done:
1367 	if (err == 0) {
1368 		(void) snprintf(pr_val, pr_valsize, "%d", value);
1369 	}
1370 	return (err);
1371 }
1372 
1373 static int
1374 vioif_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1375     uint_t pr_valsize, void *pr_val)
1376 {
1377 	struct vioif_softc *sc = arg;
1378 	int err = ENOTSUP;
1379 
1380 	switch (pr_num) {
1381 	case MAC_PROP_PRIVATE:
1382 		err = vioif_get_prop_private(sc, pr_name,
1383 		    pr_valsize, pr_val);
1384 		break;
1385 	default:
1386 		break;
1387 	}
1388 	return (err);
1389 }
1390 
1391 static void
1392 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1393     mac_prop_info_handle_t prh)
1394 {
1395 	struct vioif_softc *sc = arg;
1396 	char valstr[64];
1397 	int value;
1398 
1399 	switch (pr_num) {
1400 	case MAC_PROP_MTU:
1401 		mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1402 		break;
1403 
1404 	case MAC_PROP_PRIVATE:
1405 		bzero(valstr, sizeof (valstr));
1406 		if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1407 
1408 			value = sc->sc_txcopy_thresh;
1409 		} else	if (strcmp(pr_name,
1410 		    vioif_rxcopy_thresh) == 0) {
1411 			value = sc->sc_rxcopy_thresh;
1412 		} else {
1413 			return;
1414 		}
1415 		(void) snprintf(valstr, sizeof (valstr), "%d", value);
1416 		break;
1417 
1418 	default:
1419 		break;
1420 	}
1421 }
1422 
1423 static boolean_t
1424 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1425 {
1426 	struct vioif_softc *sc = arg;
1427 
1428 	switch (cap) {
1429 	case MAC_CAPAB_HCKSUM:
1430 		if (sc->sc_tx_csum) {
1431 			uint32_t *txflags = cap_data;
1432 
1433 			*txflags = HCKSUM_INET_PARTIAL;
1434 			return (B_TRUE);
1435 		}
1436 		return (B_FALSE);
1437 	case MAC_CAPAB_LSO:
1438 		if (sc->sc_tx_tso4) {
1439 			mac_capab_lso_t *cap_lso = cap_data;
1440 
1441 			cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1442 			cap_lso->lso_basic_tcp_ipv4.lso_max = MAX_MTU;
1443 			return (B_TRUE);
1444 		}
1445 		return (B_FALSE);
1446 	default:
1447 		break;
1448 	}
1449 	return (B_FALSE);
1450 }
1451 
1452 static mac_callbacks_t vioif_m_callbacks = {
1453 	.mc_callbacks	= (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO),
1454 	.mc_getstat	= vioif_stat,
1455 	.mc_start	= vioif_start,
1456 	.mc_stop	= vioif_stop,
1457 	.mc_setpromisc	= vioif_promisc,
1458 	.mc_multicst	= vioif_multicst,
1459 	.mc_unicst	= vioif_unicst,
1460 	.mc_tx		= vioif_tx,
1461 	/* Optional callbacks */
1462 	.mc_reserved	= NULL,		/* reserved */
1463 	.mc_ioctl	= NULL,		/* mc_ioctl */
1464 	.mc_getcapab	= vioif_getcapab,		/* mc_getcapab */
1465 	.mc_open	= NULL,		/* mc_open */
1466 	.mc_close	= NULL,		/* mc_close */
1467 	.mc_setprop	= vioif_setprop,
1468 	.mc_getprop	= vioif_getprop,
1469 	.mc_propinfo	= vioif_propinfo,
1470 };
1471 
1472 static void
1473 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1474     uint32_t features)
1475 {
1476 	char buf[512];
1477 	char *bufp = buf;
1478 	char *bufend = buf + sizeof (buf);
1479 
1480 	/* LINTED E_PTRDIFF_OVERFLOW */
1481 	bufp += snprintf(bufp, bufend - bufp, prefix);
1482 	/* LINTED E_PTRDIFF_OVERFLOW */
1483 	bufp += virtio_show_features(features, bufp, bufend - bufp);
1484 	*bufp = '\0';
1485 
1486 
1487 	/* Using '!' to only CE_NOTE this to the system log. */
1488 	dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
1489 	    VIRTIO_NET_FEATURE_BITS);
1490 }
1491 
1492 /*
1493  * Find out which features are supported by the device and
1494  * choose which ones we wish to use.
1495  */
1496 static int
1497 vioif_dev_features(struct vioif_softc *sc)
1498 {
1499 	uint32_t host_features;
1500 
1501 	host_features = virtio_negotiate_features(&sc->sc_virtio,
1502 	    VIRTIO_NET_F_CSUM |
1503 	    VIRTIO_NET_F_HOST_TSO4 |
1504 	    VIRTIO_NET_F_HOST_ECN |
1505 	    VIRTIO_NET_F_MAC |
1506 	    VIRTIO_NET_F_STATUS |
1507 	    VIRTIO_F_RING_INDIRECT_DESC |
1508 	    VIRTIO_F_NOTIFY_ON_EMPTY);
1509 
1510 	vioif_show_features(sc, "Host features: ", host_features);
1511 	vioif_show_features(sc, "Negotiated features: ",
1512 	    sc->sc_virtio.sc_features);
1513 
1514 	if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1515 		dev_err(sc->sc_dev, CE_NOTE,
1516 		    "Host does not support RING_INDIRECT_DESC, bye.");
1517 		return (DDI_FAILURE);
1518 	}
1519 
1520 	return (DDI_SUCCESS);
1521 }
1522 
1523 static int
1524 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1525 {
1526 	return (virtio_has_feature(&sc->sc_virtio, feature));
1527 }
1528 
1529 static void
1530 vioif_set_mac(struct vioif_softc *sc)
1531 {
1532 	int i;
1533 
1534 	for (i = 0; i < ETHERADDRL; i++) {
1535 		virtio_write_device_config_1(&sc->sc_virtio,
1536 		    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1537 	}
1538 }
1539 
1540 /* Get the mac address out of the hardware, or make up one. */
1541 static void
1542 vioif_get_mac(struct vioif_softc *sc)
1543 {
1544 	int i;
1545 	if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1546 		for (i = 0; i < ETHERADDRL; i++) {
1547 			sc->sc_mac[i] = virtio_read_device_config_1(
1548 			    &sc->sc_virtio,
1549 			    VIRTIO_NET_CONFIG_MAC + i);
1550 		}
1551 		dev_err(sc->sc_dev, CE_NOTE, "Got MAC address from host: %s",
1552 		    ether_sprintf((struct ether_addr *)sc->sc_mac));
1553 	} else {
1554 		/* Get a few random bytes */
1555 		(void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1556 		/* Make sure it's a unicast MAC */
1557 		sc->sc_mac[0] &= ~1;
1558 		/* Set the "locally administered" bit */
1559 		sc->sc_mac[1] |= 2;
1560 
1561 		vioif_set_mac(sc);
1562 
1563 		dev_err(sc->sc_dev, CE_NOTE,
1564 		    "Generated a random MAC address: %s",
1565 		    ether_sprintf((struct ether_addr *)sc->sc_mac));
1566 	}
1567 }
1568 
1569 /*
1570  * Virtqueue interrupt handlers
1571  */
1572 /* ARGSUSED */
1573 uint_t
1574 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1575 {
1576 	struct virtio_softc *vsc = (void *) arg1;
1577 	struct vioif_softc *sc = container_of(vsc,
1578 	    struct vioif_softc, sc_virtio);
1579 
1580 	(void) vioif_process_rx(sc);
1581 
1582 	(void) vioif_populate_rx(sc, KM_NOSLEEP);
1583 
1584 	return (DDI_INTR_CLAIMED);
1585 }
1586 
1587 /* ARGSUSED */
1588 uint_t
1589 vioif_tx_handler(caddr_t arg1, caddr_t arg2)
1590 {
1591 	struct virtio_softc *vsc = (void *)arg1;
1592 	struct vioif_softc *sc = container_of(vsc,
1593 	    struct vioif_softc, sc_virtio);
1594 
1595 	vioif_reclaim_used_tx(sc);
1596 	return (DDI_INTR_CLAIMED);
1597 }
1598 
1599 static int
1600 vioif_register_ints(struct vioif_softc *sc)
1601 {
1602 	int ret;
1603 
1604 	struct virtio_int_handler vioif_vq_h[] = {
1605 		{ vioif_rx_handler },
1606 		{ vioif_tx_handler },
1607 		{ NULL }
1608 	};
1609 
1610 	ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1611 
1612 	return (ret);
1613 }
1614 
1615 
1616 static void
1617 vioif_check_features(struct vioif_softc *sc)
1618 {
1619 	if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
1620 		/* The GSO/GRO featured depend on CSUM, check them here. */
1621 		sc->sc_tx_csum = 1;
1622 		sc->sc_rx_csum = 1;
1623 
1624 		if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1625 			sc->sc_rx_csum = 0;
1626 		}
1627 		cmn_err(CE_NOTE, "Csum enabled.");
1628 
1629 		if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1630 
1631 			sc->sc_tx_tso4 = 1;
1632 			/*
1633 			 * We don't seem to have a way to ask the system
1634 			 * not to send us LSO packets with Explicit
1635 			 * Congestion Notification bit set, so we require
1636 			 * the device to support it in order to do
1637 			 * LSO.
1638 			 */
1639 			if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1640 				dev_err(sc->sc_dev, CE_NOTE,
1641 				    "TSO4 supported, but not ECN. "
1642 				    "Not using LSO.");
1643 				sc->sc_tx_tso4 = 0;
1644 			} else {
1645 				cmn_err(CE_NOTE, "LSO enabled");
1646 			}
1647 		}
1648 	}
1649 }
1650 
1651 static int
1652 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1653 {
1654 	int ret, instance;
1655 	struct vioif_softc *sc;
1656 	struct virtio_softc *vsc;
1657 	mac_register_t *macp;
1658 	char cache_name[CACHE_NAME_SIZE];
1659 
1660 	instance = ddi_get_instance(devinfo);
1661 
1662 	switch (cmd) {
1663 	case DDI_ATTACH:
1664 		break;
1665 
1666 	case DDI_RESUME:
1667 	case DDI_PM_RESUME:
1668 		/* We do not support suspend/resume for vioif. */
1669 		goto exit;
1670 
1671 	default:
1672 		goto exit;
1673 	}
1674 
1675 	sc = kmem_zalloc(sizeof (struct vioif_softc), KM_SLEEP);
1676 	ddi_set_driver_private(devinfo, sc);
1677 
1678 	vsc = &sc->sc_virtio;
1679 
1680 	/* Duplicate for less typing */
1681 	sc->sc_dev = devinfo;
1682 	vsc->sc_dev = devinfo;
1683 
1684 	/*
1685 	 * Initialize interrupt kstat.
1686 	 */
1687 	sc->sc_intrstat = kstat_create("vioif", instance, "intr", "controller",
1688 	    KSTAT_TYPE_INTR, 1, 0);
1689 	if (sc->sc_intrstat == NULL) {
1690 		dev_err(devinfo, CE_WARN, "kstat_create failed");
1691 		goto exit_intrstat;
1692 	}
1693 	kstat_install(sc->sc_intrstat);
1694 
1695 	/* map BAR 0 */
1696 	ret = ddi_regs_map_setup(devinfo, 1,
1697 	    (caddr_t *)&sc->sc_virtio.sc_io_addr,
1698 	    0, 0, &vioif_attr, &sc->sc_virtio.sc_ioh);
1699 	if (ret != DDI_SUCCESS) {
1700 		dev_err(devinfo, CE_WARN, "unable to map bar 0: %d", ret);
1701 		goto exit_map;
1702 	}
1703 
1704 	virtio_device_reset(&sc->sc_virtio);
1705 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
1706 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
1707 
1708 	ret = vioif_dev_features(sc);
1709 	if (ret)
1710 		goto exit_features;
1711 
1712 	vsc->sc_nvqs = vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
1713 
1714 	(void) snprintf(cache_name, CACHE_NAME_SIZE, "vioif%d_rx", instance);
1715 	sc->sc_rxbuf_cache = kmem_cache_create(cache_name,
1716 	    sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
1717 	    vioif_rx_destruct, NULL, sc, NULL, KM_SLEEP);
1718 	if (sc->sc_rxbuf_cache == NULL) {
1719 		dev_err(sc->sc_dev, CE_WARN, "Can't allocate the buffer cache");
1720 		goto exit_cache;
1721 	}
1722 
1723 	ret = vioif_register_ints(sc);
1724 	if (ret) {
1725 		dev_err(sc->sc_dev, CE_WARN,
1726 		    "Failed to allocate interrupt(s)!");
1727 		goto exit_ints;
1728 	}
1729 
1730 	/*
1731 	 * Register layout determined, can now access the
1732 	 * device-specific bits
1733 	 */
1734 	vioif_get_mac(sc);
1735 
1736 	sc->sc_rx_vq = virtio_alloc_vq(&sc->sc_virtio, 0,
1737 	    VIOIF_RX_QLEN, VIOIF_INDIRECT_MAX, "rx");
1738 	if (!sc->sc_rx_vq)
1739 		goto exit_alloc1;
1740 	virtio_stop_vq_intr(sc->sc_rx_vq);
1741 
1742 	sc->sc_tx_vq = virtio_alloc_vq(&sc->sc_virtio, 1,
1743 	    VIOIF_TX_QLEN, VIOIF_INDIRECT_MAX, "tx");
1744 	if (!sc->sc_rx_vq)
1745 		goto exit_alloc2;
1746 	virtio_stop_vq_intr(sc->sc_tx_vq);
1747 
1748 	if (vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ)) {
1749 		sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1750 		    VIOIF_CTRL_QLEN, 0, "ctrl");
1751 		if (!sc->sc_ctrl_vq) {
1752 			goto exit_alloc3;
1753 		}
1754 		virtio_stop_vq_intr(sc->sc_ctrl_vq);
1755 	}
1756 
1757 	virtio_set_status(&sc->sc_virtio,
1758 	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1759 
1760 	sc->sc_rxloan = 0;
1761 
1762 	/* set some reasonable-small default values */
1763 	sc->sc_rxcopy_thresh = 300;
1764 	sc->sc_txcopy_thresh = 300;
1765 	sc->sc_mtu = ETHERMTU;
1766 
1767 	vioif_check_features(sc);
1768 
1769 	if (vioif_alloc_mems(sc))
1770 		goto exit_alloc_mems;
1771 
1772 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1773 		dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1774 		goto exit_macalloc;
1775 	}
1776 
1777 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1778 	macp->m_driver = sc;
1779 	macp->m_dip = devinfo;
1780 	macp->m_src_addr = sc->sc_mac;
1781 	macp->m_callbacks = &vioif_m_callbacks;
1782 	macp->m_min_sdu = 0;
1783 	macp->m_max_sdu = sc->sc_mtu;
1784 	macp->m_margin = VLAN_TAGSZ;
1785 	macp->m_priv_props = vioif_priv_props;
1786 
1787 	sc->sc_macp = macp;
1788 
1789 	/* Pre-fill the rx ring. */
1790 	(void) vioif_populate_rx(sc, KM_SLEEP);
1791 
1792 	ret = mac_register(macp, &sc->sc_mac_handle);
1793 	if (ret != 0) {
1794 		dev_err(devinfo, CE_WARN, "vioif_attach: "
1795 		    "mac_register() failed, ret=%d", ret);
1796 		goto exit_register;
1797 	}
1798 
1799 	ret = virtio_enable_ints(&sc->sc_virtio);
1800 	if (ret) {
1801 		dev_err(devinfo, CE_WARN, "Failed to enable interrupts");
1802 		goto exit_enable_ints;
1803 	}
1804 
1805 	mac_link_update(sc->sc_mac_handle, LINK_STATE_UP);
1806 	return (DDI_SUCCESS);
1807 
1808 exit_enable_ints:
1809 	(void) mac_unregister(sc->sc_mac_handle);
1810 exit_register:
1811 	mac_free(macp);
1812 exit_macalloc:
1813 	vioif_free_mems(sc);
1814 exit_alloc_mems:
1815 	virtio_release_ints(&sc->sc_virtio);
1816 	if (sc->sc_ctrl_vq)
1817 		virtio_free_vq(sc->sc_ctrl_vq);
1818 exit_alloc3:
1819 	virtio_free_vq(sc->sc_tx_vq);
1820 exit_alloc2:
1821 	virtio_free_vq(sc->sc_rx_vq);
1822 exit_alloc1:
1823 exit_ints:
1824 	kmem_cache_destroy(sc->sc_rxbuf_cache);
1825 exit_cache:
1826 exit_features:
1827 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1828 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1829 exit_intrstat:
1830 exit_map:
1831 	kstat_delete(sc->sc_intrstat);
1832 	kmem_free(sc, sizeof (struct vioif_softc));
1833 exit:
1834 	return (DDI_FAILURE);
1835 }
1836 
1837 static int
1838 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1839 {
1840 	struct vioif_softc *sc;
1841 
1842 	if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1843 		return (DDI_FAILURE);
1844 
1845 	switch (cmd) {
1846 	case DDI_DETACH:
1847 		break;
1848 
1849 	case DDI_PM_SUSPEND:
1850 		/* We do not support suspend/resume for vioif. */
1851 		return (DDI_FAILURE);
1852 
1853 	default:
1854 		return (DDI_FAILURE);
1855 	}
1856 
1857 	if (sc->sc_rxloan) {
1858 		dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
1859 		    " not detaching.");
1860 		return (DDI_FAILURE);
1861 	}
1862 
1863 	virtio_stop_vq_intr(sc->sc_rx_vq);
1864 	virtio_stop_vq_intr(sc->sc_tx_vq);
1865 
1866 	virtio_release_ints(&sc->sc_virtio);
1867 
1868 	if (mac_unregister(sc->sc_mac_handle)) {
1869 		return (DDI_FAILURE);
1870 	}
1871 
1872 	mac_free(sc->sc_macp);
1873 
1874 	vioif_free_mems(sc);
1875 	virtio_free_vq(sc->sc_rx_vq);
1876 	virtio_free_vq(sc->sc_tx_vq);
1877 
1878 	virtio_device_reset(&sc->sc_virtio);
1879 
1880 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1881 
1882 	kmem_cache_destroy(sc->sc_rxbuf_cache);
1883 	kstat_delete(sc->sc_intrstat);
1884 	kmem_free(sc, sizeof (struct vioif_softc));
1885 
1886 	return (DDI_SUCCESS);
1887 }
1888 
1889 static int
1890 vioif_quiesce(dev_info_t *devinfo)
1891 {
1892 	struct vioif_softc *sc;
1893 
1894 	if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1895 		return (DDI_FAILURE);
1896 
1897 	virtio_stop_vq_intr(sc->sc_rx_vq);
1898 	virtio_stop_vq_intr(sc->sc_tx_vq);
1899 	virtio_device_reset(&sc->sc_virtio);
1900 
1901 	return (DDI_SUCCESS);
1902 }
1903 
1904 int
1905 _init(void)
1906 {
1907 	int ret = 0;
1908 
1909 	mac_init_ops(&vioif_ops, "vioif");
1910 
1911 	ret = mod_install(&modlinkage);
1912 	if (ret != DDI_SUCCESS) {
1913 		mac_fini_ops(&vioif_ops);
1914 		return (ret);
1915 	}
1916 
1917 	return (0);
1918 }
1919 
1920 int
1921 _fini(void)
1922 {
1923 	int ret;
1924 
1925 	ret = mod_remove(&modlinkage);
1926 	if (ret == DDI_SUCCESS) {
1927 		mac_fini_ops(&vioif_ops);
1928 	}
1929 
1930 	return (ret);
1931 }
1932 
1933 int
1934 _info(struct modinfo *pModinfo)
1935 {
1936 	return (mod_info(&modlinkage, pModinfo));
1937 }
1938