xref: /illumos-gate/usr/src/uts/common/io/vioif/vioif.c (revision 13b136d3061155363c62c9f6568d25b8b27da8f6)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2013 Nexenta Inc.  All rights reserved.
14  * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
15  * Copyright 2019 Joyent, Inc.
16  * Copyright 2019 Joshua M. Clulow <josh@sysmgr.org>
17  */
18 
19 /* Based on the NetBSD virtio driver by Minoura Makoto. */
20 /*
21  * Copyright (c) 2010 Minoura Makoto.
22  * All rights reserved.
23  *
24  * Redistribution and use in source and binary forms, with or without
25  * modification, are permitted provided that the following conditions
26  * are met:
27  * 1. Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  * 2. Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in the
31  *    documentation and/or other materials provided with the distribution.
32  *
33  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
34  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
35  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
36  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
37  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
39  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
40  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
42  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43  */
44 
45 /*
46  * VIRTIO NETWORK DRIVER
47  */
48 
49 #include <sys/types.h>
50 #include <sys/errno.h>
51 #include <sys/param.h>
52 #include <sys/stropts.h>
53 #include <sys/stream.h>
54 #include <sys/strsubr.h>
55 #include <sys/kmem.h>
56 #include <sys/conf.h>
57 #include <sys/devops.h>
58 #include <sys/ksynch.h>
59 #include <sys/stat.h>
60 #include <sys/modctl.h>
61 #include <sys/debug.h>
62 #include <sys/pci.h>
63 #include <sys/ethernet.h>
64 #include <sys/vlan.h>
65 #include <sys/sysmacros.h>
66 #include <sys/smbios.h>
67 
68 #include <sys/dlpi.h>
69 #include <sys/taskq.h>
70 
71 #include <sys/pattr.h>
72 #include <sys/strsun.h>
73 
74 #include <sys/random.h>
75 #include <sys/containerof.h>
76 #include <sys/stream.h>
77 
78 #include <sys/mac.h>
79 #include <sys/mac_provider.h>
80 #include <sys/mac_ether.h>
81 
82 #include "virtio.h"
83 #include "vioif.h"
84 
85 
86 static int vioif_quiesce(dev_info_t *);
87 static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
88 static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
89 static boolean_t vioif_has_feature(vioif_t *, uint32_t);
90 static void vioif_reclaim_restart(vioif_t *);
91 static int vioif_m_stat(void *, uint_t, uint64_t *);
92 static void vioif_m_stop(void *);
93 static int vioif_m_start(void *);
94 static int vioif_m_multicst(void *, boolean_t, const uint8_t *);
95 static int vioif_m_setpromisc(void *, boolean_t);
96 static int vioif_m_unicst(void *, const uint8_t *);
97 static mblk_t *vioif_m_tx(void *, mblk_t *);
98 static int vioif_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
99     const void *);
100 static int vioif_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
101 static void vioif_m_propinfo(void *, const char *, mac_prop_id_t,
102     mac_prop_info_handle_t);
103 static boolean_t vioif_m_getcapab(void *, mac_capab_t, void *);
104 static uint_t vioif_add_rx(vioif_t *);
105 
106 
107 static struct cb_ops vioif_cb_ops = {
108 	.cb_rev =			CB_REV,
109 	.cb_flag =			D_MP | D_NEW,
110 
111 	.cb_open =			nulldev,
112 	.cb_close =			nulldev,
113 	.cb_strategy =			nodev,
114 	.cb_print =			nodev,
115 	.cb_dump =			nodev,
116 	.cb_read =			nodev,
117 	.cb_write =			nodev,
118 	.cb_ioctl =			nodev,
119 	.cb_devmap =			nodev,
120 	.cb_mmap =			nodev,
121 	.cb_segmap =			nodev,
122 	.cb_chpoll =			nochpoll,
123 	.cb_prop_op =			ddi_prop_op,
124 	.cb_str =			NULL,
125 	.cb_aread =			nodev,
126 	.cb_awrite =			nodev,
127 };
128 
129 static struct dev_ops vioif_dev_ops = {
130 	.devo_rev =			DEVO_REV,
131 	.devo_refcnt =			0,
132 
133 	.devo_attach =			vioif_attach,
134 	.devo_detach =			vioif_detach,
135 	.devo_quiesce =			vioif_quiesce,
136 
137 	.devo_cb_ops =			&vioif_cb_ops,
138 
139 	.devo_getinfo =			NULL,
140 	.devo_identify =		nulldev,
141 	.devo_probe =			nulldev,
142 	.devo_reset =			nodev,
143 	.devo_bus_ops =			NULL,
144 	.devo_power =			NULL,
145 };
146 
147 static struct modldrv vioif_modldrv = {
148 	.drv_modops =			&mod_driverops,
149 	.drv_linkinfo =			"VIRTIO network driver",
150 	.drv_dev_ops =			&vioif_dev_ops
151 };
152 
153 static struct modlinkage vioif_modlinkage = {
154 	.ml_rev =			MODREV_1,
155 	.ml_linkage =			{ &vioif_modldrv, NULL }
156 };
157 
158 static mac_callbacks_t vioif_mac_callbacks = {
159 	.mc_getstat =			vioif_m_stat,
160 	.mc_start =			vioif_m_start,
161 	.mc_stop =			vioif_m_stop,
162 	.mc_setpromisc =		vioif_m_setpromisc,
163 	.mc_multicst =			vioif_m_multicst,
164 	.mc_unicst =			vioif_m_unicst,
165 	.mc_tx =			vioif_m_tx,
166 
167 	.mc_callbacks =			(MC_GETCAPAB | MC_SETPROP |
168 					    MC_GETPROP | MC_PROPINFO),
169 	.mc_getcapab =			vioif_m_getcapab,
170 	.mc_setprop =			vioif_m_setprop,
171 	.mc_getprop =			vioif_m_getprop,
172 	.mc_propinfo =			vioif_m_propinfo,
173 };
174 
175 static const uchar_t vioif_broadcast[ETHERADDRL] = {
176 	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
177 };
178 
179 /*
180  * Interval for the periodic TX reclaim.
181  */
182 uint_t vioif_reclaim_ms = 200;
183 
184 /*
185  * Allow the operator to override the kinds of interrupts we'll use for
186  * vioif.  This value defaults to -1 so that it can be overridden to 0 in
187  * /etc/system.
188  */
189 int vioif_allowed_int_types = -1;
190 
191 /*
192  * DMA attribute template for transmit and receive buffers.  The SGL entry
193  * count will be modified before using the template.  Note that these
194  * allocations are aligned so that VIOIF_HEADER_SKIP places the IP header in
195  * received frames at the correct offset for the networking stack.
196  */
197 ddi_dma_attr_t vioif_dma_attr_bufs = {
198 	.dma_attr_version =		DMA_ATTR_V0,
199 	.dma_attr_addr_lo =		0x0000000000000000,
200 	.dma_attr_addr_hi =		0xFFFFFFFFFFFFFFFF,
201 	.dma_attr_count_max =		0x00000000FFFFFFFF,
202 	.dma_attr_align =		VIOIF_HEADER_ALIGN,
203 	.dma_attr_burstsizes =		1,
204 	.dma_attr_minxfer =		1,
205 	.dma_attr_maxxfer =		0x00000000FFFFFFFF,
206 	.dma_attr_seg =			0x00000000FFFFFFFF,
207 	.dma_attr_sgllen =		0,
208 	.dma_attr_granular =		1,
209 	.dma_attr_flags =		0
210 };
211 
212 /*
213  * DMA attributes for mapping larger transmit buffers from the networking
214  * stack.  The requirements are quite loose, but note that the SGL entry length
215  * field is 32-bit.
216  */
217 ddi_dma_attr_t vioif_dma_attr_external = {
218 	.dma_attr_version =		DMA_ATTR_V0,
219 	.dma_attr_addr_lo =		0x0000000000000000,
220 	.dma_attr_addr_hi =		0xFFFFFFFFFFFFFFFF,
221 	.dma_attr_count_max =		0x00000000FFFFFFFF,
222 	.dma_attr_align =		1,
223 	.dma_attr_burstsizes =		1,
224 	.dma_attr_minxfer =		1,
225 	.dma_attr_maxxfer =		0x00000000FFFFFFFF,
226 	.dma_attr_seg =			0x00000000FFFFFFFF,
227 	.dma_attr_sgllen =		VIOIF_MAX_SEGS - 1,
228 	.dma_attr_granular =		1,
229 	.dma_attr_flags =		0
230 };
231 
232 
233 /*
234  * VIRTIO NET MAC PROPERTIES
235  */
236 #define	VIOIF_MACPROP_TXCOPY_THRESH	"_txcopy_thresh"
237 #define	VIOIF_MACPROP_TXCOPY_THRESH_DEF	300
238 #define	VIOIF_MACPROP_TXCOPY_THRESH_MAX	640
239 
240 #define	VIOIF_MACPROP_RXCOPY_THRESH	"_rxcopy_thresh"
241 #define	VIOIF_MACPROP_RXCOPY_THRESH_DEF	300
242 #define	VIOIF_MACPROP_RXCOPY_THRESH_MAX	640
243 
244 static char *vioif_priv_props[] = {
245 	VIOIF_MACPROP_TXCOPY_THRESH,
246 	VIOIF_MACPROP_RXCOPY_THRESH,
247 	NULL
248 };
249 
250 
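/*
 * Take a transmit buffer from the free list, accounting for it as allocated.
 * Returns NULL if no buffers are available.
 */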
251 static vioif_txbuf_t *
252 vioif_txbuf_alloc(vioif_t *vif)
253 {
254 	vioif_txbuf_t *tb;
255 
256 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
257 
258 	if ((tb = list_remove_head(&vif->vif_txbufs)) != NULL) {
259 		vif->vif_ntxbufs_alloc++;
260 	}
261 
262 	return (tb);
263 }
264 
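/*
 * Return a transmit buffer to the free list, clearing its descriptor chain so
 * that it is ready for reuse.
 */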
265 static void
266 vioif_txbuf_free(vioif_t *vif, vioif_txbuf_t *tb)
267 {
268 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
269 
270 	VERIFY3U(vif->vif_ntxbufs_alloc, >, 0);
271 	vif->vif_ntxbufs_alloc--;
272 
273 	virtio_chain_clear(tb->tb_chain);
274 	list_insert_head(&vif->vif_txbufs, tb);
275 }
276 
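/*
 * Take a receive buffer from the free list, accounting for it as allocated.
 * Returns NULL if no buffers are available.
 */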
277 static vioif_rxbuf_t *
278 vioif_rxbuf_alloc(vioif_t *vif)
279 {
280 	vioif_rxbuf_t *rb;
281 
282 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
283 
284 	if ((rb = list_remove_head(&vif->vif_rxbufs)) != NULL) {
285 		vif->vif_nrxbufs_alloc++;
286 	}
287 
288 	return (rb);
289 }
290 
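/*
 * Return a receive buffer to the free list, clearing its descriptor chain so
 * that it is ready for reuse.
 */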
291 static void
292 vioif_rxbuf_free(vioif_t *vif, vioif_rxbuf_t *rb)
293 {
294 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
295 
296 	VERIFY3U(vif->vif_nrxbufs_alloc, >, 0);
297 	vif->vif_nrxbufs_alloc--;
298 
299 	virtio_chain_clear(rb->rb_chain);
300 	list_insert_head(&vif->vif_rxbufs, rb);
301 }
302 
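/*
 * Free routine (see desballoc(9F)) invoked by the networking stack when it is
 * finished with a receive buffer that we loaned upstream.
 */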
303 static void
304 vioif_rx_free_callback(caddr_t free_arg)
305 {
306 	vioif_rxbuf_t *rb = (vioif_rxbuf_t *)free_arg;
307 	vioif_t *vif = rb->rb_vioif;
308 
309 	mutex_enter(&vif->vif_mutex);
310 
311 	/*
312 	 * Return this receive buffer to the free list.
313 	 */
314 	vioif_rxbuf_free(vif, rb);
315 
316 	VERIFY3U(vif->vif_nrxbufs_onloan, >, 0);
317 	vif->vif_nrxbufs_onloan--;
318 
319 	/*
320 	 * Attempt to replenish the receive queue with at least the buffer we
321 	 * just freed.  There isn't a great way to deal with failure here, but
322 	 * because we will loan out at most half of the buffers, there should
323 	 * always be at least some available even if this fails.
324 	 */
325 	(void) vioif_add_rx(vif);
326 
327 	mutex_exit(&vif->vif_mutex);
328 }
329 
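/*
 * Free all transmit and receive buffer tracking objects, along with their
 * descriptor chains and DMA memory.  Every buffer must already be back on its
 * free list when this is called.
 */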
330 static void
331 vioif_free_bufs(vioif_t *vif)
332 {
333 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
334 
335 	VERIFY3U(vif->vif_ntxbufs_alloc, ==, 0);
336 	for (uint_t i = 0; i < vif->vif_txbufs_capacity; i++) {
337 		vioif_txbuf_t *tb = &vif->vif_txbufs_mem[i];
338 
339 		/*
340 		 * Ensure that this txbuf is now in the free list:
341 		 */
342 		VERIFY(list_link_active(&tb->tb_link));
343 		list_remove(&vif->vif_txbufs, tb);
344 
345 		/*
346 		 * We should not have an mblk chain at this point.
347 		 */
348 		VERIFY3P(tb->tb_mp, ==, NULL);
349 
350 		if (tb->tb_dma != NULL) {
351 			virtio_dma_free(tb->tb_dma);
352 			tb->tb_dma = NULL;
353 		}
354 
355 		if (tb->tb_chain != NULL) {
356 			virtio_chain_free(tb->tb_chain);
357 			tb->tb_chain = NULL;
358 		}
359 
360 		if (tb->tb_dmaext != NULL) {
361 			for (uint_t j = 0; j < tb->tb_dmaext_capacity; j++) {
362 				if (tb->tb_dmaext[j] != NULL) {
363 					virtio_dma_free(
364 					    tb->tb_dmaext[j]);
365 					tb->tb_dmaext[j] = NULL;
366 				}
367 			}
368 
369 			kmem_free(tb->tb_dmaext,
370 			    sizeof (virtio_dma_t *) * tb->tb_dmaext_capacity);
371 			tb->tb_dmaext = NULL;
372 			tb->tb_dmaext_capacity = 0;
373 		}
374 	}
375 	VERIFY(list_is_empty(&vif->vif_txbufs));
376 	if (vif->vif_txbufs_mem != NULL) {
377 		kmem_free(vif->vif_txbufs_mem,
378 		    sizeof (vioif_txbuf_t) * vif->vif_txbufs_capacity);
379 		vif->vif_txbufs_mem = NULL;
380 		vif->vif_txbufs_capacity = 0;
381 	}
382 
383 	VERIFY3U(vif->vif_nrxbufs_alloc, ==, 0);
384 	for (uint_t i = 0; i < vif->vif_rxbufs_capacity; i++) {
385 		vioif_rxbuf_t *rb = &vif->vif_rxbufs_mem[i];
386 
387 		/*
388 		 * Ensure that this rxbuf is now in the free list:
389 		 */
390 		VERIFY(list_link_active(&rb->rb_link));
391 		list_remove(&vif->vif_rxbufs, rb);
392 
393 		if (rb->rb_dma != NULL) {
394 			virtio_dma_free(rb->rb_dma);
395 			rb->rb_dma = NULL;
396 		}
397 
398 		if (rb->rb_chain != NULL) {
399 			virtio_chain_free(rb->rb_chain);
400 			rb->rb_chain = NULL;
401 		}
402 	}
403 	VERIFY(list_is_empty(&vif->vif_rxbufs));
404 	if (vif->vif_rxbufs_mem != NULL) {
405 		kmem_free(vif->vif_rxbufs_mem,
406 		    sizeof (vioif_rxbuf_t) * vif->vif_rxbufs_capacity);
407 		vif->vif_rxbufs_mem = NULL;
408 		vif->vif_rxbufs_capacity = 0;
409 	}
410 }
411 
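/*
 * Allocate the transmit and receive buffer tracking objects, their descriptor
 * chains, and their DMA memory.  On failure, any partial allocation is
 * unwound with vioif_free_bufs() and ENOMEM is returned.
 */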
412 static int
413 vioif_alloc_bufs(vioif_t *vif)
414 {
415 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
416 
417 	/*
418 	 * Allocate memory for the transmit and receive buffer tracking
419 	 * objects.  If either ring is unusually small, we'll reduce our
420 	 * target buffer count accordingly.
421 	 */
422 	vif->vif_txbufs_capacity = MIN(VIRTIO_NET_TX_BUFS,
423 	    virtio_queue_size(vif->vif_tx_vq));
424 	vif->vif_txbufs_mem = kmem_zalloc(
425 	    sizeof (vioif_txbuf_t) * vif->vif_txbufs_capacity, KM_SLEEP);
426 	list_create(&vif->vif_txbufs, sizeof (vioif_txbuf_t),
427 	    offsetof(vioif_txbuf_t, tb_link));
428 
429 	vif->vif_rxbufs_capacity = MIN(VIRTIO_NET_RX_BUFS,
430 	    virtio_queue_size(vif->vif_rx_vq));
431 	vif->vif_rxbufs_mem = kmem_zalloc(
432 	    sizeof (vioif_rxbuf_t) * vif->vif_rxbufs_capacity, KM_SLEEP);
433 	list_create(&vif->vif_rxbufs, sizeof (vioif_rxbuf_t),
434 	    offsetof(vioif_rxbuf_t, rb_link));
435 
436 	/*
437 	 * Do not loan more than half of our allocated receive buffers into
438 	 * the networking stack.
439 	 */
440 	vif->vif_nrxbufs_onloan_max = vif->vif_rxbufs_capacity / 2;
441 
442 	/*
443 	 * Put everything in the free list straight away in order to simplify
444 	 * the use of vioif_free_bufs() for cleanup on allocation failure.
445 	 */
446 	for (uint_t i = 0; i < vif->vif_txbufs_capacity; i++) {
447 		list_insert_tail(&vif->vif_txbufs, &vif->vif_txbufs_mem[i]);
448 	}
449 	for (uint_t i = 0; i < vif->vif_rxbufs_capacity; i++) {
450 		list_insert_tail(&vif->vif_rxbufs, &vif->vif_rxbufs_mem[i]);
451 	}
452 
453 	/*
454 	 * Start from the DMA attribute template common to both transmit and
455 	 * receive buffers.  The SGL entry count will be modified for each
456 	 * buffer type.
457 	 */
458 	ddi_dma_attr_t attr = vioif_dma_attr_bufs;
459 
460 	/*
461 	 * The transmit inline buffer is small (less than a page), so it's
462 	 * reasonable to request a single cookie.
463 	 */
464 	attr.dma_attr_sgllen = 1;
465 
466 	for (vioif_txbuf_t *tb = list_head(&vif->vif_txbufs); tb != NULL;
467 	    tb = list_next(&vif->vif_txbufs, tb)) {
468 		if ((tb->tb_dma = virtio_dma_alloc(vif->vif_virtio,
469 		    VIOIF_TX_INLINE_SIZE, &attr,
470 		    DDI_DMA_STREAMING | DDI_DMA_WRITE, KM_SLEEP)) == NULL) {
471 			goto fail;
472 		}
473 		VERIFY3U(virtio_dma_ncookies(tb->tb_dma), ==, 1);
474 
475 		if ((tb->tb_chain = virtio_chain_alloc(vif->vif_tx_vq,
476 		    KM_SLEEP)) == NULL) {
477 			goto fail;
478 		}
479 		virtio_chain_data_set(tb->tb_chain, tb);
480 
481 		tb->tb_dmaext_capacity = VIOIF_MAX_SEGS - 1;
482 		tb->tb_dmaext = kmem_zalloc(
483 		    sizeof (virtio_dma_t *) * tb->tb_dmaext_capacity,
484 		    KM_SLEEP);
485 	}
486 
487 	/*
488 	 * The receive buffers are larger, and we can tolerate a large number
489 	 * of segments.  Adjust the SGL entry count, setting aside one segment
490 	 * for the virtio net header.
491 	 */
492 	attr.dma_attr_sgllen = VIOIF_MAX_SEGS - 1;
493 
494 	for (vioif_rxbuf_t *rb = list_head(&vif->vif_rxbufs); rb != NULL;
495 	    rb = list_next(&vif->vif_rxbufs, rb)) {
496 		if ((rb->rb_dma = virtio_dma_alloc(vif->vif_virtio,
497 		    VIOIF_RX_BUF_SIZE, &attr, DDI_DMA_STREAMING | DDI_DMA_READ,
498 		    KM_SLEEP)) == NULL) {
499 			goto fail;
500 		}
501 
502 		if ((rb->rb_chain = virtio_chain_alloc(vif->vif_rx_vq,
503 		    KM_SLEEP)) == NULL) {
504 			goto fail;
505 		}
506 		virtio_chain_data_set(rb->rb_chain, rb);
507 
508 		/*
509 		 * Ensure that the first cookie is sufficient to cover the
510 		 * header skip region plus one byte.
511 		 */
512 		VERIFY3U(virtio_dma_cookie_size(rb->rb_dma, 0), >=,
513 		    VIOIF_HEADER_SKIP + 1);
514 
515 		/*
516 		 * Ensure that the frame data begins at a location with a
517 		 * correctly aligned IP header.
518 		 */
519 		VERIFY3U((uintptr_t)virtio_dma_va(rb->rb_dma,
520 		    VIOIF_HEADER_SKIP) % 4, ==, 2);
521 
522 		rb->rb_vioif = vif;
523 		rb->rb_frtn.free_func = vioif_rx_free_callback;
524 		rb->rb_frtn.free_arg = (caddr_t)rb;
525 	}
526 
527 	return (0);
528 
529 fail:
530 	vioif_free_bufs(vif);
531 	return (ENOMEM);
532 }
533 
534 static int
535 vioif_m_multicst(void *arg, boolean_t add, const uint8_t *mcst_addr)
536 {
537 	/*
538 	 * Even though we currently do not have support for programming
539 	 * multicast filters, or even enabling promiscuous mode, we return
540 	 * success here to avoid the networking stack falling back to link
541 	 * layer broadcast for multicast traffic.  Some hypervisors already
542 	 * pass received multicast frames onto the guest, so at least on those
543 	 * systems multicast will work as expected anyway.
544 	 */
545 	return (0);
546 }
547 
548 static int
549 vioif_m_setpromisc(void *arg, boolean_t on)
550 {
551 	/*
552 	 * Even though we cannot currently enable promiscuous mode, we return
553 	 * success here to allow tools like snoop(1M) to continue to function.
554 	 */
555 	return (0);
556 }
557 
558 static int
559 vioif_m_unicst(void *arg, const uint8_t *mac)
560 {
561 	return (ENOTSUP);
562 }
563 
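/*
 * Give as many free receive buffers as possible to the device, returning the
 * number of buffers added to the receive queue.
 */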
564 static uint_t
565 vioif_add_rx(vioif_t *vif)
566 {
567 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
568 
569 	if (vif->vif_runstate != VIOIF_RUNSTATE_RUNNING) {
570 		/*
571 		 * If the NIC is not running, do not give the device any
572 		 * receive buffers.
573 		 */
574 		return (0);
575 	}
576 
577 	uint_t num_added = 0;
578 
579 	vioif_rxbuf_t *rb;
580 	while ((rb = vioif_rxbuf_alloc(vif)) != NULL) {
581 		/*
582 		 * For legacy devices, and those that have not negotiated
583 		 * VIRTIO_F_ANY_LAYOUT, the virtio net header must appear in a
584 		 * separate descriptor entry from the rest of the buffer.
585 		 */
586 		if (virtio_chain_append(rb->rb_chain,
587 		    virtio_dma_cookie_pa(rb->rb_dma, 0),
588 		    sizeof (struct virtio_net_hdr),
589 		    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
590 			goto fail;
591 		}
592 
593 		for (uint_t n = 0; n < virtio_dma_ncookies(rb->rb_dma); n++) {
594 			uint64_t pa = virtio_dma_cookie_pa(rb->rb_dma, n);
595 			size_t sz = virtio_dma_cookie_size(rb->rb_dma, n);
596 
597 			if (n == 0) {
598 				pa += VIOIF_HEADER_SKIP;
599 				VERIFY3U(sz, >, VIOIF_HEADER_SKIP);
600 				sz -= VIOIF_HEADER_SKIP;
601 			}
602 
603 			if (virtio_chain_append(rb->rb_chain, pa, sz,
604 			    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
605 				goto fail;
606 			}
607 		}
608 
609 		virtio_chain_submit(rb->rb_chain, B_FALSE);
610 		num_added++;
611 		continue;
612 
613 fail:
614 		vioif_rxbuf_free(vif, rb);
615 		vif->vif_norecvbuf++;
616 		break;
617 	}
618 
619 	if (num_added > 0) {
620 		virtio_queue_flush(vif->vif_rx_vq);
621 	}
622 
623 	return (num_added);
624 }
625 
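/*
 * Process buffers that the device has finished receiving into, passing any
 * frames up to MAC.  Small frames are copied into new mblks so that the
 * buffer can be reused immediately; larger frames are loaned upstream.
 */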
626 static uint_t
627 vioif_process_rx(vioif_t *vif)
628 {
629 	virtio_chain_t *vic;
630 	mblk_t *mphead = NULL, *lastmp = NULL, *mp;
631 	uint_t num_processed = 0;
632 
633 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
634 
635 	while ((vic = virtio_queue_poll(vif->vif_rx_vq)) != NULL) {
636 		/*
637 		 * We have to use the chain received length here, as the device
638 		 * does not tell us the received frame length any other way.
639 		 * In a limited survey of hypervisors, virtio network devices
640 		 * appear to provide the right value here.
641 		 */
642 		size_t len = virtio_chain_received_length(vic);
643 		vioif_rxbuf_t *rb = virtio_chain_data(vic);
644 
645 		virtio_dma_sync(rb->rb_dma, DDI_DMA_SYNC_FORCPU);
646 
647 		/*
648 		 * If the NIC is not running, discard any received frames.
649 		 */
650 		if (vif->vif_runstate != VIOIF_RUNSTATE_RUNNING) {
651 			vioif_rxbuf_free(vif, rb);
652 			continue;
653 		}
654 
655 		if (len < sizeof (struct virtio_net_hdr)) {
656 			vif->vif_rxfail_chain_undersize++;
657 			vif->vif_ierrors++;
658 			vioif_rxbuf_free(vif, rb);
659 			continue;
660 		}
661 		len -= sizeof (struct virtio_net_hdr);
662 
663 		/*
664 		 * We copy small packets that happen to fit into a single
665 		 * cookie and reuse the buffers. For bigger ones, we loan
666 		 * the buffers upstream.
667 		 */
668 		if (len < vif->vif_rxcopy_thresh ||
669 		    vif->vif_nrxbufs_onloan >= vif->vif_nrxbufs_onloan_max) {
670 			mutex_exit(&vif->vif_mutex);
671 			if ((mp = allocb(len, 0)) == NULL) {
672 				mutex_enter(&vif->vif_mutex);
673 				vif->vif_norecvbuf++;
674 				vif->vif_ierrors++;
675 
676 				vioif_rxbuf_free(vif, rb);
677 				continue;
678 			}
679 
680 			bcopy(virtio_dma_va(rb->rb_dma, VIOIF_HEADER_SKIP),
681 			    mp->b_rptr, len);
682 			mp->b_wptr = mp->b_rptr + len;
683 
684 			/*
685 			 * As the packet contents were copied rather than
686 			 * loaned, we can return the receive buffer resources
687 			 * to the free list.
688 			 */
689 			mutex_enter(&vif->vif_mutex);
690 			vioif_rxbuf_free(vif, rb);
691 
692 		} else {
693 			mutex_exit(&vif->vif_mutex);
694 			if ((mp = desballoc(virtio_dma_va(rb->rb_dma,
695 			    VIOIF_HEADER_SKIP), len, 0,
696 			    &rb->rb_frtn)) == NULL) {
697 				mutex_enter(&vif->vif_mutex);
698 				vif->vif_norecvbuf++;
699 				vif->vif_ierrors++;
700 
701 				vioif_rxbuf_free(vif, rb);
702 				continue;
703 			}
704 			mp->b_wptr = mp->b_rptr + len;
705 
706 			mutex_enter(&vif->vif_mutex);
707 			vif->vif_nrxbufs_onloan++;
708 		}
709 
710 		/*
711 		 * virtio-net does not tell us if this packet is multicast
712 		 * or broadcast, so we have to check it.
713 		 */
714 		if (mp->b_rptr[0] & 0x1) {
715 			if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
716 				vif->vif_multircv++;
717 			else
718 				vif->vif_brdcstrcv++;
719 		}
720 
721 		vif->vif_rbytes += len;
722 		vif->vif_ipackets++;
723 
724 		if (lastmp == NULL) {
725 			mphead = mp;
726 		} else {
727 			lastmp->b_next = mp;
728 		}
729 		lastmp = mp;
730 		num_processed++;
731 	}
732 
733 	if (mphead != NULL) {
734 		if (vif->vif_runstate == VIOIF_RUNSTATE_RUNNING) {
735 			mutex_exit(&vif->vif_mutex);
736 			mac_rx(vif->vif_mac_handle, NULL, mphead);
737 			mutex_enter(&vif->vif_mutex);
738 		} else {
739 			/*
740 			 * The NIC was disabled part way through our execution,
741 			 * so free the messages we allocated.
742 			 */
743 			freemsgchain(mphead);
744 		}
745 	}
746 
747 	return (num_processed);
748 }
749 
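/*
 * Reclaim transmit buffers that the device has finished with, returning them
 * to the free list.  If transmission was previously blocked waiting for
 * descriptors, notify MAC that it may be resumed.
 */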
750 static uint_t
751 vioif_reclaim_used_tx(vioif_t *vif)
752 {
753 	virtio_chain_t *vic;
754 	uint_t num_reclaimed = 0;
755 
756 	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
757 
758 	while ((vic = virtio_queue_poll(vif->vif_tx_vq)) != NULL) {
759 		vioif_txbuf_t *tb = virtio_chain_data(vic);
760 
761 		if (tb->tb_mp != NULL) {
762 			/*
763 			 * Unbind the external mapping.
764 			 */
765 			for (uint_t i = 0; i < tb->tb_dmaext_capacity; i++) {
766 				if (tb->tb_dmaext[i] == NULL) {
767 					continue;
768 				}
769 
770 				virtio_dma_unbind(tb->tb_dmaext[i]);
771 			}
772 
773 			freemsg(tb->tb_mp);
774 			tb->tb_mp = NULL;
775 		}
776 
777 		/*
778 		 * Return this transmit buffer to the free list for reuse.
779 		 */
780 		mutex_enter(&vif->vif_mutex);
781 		vioif_txbuf_free(vif, tb);
782 		mutex_exit(&vif->vif_mutex);
783 
784 		num_reclaimed++;
785 	}
786 
787 	/* Return ring to transmitting state if descriptors were reclaimed. */
788 	if (num_reclaimed > 0) {
789 		boolean_t do_update = B_FALSE;
790 
791 		mutex_enter(&vif->vif_mutex);
792 		vif->vif_stat_tx_reclaim += num_reclaimed;
793 		if (vif->vif_tx_corked) {
794 			/*
795 			 * TX was corked on a lack of available descriptors.
796 			 * That dire state has passed so the TX interrupt can
797 			 * be disabled and MAC can be notified that
798 			 * transmission is possible again.
799 			 */
800 			vif->vif_tx_corked = B_FALSE;
801 			virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);
802 			do_update = B_TRUE;
803 		}
804 
805 		if (do_update) {
806 			mac_tx_update(vif->vif_mac_handle);
807 		}
808 		mutex_exit(&vif->vif_mutex);
809 	}
810 
811 	return (num_reclaimed);
812 }
813 
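/*
 * Periodic timeout (see timeout(9F)) that reclaims used transmit descriptors,
 * rescheduling itself for as long as the transmit ring appears active.
 */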
814 static void
815 vioif_reclaim_periodic(void *arg)
816 {
817 	vioif_t *vif = arg;
818 	uint_t num_reclaimed;
819 
820 	num_reclaimed = vioif_reclaim_used_tx(vif);
821 
822 	mutex_enter(&vif->vif_mutex);
823 	vif->vif_tx_reclaim_tid = 0;
824 	/*
825 	 * If used descriptors were reclaimed or TX descriptors appear to be
826 	 * outstanding, the ring is considered active and periodic reclamation
827 	 * is necessary for now.
828 	 */
829 	if (num_reclaimed != 0 || virtio_queue_nactive(vif->vif_tx_vq) != 0) {
830 		/* Do not reschedule if the ring is being drained. */
831 		if (!vif->vif_tx_drain) {
832 			vioif_reclaim_restart(vif);
833 		}
834 	}
835 	mutex_exit(&vif->vif_mutex);
836 }
837 
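/*
 * Arm the periodic transmit reclaim timeout if it is not already pending.
 */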
838 static void
839 vioif_reclaim_restart(vioif_t *vif)
840 {
841 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
842 	VERIFY(!vif->vif_tx_drain);
843 
844 	if (vif->vif_tx_reclaim_tid == 0) {
845 		vif->vif_tx_reclaim_tid = timeout(vioif_reclaim_periodic, vif,
846 		    MSEC_TO_TICK_ROUNDUP(vioif_reclaim_ms));
847 	}
848 }
849 
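/*
 * Stop the periodic reclaim and wait for all outstanding transmit buffers to
 * be processed by the host and reclaimed.  Called while the NIC is being
 * stopped.
 */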
850 static void
851 vioif_tx_drain(vioif_t *vif)
852 {
853 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
854 	VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_STOPPING);
855 
856 	vif->vif_tx_drain = B_TRUE;
857 	/* Put a stop to the periodic reclaim if it is running */
858 	if (vif->vif_tx_reclaim_tid != 0) {
859 		timeout_id_t tid = vif->vif_tx_reclaim_tid;
860 
861 		/*
862 		 * With vif_tx_drain set, there is no risk that a racing
863 		 * vioif_reclaim_periodic() call will reschedule itself.
864 		 *
865 		 * Being part of the mc_stop hook also guarantees that
866 		 * vioif_m_tx() will not be called to restart it.
867 		 */
868 		vif->vif_tx_reclaim_tid = 0;
869 		mutex_exit(&vif->vif_mutex);
870 		(void) untimeout(tid);
871 		mutex_enter(&vif->vif_mutex);
872 	}
873 	virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);
874 
875 	/*
876 	 * Wait for all of the TX descriptors to be processed by the host so
877 	 * they can be reclaimed.
878 	 */
879 	while (vif->vif_ntxbufs_alloc > 0) {
880 		mutex_exit(&vif->vif_mutex);
881 		(void) vioif_reclaim_used_tx(vif);
882 		delay(5);
883 		mutex_enter(&vif->vif_mutex);
884 	}
885 	VERIFY(!vif->vif_tx_corked);
886 	VERIFY3U(vif->vif_tx_reclaim_tid, ==, 0);
887 	VERIFY3U(virtio_queue_nactive(vif->vif_tx_vq), ==, 0);
888 }
889 
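/*
 * Transmit a frame by copying it into the preallocated inline buffer and
 * appending a single descriptor for the packet data.
 */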
890 static int
891 vioif_tx_inline(vioif_t *vif, vioif_txbuf_t *tb, mblk_t *mp, size_t msg_size)
892 {
893 	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
894 
895 	VERIFY3U(msg_size, <=, virtio_dma_size(tb->tb_dma) - VIOIF_HEADER_SKIP);
896 
897 	/*
898 	 * Copy the message into the inline buffer and then free the message.
899 	 */
900 	mcopymsg(mp, virtio_dma_va(tb->tb_dma, VIOIF_HEADER_SKIP));
901 
902 	if (virtio_chain_append(tb->tb_chain,
903 	    virtio_dma_cookie_pa(tb->tb_dma, 0) + VIOIF_HEADER_SKIP,
904 	    msg_size, VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
905 		return (DDI_FAILURE);
906 	}
907 
908 	return (DDI_SUCCESS);
909 }
910 
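/*
 * Transmit a frame by DMA-binding each mblk in the chain and appending a
 * descriptor for every resulting cookie.  The message is held until the
 * buffer is reclaimed from the device.
 */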
911 static int
912 vioif_tx_external(vioif_t *vif, vioif_txbuf_t *tb, mblk_t *mp, size_t msg_size)
913 {
914 	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
915 
916 	mblk_t *nmp = mp;
917 	tb->tb_ndmaext = 0;
918 
919 	while (nmp != NULL) {
920 		size_t len;
921 
922 		if ((len = MBLKL(nmp)) == 0) {
923 			/*
924 			 * Skip any zero-length entries in the chain.
925 			 */
926 			nmp = nmp->b_cont;
927 			continue;
928 		}
929 
930 		if (tb->tb_ndmaext >= tb->tb_dmaext_capacity) {
931 			mutex_enter(&vif->vif_mutex);
932 			vif->vif_txfail_indirect_limit++;
933 			vif->vif_notxbuf++;
934 			mutex_exit(&vif->vif_mutex);
935 			goto fail;
936 		}
937 
938 		if (tb->tb_dmaext[tb->tb_ndmaext] == NULL) {
939 			/*
940 			 * Allocate a DMA handle for this slot.
941 			 */
942 			if ((tb->tb_dmaext[tb->tb_ndmaext] =
943 			    virtio_dma_alloc_nomem(vif->vif_virtio,
944 			    &vioif_dma_attr_external, KM_SLEEP)) == NULL) {
945 				mutex_enter(&vif->vif_mutex);
946 				vif->vif_notxbuf++;
947 				mutex_exit(&vif->vif_mutex);
948 				goto fail;
949 			}
950 		}
951 		virtio_dma_t *extdma = tb->tb_dmaext[tb->tb_ndmaext++];
952 
953 		if (virtio_dma_bind(extdma, nmp->b_rptr, len,
954 		    DDI_DMA_WRITE | DDI_DMA_STREAMING, KM_SLEEP) !=
955 		    DDI_SUCCESS) {
956 			mutex_enter(&vif->vif_mutex);
957 			vif->vif_txfail_dma_bind++;
958 			mutex_exit(&vif->vif_mutex);
959 			goto fail;
960 		}
961 
962 		for (uint_t n = 0; n < virtio_dma_ncookies(extdma); n++) {
963 			uint64_t pa = virtio_dma_cookie_pa(extdma, n);
964 			size_t sz = virtio_dma_cookie_size(extdma, n);
965 
966 			if (virtio_chain_append(tb->tb_chain, pa, sz,
967 			    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
968 				mutex_enter(&vif->vif_mutex);
969 				vif->vif_txfail_indirect_limit++;
970 				vif->vif_notxbuf++;
971 				mutex_exit(&vif->vif_mutex);
972 				goto fail;
973 			}
974 		}
975 
976 		nmp = nmp->b_cont;
977 	}
978 
979 	/*
980 	 * We need to keep the message around until the buffer is reclaimed
981 	 * from the device, at which point it can be freed.
982 	 */
983 	tb->tb_mp = mp;
984 
985 	return (DDI_SUCCESS);
986 
987 fail:
988 	for (uint_t n = 0; n < tb->tb_ndmaext; n++) {
989 		if (tb->tb_dmaext[n] != NULL) {
990 			virtio_dma_unbind(tb->tb_dmaext[n]);
991 		}
992 	}
993 	tb->tb_ndmaext = 0;
994 
995 	freemsg(mp);
996 
997 	return (DDI_FAILURE);
998 }
999 
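/*
 * Attempt to transmit a single frame, filling in the virtio net header and
 * selecting the inline or external transmit path based on the frame size.
 * Returns B_FALSE only if the message was not consumed, in which case the
 * caller should retry it later.
 */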
1000 static boolean_t
1001 vioif_send(vioif_t *vif, mblk_t *mp)
1002 {
1003 	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
1004 
1005 	vioif_txbuf_t *tb = NULL;
1006 	struct virtio_net_hdr *vnh = NULL;
1007 	size_t msg_size = 0;
1008 	uint32_t csum_start;
1009 	uint32_t csum_stuff;
1010 	uint32_t csum_flags;
1011 	uint32_t lso_flags;
1012 	uint32_t lso_mss;
1013 	mblk_t *nmp;
1014 	int ret;
1015 	boolean_t lso_required = B_FALSE;
1016 	struct ether_header *ether = (void *)mp->b_rptr;
1017 
1018 	for (nmp = mp; nmp; nmp = nmp->b_cont)
1019 		msg_size += MBLKL(nmp);
1020 
1021 	if (vif->vif_tx_tso4) {
1022 		mac_lso_get(mp, &lso_mss, &lso_flags);
1023 		lso_required = (lso_flags & HW_LSO) != 0;
1024 	}
1025 
1026 	mutex_enter(&vif->vif_mutex);
1027 	if ((tb = vioif_txbuf_alloc(vif)) == NULL) {
1028 		vif->vif_notxbuf++;
1029 		goto fail;
1030 	}
1031 	mutex_exit(&vif->vif_mutex);
1032 
1033 	/*
1034 	 * Use the inline buffer for the virtio net header.  Zero the portion
1035 	 * of our DMA allocation prior to the packet data.
1036 	 */
1037 	vnh = virtio_dma_va(tb->tb_dma, 0);
1038 	bzero(vnh, VIOIF_HEADER_SKIP);
1039 
1040 	/*
1041 	 * For legacy devices, and those that have not negotiated
1042 	 * VIRTIO_F_ANY_LAYOUT, the virtio net header must appear in a separate
1043 	 * descriptor entry from the rest of the buffer.
1044 	 */
1045 	if (virtio_chain_append(tb->tb_chain,
1046 	    virtio_dma_cookie_pa(tb->tb_dma, 0), sizeof (struct virtio_net_hdr),
1047 	    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
1048 		mutex_enter(&vif->vif_mutex);
1049 		vif->vif_notxbuf++;
1050 		goto fail;
1051 	}
1052 
1053 	mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL, NULL, &csum_flags);
1054 
1055 	/*
1056 	 * The networking stack has asked us to compute the TCP/UDP checksum.
1057 	 */
1058 	if (csum_flags & HCK_PARTIALCKSUM) {
1059 		int eth_hsize;
1060 
1061 		/*
1062 		 * Did we ask for it?
1063 		 */
1064 		ASSERT(vif->vif_tx_csum);
1065 
1066 		/*
1067 		 * We only asked for partial csum packets.
1068 		 */
1069 		ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
1070 		ASSERT(!(csum_flags & HCK_FULLCKSUM));
1071 
1072 		if (ether->ether_type == htons(ETHERTYPE_VLAN)) {
1073 			eth_hsize = sizeof (struct ether_vlan_header);
1074 		} else {
1075 			eth_hsize = sizeof (struct ether_header);
1076 		}
1077 
1078 		vnh->vnh_flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1079 		vnh->vnh_csum_start = eth_hsize + csum_start;
1080 		vnh->vnh_csum_offset = csum_stuff - csum_start;
1081 	}
1082 
1083 	/*
1084 	 * Setup LSO fields if required.
1085 	 */
1086 	if (lso_required) {
1087 		vnh->vnh_gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1088 		vnh->vnh_gso_size = (uint16_t)lso_mss;
1089 	}
1090 
1091 	/*
1092 	 * The device does not maintain its own statistics about broadcast or
1093 	 * multicast packets, so we have to check the destination address
1094 	 * ourselves.
1095 	 */
1096 	if ((ether->ether_dhost.ether_addr_octet[0] & 0x01) != 0) {
1097 		mutex_enter(&vif->vif_mutex);
1098 		if (ether_cmp(&ether->ether_dhost, vioif_broadcast) == 0) {
1099 			vif->vif_brdcstxmt++;
1100 		} else {
1101 			vif->vif_multixmt++;
1102 		}
1103 		mutex_exit(&vif->vif_mutex);
1104 	}
1105 
1106 	/*
1107 	 * For small packets, copy into the preallocated inline buffer rather
1108 	 * than incur the overhead of mapping.  Note that both of these
1109 	 * functions consume "mp", so it must not be used after this point.
1110 	 */
1111 	if (msg_size < vif->vif_txcopy_thresh) {
1112 		ret = vioif_tx_inline(vif, tb, mp, msg_size);
1113 	} else {
1114 		ret = vioif_tx_external(vif, tb, mp, msg_size);
1115 	}
1116 	mp = NULL;
1117 
1118 	mutex_enter(&vif->vif_mutex);
1119 
1120 	if (ret != DDI_SUCCESS) {
1121 		goto fail;
1122 	}
1123 
1124 	vif->vif_opackets++;
1125 	vif->vif_obytes += msg_size;
1126 	mutex_exit(&vif->vif_mutex);
1127 
1128 	virtio_dma_sync(tb->tb_dma, DDI_DMA_SYNC_FORDEV);
1129 	virtio_chain_submit(tb->tb_chain, B_TRUE);
1130 
1131 	return (B_TRUE);
1132 
1133 fail:
1134 	vif->vif_oerrors++;
1135 	if (tb != NULL) {
1136 		vioif_txbuf_free(vif, tb);
1137 	}
1138 	mutex_exit(&vif->vif_mutex);
1139 
1140 	return (mp == NULL);
1141 }
1142 
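/*
 * MAC transmit entry point.  Any frames that cannot be sent are returned to
 * MAC, which will resubmit them once mac_tx_update() is called.
 */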
1143 static mblk_t *
1144 vioif_m_tx(void *arg, mblk_t *mp)
1145 {
1146 	vioif_t *vif = arg;
1147 	mblk_t *nmp;
1148 
1149 	/*
1150 	 * Prior to attempting to send any more frames, do a reclaim to pick up
1151 	 * any descriptors which have been processed by the host.
1152 	 */
1153 	if (virtio_queue_nactive(vif->vif_tx_vq) != 0) {
1154 		(void) vioif_reclaim_used_tx(vif);
1155 	}
1156 
1157 	while (mp != NULL) {
1158 		nmp = mp->b_next;
1159 		mp->b_next = NULL;
1160 
1161 		if (!vioif_send(vif, mp)) {
1162 			/*
1163 			 * If there are no descriptors available, try to
1164 			 * reclaim some, allowing a retry of the send if some
1165 			 * are found.
1166 			 */
1167 			mp->b_next = nmp;
1168 			if (vioif_reclaim_used_tx(vif) != 0) {
1169 				continue;
1170 			}
1171 
1172 			/*
1173 			 * Otherwise, enable the TX ring interrupt so that as
1174 			 * soon as a descriptor becomes available, transmission
1175 			 * can begin again.  For safety, make sure the periodic
1176 			 * reclaim is running as well.
1177 			 */
1178 			mutex_enter(&vif->vif_mutex);
1179 			vif->vif_tx_corked = B_TRUE;
1180 			virtio_queue_no_interrupt(vif->vif_tx_vq, B_FALSE);
1181 			vioif_reclaim_restart(vif);
1182 			mutex_exit(&vif->vif_mutex);
1183 			return (mp);
1184 		}
1185 		mp = nmp;
1186 	}
1187 
1188 	/* Ensure the periodic reclaim has been started. */
1189 	mutex_enter(&vif->vif_mutex);
1190 	vioif_reclaim_restart(vif);
1191 	mutex_exit(&vif->vif_mutex);
1192 
1193 	return (NULL);
1194 }
1195 
1196 static int
1197 vioif_m_start(void *arg)
1198 {
1199 	vioif_t *vif = arg;
1200 
1201 	mutex_enter(&vif->vif_mutex);
1202 
1203 	VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_STOPPED);
1204 	vif->vif_runstate = VIOIF_RUNSTATE_RUNNING;
1205 
1206 	mac_link_update(vif->vif_mac_handle, LINK_STATE_UP);
1207 
1208 	virtio_queue_no_interrupt(vif->vif_rx_vq, B_FALSE);
1209 
1210 	/*
1211 	 * Starting interrupts on the TX virtqueue is unnecessary at this time.
1212 	 * Descriptor reclamation is handled during transmit, via a periodic
1213 	 * timer, and when resources are tight, via the then-enabled interrupt.
1214 	 */
1215 	vif->vif_tx_drain = B_FALSE;
1216 
1217 	/*
1218 	 * Add as many receive buffers as we can to the receive queue.  If we
1219 	 * cannot add any, it may be because we have stopped and started again
1220 	 * and the descriptors are all in the queue already.
1221 	 */
1222 	(void) vioif_add_rx(vif);
1223 
1224 	mutex_exit(&vif->vif_mutex);
1225 	return (DDI_SUCCESS);
1226 }
1227 
1228 static void
1229 vioif_m_stop(void *arg)
1230 {
1231 	vioif_t *vif = arg;
1232 
1233 	mutex_enter(&vif->vif_mutex);
1234 
1235 	VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_RUNNING);
1236 	vif->vif_runstate = VIOIF_RUNSTATE_STOPPING;
1237 
1238 	/* Ensure all TX descriptors have been processed and reclaimed */
1239 	vioif_tx_drain(vif);
1240 
1241 	virtio_queue_no_interrupt(vif->vif_rx_vq, B_TRUE);
1242 
1243 	vif->vif_runstate = VIOIF_RUNSTATE_STOPPED;
1244 	mutex_exit(&vif->vif_mutex);
1245 }
1246 
1247 static int
1248 vioif_m_stat(void *arg, uint_t stat, uint64_t *val)
1249 {
1250 	vioif_t *vif = arg;
1251 
1252 	switch (stat) {
1253 	case MAC_STAT_IERRORS:
1254 		*val = vif->vif_ierrors;
1255 		break;
1256 	case MAC_STAT_OERRORS:
1257 		*val = vif->vif_oerrors;
1258 		break;
1259 	case MAC_STAT_MULTIRCV:
1260 		*val = vif->vif_multircv;
1261 		break;
1262 	case MAC_STAT_BRDCSTRCV:
1263 		*val = vif->vif_brdcstrcv;
1264 		break;
1265 	case MAC_STAT_MULTIXMT:
1266 		*val = vif->vif_multixmt;
1267 		break;
1268 	case MAC_STAT_BRDCSTXMT:
1269 		*val = vif->vif_brdcstxmt;
1270 		break;
1271 	case MAC_STAT_IPACKETS:
1272 		*val = vif->vif_ipackets;
1273 		break;
1274 	case MAC_STAT_RBYTES:
1275 		*val = vif->vif_rbytes;
1276 		break;
1277 	case MAC_STAT_OPACKETS:
1278 		*val = vif->vif_opackets;
1279 		break;
1280 	case MAC_STAT_OBYTES:
1281 		*val = vif->vif_obytes;
1282 		break;
1283 	case MAC_STAT_NORCVBUF:
1284 		*val = vif->vif_norecvbuf;
1285 		break;
1286 	case MAC_STAT_NOXMTBUF:
1287 		*val = vif->vif_notxbuf;
1288 		break;
1289 	case MAC_STAT_IFSPEED:
1290 		/* always 1 Gbit */
1291 		*val = 1000000000ULL;
1292 		break;
1293 	case ETHER_STAT_LINK_DUPLEX:
1294 		/* virtual device, always full-duplex */
1295 		*val = LINK_DUPLEX_FULL;
1296 		break;
1297 
1298 	default:
1299 		return (ENOTSUP);
1300 	}
1301 
1302 	return (DDI_SUCCESS);
1303 }
1304 
1305 static int
1306 vioif_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1307     uint_t pr_valsize, const void *pr_val)
1308 {
1309 	vioif_t *vif = arg;
1310 
1311 	switch (pr_num) {
1312 	case MAC_PROP_MTU: {
1313 		int r;
1314 		uint32_t mtu;
1315 		if (pr_valsize < sizeof (mtu)) {
1316 			return (EOVERFLOW);
1317 		}
1318 		bcopy(pr_val, &mtu, sizeof (mtu));
1319 
1320 		if (mtu < ETHERMIN || mtu > vif->vif_mtu_max) {
1321 			return (EINVAL);
1322 		}
1323 
1324 		mutex_enter(&vif->vif_mutex);
1325 		if ((r = mac_maxsdu_update(vif->vif_mac_handle, mtu)) == 0) {
1326 			vif->vif_mtu = mtu;
1327 		}
1328 		mutex_exit(&vif->vif_mutex);
1329 
1330 		return (r);
1331 	}
1332 
1333 	case MAC_PROP_PRIVATE: {
1334 		long max, result;
1335 		uint_t *resp;
1336 		char *endptr;
1337 
1338 		if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) {
1339 			max = VIOIF_MACPROP_TXCOPY_THRESH_MAX;
1340 			resp = &vif->vif_txcopy_thresh;
1341 		} else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) {
1342 			max = VIOIF_MACPROP_RXCOPY_THRESH_MAX;
1343 			resp = &vif->vif_rxcopy_thresh;
1344 		} else {
1345 			return (ENOTSUP);
1346 		}
1347 
1348 		if (pr_val == NULL) {
1349 			return (EINVAL);
1350 		}
1351 
1352 		if (ddi_strtol(pr_val, &endptr, 10, &result) != 0 ||
1353 		    *endptr != '\0' || result < 0 || result > max) {
1354 			return (EINVAL);
1355 		}
1356 
1357 		mutex_enter(&vif->vif_mutex);
1358 		*resp = result;
1359 		mutex_exit(&vif->vif_mutex);
1360 
1361 		return (0);
1362 	}
1363 
1364 	default:
1365 		return (ENOTSUP);
1366 	}
1367 }
1368 
1369 static int
1370 vioif_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1371     uint_t pr_valsize, void *pr_val)
1372 {
1373 	vioif_t *vif = arg;
1374 
1375 	switch (pr_num) {
1376 	case MAC_PROP_PRIVATE: {
1377 		uint_t value;
1378 
1379 		if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) {
1380 			value = vif->vif_txcopy_thresh;
1381 		} else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) {
1382 			value = vif->vif_rxcopy_thresh;
1383 		} else {
1384 			return (ENOTSUP);
1385 		}
1386 
1387 		if (snprintf(pr_val, pr_valsize, "%u", value) >= pr_valsize) {
1388 			return (EOVERFLOW);
1389 		}
1390 
1391 		return (0);
1392 	}
1393 
1394 	default:
1395 		return (ENOTSUP);
1396 	}
1397 }
1398 
1399 static void
1400 vioif_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1401     mac_prop_info_handle_t prh)
1402 {
1403 	vioif_t *vif = arg;
1404 	char valstr[64];
1405 	int value;
1406 
1407 	switch (pr_num) {
1408 	case MAC_PROP_MTU:
1409 		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
1410 		mac_prop_info_set_range_uint32(prh, ETHERMIN, vif->vif_mtu_max);
1411 		return;
1412 
1413 	case MAC_PROP_PRIVATE:
1414 		if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) {
1415 			value = VIOIF_MACPROP_TXCOPY_THRESH_DEF;
1416 		} else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) {
1417 			value = VIOIF_MACPROP_RXCOPY_THRESH_DEF;
1418 		} else {
1419 			/*
1420 			 * We do not recognise this private property name.
1421 			 */
1422 			return;
1423 		}
1424 		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
1425 		(void) snprintf(valstr, sizeof (valstr), "%d", value);
1426 		mac_prop_info_set_default_str(prh, valstr);
1427 		return;
1428 
1429 	default:
1430 		return;
1431 	}
1432 }
1433 
1434 static boolean_t
1435 vioif_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1436 {
1437 	vioif_t *vif = arg;
1438 
1439 	switch (cap) {
1440 	case MAC_CAPAB_HCKSUM: {
1441 		if (!vif->vif_tx_csum) {
1442 			return (B_FALSE);
1443 		}
1444 
1445 		*(uint32_t *)cap_data = HCKSUM_INET_PARTIAL;
1446 
1447 		return (B_TRUE);
1448 	}
1449 
1450 	case MAC_CAPAB_LSO: {
1451 		if (!vif->vif_tx_tso4) {
1452 			return (B_FALSE);
1453 		}
1454 
1455 		mac_capab_lso_t *lso = cap_data;
1456 		lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1457 		lso->lso_basic_tcp_ipv4.lso_max = VIOIF_RX_DATA_SIZE;
1458 
1459 		return (B_TRUE);
1460 	}
1461 
1462 	default:
1463 		return (B_FALSE);
1464 	}
1465 }
1466 
1467 static boolean_t
1468 vioif_has_feature(vioif_t *vif, uint32_t feature)
1469 {
1470 	return (virtio_feature_present(vif->vif_virtio, feature));
1471 }
1472 
1473 /*
1474  * Read the primary MAC address from the device if one is provided.  If not,
1475  * generate a random locally administered MAC address and write it back to the
1476  * device.
1477  */
1478 static void
1479 vioif_get_mac(vioif_t *vif)
1480 {
1481 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
1482 
1483 	if (vioif_has_feature(vif, VIRTIO_NET_F_MAC)) {
1484 		for (uint_t i = 0; i < ETHERADDRL; i++) {
1485 			vif->vif_mac[i] = virtio_dev_get8(vif->vif_virtio,
1486 			    VIRTIO_NET_CONFIG_MAC + i);
1487 		}
1488 		vif->vif_mac_from_host = 1;
1489 
1490 		return;
1491 	}
1492 
1493 	/* Get a few random bytes */
1494 	(void) random_get_pseudo_bytes(vif->vif_mac, ETHERADDRL);
1495 	/* Make sure it's a unicast MAC */
1496 	vif->vif_mac[0] &= ~1;
1497 	/* Set the "locally administered" bit */
1498 	vif->vif_mac[1] |= 2;
1499 
1500 	/*
1501 	 * Write the random MAC address back to the device.
1502 	 */
1503 	for (uint_t i = 0; i < ETHERADDRL; i++) {
1504 		virtio_dev_put8(vif->vif_virtio, VIRTIO_NET_CONFIG_MAC + i,
1505 		    vif->vif_mac[i]);
1506 	}
1507 	vif->vif_mac_from_host = 0;
1508 
1509 	dev_err(vif->vif_dip, CE_NOTE, "!Generated a random MAC address: "
1510 	    "%02x:%02x:%02x:%02x:%02x:%02x",
1511 	    (uint_t)vif->vif_mac[0], (uint_t)vif->vif_mac[1],
1512 	    (uint_t)vif->vif_mac[2], (uint_t)vif->vif_mac[3],
1513 	    (uint_t)vif->vif_mac[4], (uint_t)vif->vif_mac[5]);
1514 }
1515 
1516 /*
1517  * Virtqueue interrupt handlers
1518  */
1519 static uint_t
1520 vioif_rx_handler(caddr_t arg0, caddr_t arg1)
1521 {
1522 	vioif_t *vif = (vioif_t *)arg0;
1523 
1524 	mutex_enter(&vif->vif_mutex);
1525 	(void) vioif_process_rx(vif);
1526 
1527 	/*
1528 	 * Attempt to replenish the receive queue.  If we cannot add any
1529 	 * descriptors here, it may be because all of the recently received
1530 	 * packets were loaned up to the networking stack.
1531 	 */
1532 	(void) vioif_add_rx(vif);
1533 	mutex_exit(&vif->vif_mutex);
1534 
1535 	return (DDI_INTR_CLAIMED);
1536 }
1537 
1538 static uint_t
1539 vioif_tx_handler(caddr_t arg0, caddr_t arg1)
1540 {
1541 	vioif_t *vif = (vioif_t *)arg0;
1542 
1543 	/*
1544 	 * The TX interrupt could race with other reclamation activity, so
1545 	 * interpreting the return value is unimportant.
1546 	 */
1547 	(void) vioif_reclaim_used_tx(vif);
1548 
1549 	return (DDI_INTR_CLAIMED);
1550 }
1551 
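/*
 * Examine the negotiated feature bits to decide which checksum and
 * segmentation offloads we can advertise to MAC.
 */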
1552 static void
1553 vioif_check_features(vioif_t *vif)
1554 {
1555 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
1556 
1557 	vif->vif_tx_csum = 0;
1558 	vif->vif_tx_tso4 = 0;
1559 
1560 	if (vioif_has_feature(vif, VIRTIO_NET_F_CSUM)) {
1561 		/*
1562 		 * The host will accept packets with partial checksums from us.
1563 		 */
1564 		vif->vif_tx_csum = 1;
1565 
1566 		/*
1567 		 * The legacy GSO feature represents the combination of
1568 		 * HOST_TSO4, HOST_TSO6, and HOST_ECN.
1569 		 */
1570 		boolean_t gso = vioif_has_feature(vif, VIRTIO_NET_F_GSO);
1571 		boolean_t tso4 = vioif_has_feature(vif, VIRTIO_NET_F_HOST_TSO4);
1572 		boolean_t ecn = vioif_has_feature(vif, VIRTIO_NET_F_HOST_ECN);
1573 
1574 		/*
1575 		 * Explicit congestion notification (ECN) is configured
1576 		 * globally; see "tcp_ecn_permitted".  As we cannot currently
1577 		 * request that the stack disable ECN on a per-interface basis,
1578 		 * we require the device to support the combination of
1579 		 * segmentation offload and ECN.
1580 		 */
1581 		if (gso || (tso4 && ecn)) {
1582 			vif->vif_tx_tso4 = 1;
1583 		}
1584 	}
1585 }
1586 
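/*
 * Determine which interrupt types to allow for this device, either from an
 * operator override or based on platform quirks detected via SMBIOS.
 */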
1587 static int
1588 vioif_select_interrupt_types(void)
1589 {
1590 	id_t id;
1591 	smbios_system_t sys;
1592 	smbios_info_t info;
1593 
1594 	if (vioif_allowed_int_types != -1) {
1595 		/*
1596 		 * If this value was tuned via /etc/system or the debugger,
1597 		 * use the provided value directly.
1598 		 */
1599 		return (vioif_allowed_int_types);
1600 	}
1601 
1602 	if ((id = smbios_info_system(ksmbios, &sys)) == SMB_ERR ||
1603 	    smbios_info_common(ksmbios, id, &info) == SMB_ERR) {
1604 		/*
1605 		 * The system may not have valid SMBIOS data, so ignore a
1606 		 * failure here.
1607 		 */
1608 		return (0);
1609 	}
1610 
1611 	if (strcmp(info.smbi_manufacturer, "Google") == 0 &&
1612 	    strcmp(info.smbi_product, "Google Compute Engine") == 0) {
1613 		/*
1614 		 * An undiagnosed issue with the Google Compute Engine (GCE)
1615 		 * hypervisor exists.  In this environment, no RX interrupts
1616 		 * are received if MSI-X handlers are installed.  This does not
1617 		 * appear to be true for the Virtio SCSI driver.  Fixed
1618 		 * interrupts do appear to work, so we fall back for now:
1619 		 */
1620 		return (DDI_INTR_TYPE_FIXED);
1621 	}
1622 
1623 	return (0);
1624 }
1625 
1626 static int
1627 vioif_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1628 {
1629 	int ret;
1630 	vioif_t *vif;
1631 	virtio_t *vio;
1632 	mac_register_t *macp = NULL;
1633 
1634 	if (cmd != DDI_ATTACH) {
1635 		return (DDI_FAILURE);
1636 	}
1637 
1638 	if ((vio = virtio_init(dip, VIRTIO_NET_WANTED_FEATURES, B_TRUE)) ==
1639 	    NULL) {
1640 		return (DDI_FAILURE);
1641 	}
1642 
1643 	vif = kmem_zalloc(sizeof (*vif), KM_SLEEP);
1644 	vif->vif_dip = dip;
1645 	vif->vif_virtio = vio;
1646 	vif->vif_runstate = VIOIF_RUNSTATE_STOPPED;
1647 	ddi_set_driver_private(dip, vif);
1648 
1649 	if ((vif->vif_rx_vq = virtio_queue_alloc(vio, VIRTIO_NET_VIRTQ_RX,
1650 	    "rx", vioif_rx_handler, vif, B_FALSE, VIOIF_MAX_SEGS)) == NULL ||
1651 	    (vif->vif_tx_vq = virtio_queue_alloc(vio, VIRTIO_NET_VIRTQ_TX,
1652 	    "tx", vioif_tx_handler, vif, B_FALSE, VIOIF_MAX_SEGS)) == NULL) {
1653 		goto fail;
1654 	}
1655 
1656 	if (virtio_init_complete(vio, vioif_select_interrupt_types()) !=
1657 	    DDI_SUCCESS) {
1658 		dev_err(dip, CE_WARN, "failed to complete Virtio init");
1659 		goto fail;
1660 	}
1661 
1662 	virtio_queue_no_interrupt(vif->vif_rx_vq, B_TRUE);
1663 	virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);
1664 
1665 	mutex_init(&vif->vif_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
1666 	mutex_enter(&vif->vif_mutex);
1667 
1668 	vioif_get_mac(vif);
1669 
1670 	vif->vif_rxcopy_thresh = VIOIF_MACPROP_RXCOPY_THRESH_DEF;
1671 	vif->vif_txcopy_thresh = VIOIF_MACPROP_TXCOPY_THRESH_DEF;
1672 
1673 	if (vioif_has_feature(vif, VIRTIO_NET_F_MTU)) {
1674 		vif->vif_mtu_max = virtio_dev_get16(vio, VIRTIO_NET_CONFIG_MTU);
1675 	} else {
1676 		vif->vif_mtu_max = ETHERMTU;
1677 	}
1678 
1679 	vif->vif_mtu = ETHERMTU;
1680 	if (vif->vif_mtu > vif->vif_mtu_max) {
1681 		vif->vif_mtu = vif->vif_mtu_max;
1682 	}
1683 
1684 	vioif_check_features(vif);
1685 
1686 	if (vioif_alloc_bufs(vif) != 0) {
1687 		mutex_exit(&vif->vif_mutex);
1688 		dev_err(dip, CE_WARN, "failed to allocate memory");
1689 		goto fail;
1690 	}
1691 
1692 	mutex_exit(&vif->vif_mutex);
1693 
1694 	if (virtio_interrupts_enable(vio) != DDI_SUCCESS) {
1695 		dev_err(dip, CE_WARN, "failed to enable interrupts");
1696 		goto fail;
1697 	}
1698 
1699 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1700 		dev_err(dip, CE_WARN, "failed to allocate a mac_register");
1701 		goto fail;
1702 	}
1703 
1704 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1705 	macp->m_driver = vif;
1706 	macp->m_dip = dip;
1707 	macp->m_src_addr = vif->vif_mac;
1708 	macp->m_callbacks = &vioif_mac_callbacks;
1709 	macp->m_min_sdu = 0;
1710 	macp->m_max_sdu = vif->vif_mtu;
1711 	macp->m_margin = VLAN_TAGSZ;
1712 	macp->m_priv_props = vioif_priv_props;
1713 
1714 	if ((ret = mac_register(macp, &vif->vif_mac_handle)) != 0) {
1715 		dev_err(dip, CE_WARN, "mac_register() failed (%d)", ret);
1716 		goto fail;
1717 	}
1718 	mac_free(macp);
1719 
1720 	mac_link_update(vif->vif_mac_handle, LINK_STATE_UP);
1721 
1722 	return (DDI_SUCCESS);
1723 
1724 fail:
1725 	vioif_free_bufs(vif);
1726 	if (macp != NULL) {
1727 		mac_free(macp);
1728 	}
1729 	(void) virtio_fini(vio, B_TRUE);
1730 	kmem_free(vif, sizeof (*vif));
1731 	return (DDI_FAILURE);
1732 }
1733 
1734 static int
1735 vioif_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1736 {
1737 	int r;
1738 	vioif_t *vif;
1739 
1740 	if (cmd != DDI_DETACH) {
1741 		return (DDI_FAILURE);
1742 	}
1743 
1744 	if ((vif = ddi_get_driver_private(dip)) == NULL) {
1745 		return (DDI_FAILURE);
1746 	}
1747 
1748 	mutex_enter(&vif->vif_mutex);
1749 	if (vif->vif_runstate != VIOIF_RUNSTATE_STOPPED) {
1750 		dev_err(dip, CE_WARN, "!NIC still running, cannot detach");
1751 		mutex_exit(&vif->vif_mutex);
1752 		return (DDI_FAILURE);
1753 	}
1754 
1755 	/*
1756 	 * There should be no outstanding transmit buffers once the NIC is
1757 	 * completely stopped.
1758 	 */
1759 	VERIFY3U(vif->vif_ntxbufs_alloc, ==, 0);
1760 
1761 	/*
1762 	 * Though we cannot claw back all of the receive buffers until we reset
1763 	 * the device, we must ensure all those loaned to MAC have been
1764 	 * returned before calling mac_unregister().
1765 	 */
1766 	if (vif->vif_nrxbufs_onloan > 0) {
1767 		dev_err(dip, CE_WARN, "!%u receive buffers still loaned, "
1768 		    "cannot detach", vif->vif_nrxbufs_onloan);
1769 		mutex_exit(&vif->vif_mutex);
1770 		return (DDI_FAILURE);
1771 	}
1772 
1773 	if ((r = mac_unregister(vif->vif_mac_handle)) != 0) {
1774 		dev_err(dip, CE_WARN, "!MAC unregister failed (%d)", r);
1775 		return (DDI_FAILURE);
1776 	}
1777 
1778 	/*
1779 	 * Shut down the device so that we can recover any previously
1780 	 * submitted receive buffers.
1781 	 */
1782 	virtio_shutdown(vif->vif_virtio);
1783 	for (;;) {
1784 		virtio_chain_t *vic;
1785 
1786 		if ((vic = virtio_queue_evacuate(vif->vif_rx_vq)) == NULL) {
1787 			break;
1788 		}
1789 
1790 		vioif_rxbuf_t *rb = virtio_chain_data(vic);
1791 		vioif_rxbuf_free(vif, rb);
1792 	}
1793 
1794 	/*
1795 	 * vioif_free_bufs() must be called before virtio_fini()
1796 	 * as it uses virtio_chain_free() which itself depends on some
1797 	 * virtio data structures still being around.
1798 	 */
1799 	vioif_free_bufs(vif);
1800 	(void) virtio_fini(vif->vif_virtio, B_FALSE);
1801 
1802 	mutex_exit(&vif->vif_mutex);
1803 	mutex_destroy(&vif->vif_mutex);
1804 
1805 	kmem_free(vif, sizeof (*vif));
1806 
1807 	return (DDI_SUCCESS);
1808 }
1809 
1810 static int
1811 vioif_quiesce(dev_info_t *dip)
1812 {
1813 	vioif_t *vif;
1814 
1815 	if ((vif = ddi_get_driver_private(dip)) == NULL)
1816 		return (DDI_FAILURE);
1817 
1818 	return (virtio_quiesce(vif->vif_virtio));
1819 }
1820 
1821 int
1822 _init(void)
1823 {
1824 	int ret;
1825 
1826 	mac_init_ops(&vioif_dev_ops, "vioif");
1827 
1828 	if ((ret = mod_install(&vioif_modlinkage)) != DDI_SUCCESS) {
1829 		mac_fini_ops(&vioif_dev_ops);
1830 	}
1831 
1832 	return (ret);
1833 }
1834 
1835 int
1836 _fini(void)
1837 {
1838 	int ret;
1839 
1840 	if ((ret = mod_remove(&vioif_modlinkage)) == DDI_SUCCESS) {
1841 		mac_fini_ops(&vioif_dev_ops);
1842 	}
1843 
1844 	return (ret);
1845 }
1846 
1847 int
1848 _info(struct modinfo *modinfop)
1849 {
1850 	return (mod_info(&vioif_modlinkage, modinfop));
1851 }
1852