xref: /freebsd/sys/dev/virtio/network/if_vtnet.c (revision d8b88105c2ccf7686552516877f541efb54fb6c8)
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* Driver for VirtIO network devices. */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/eventhandler.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/sockio.h>
37 #include <sys/mbuf.h>
38 #include <sys/malloc.h>
39 #include <sys/module.h>
40 #include <sys/socket.h>
41 #include <sys/sysctl.h>
42 #include <sys/random.h>
43 #include <sys/sglist.h>
44 #include <sys/lock.h>
45 #include <sys/mutex.h>
46 #include <sys/taskqueue.h>
47 #include <sys/smp.h>
48 #include <machine/smp.h>
49 
50 #include <vm/uma.h>
51 
52 #include <net/ethernet.h>
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_arp.h>
56 #include <net/if_dl.h>
57 #include <net/if_types.h>
58 #include <net/if_media.h>
59 #include <net/if_vlan_var.h>
60 
61 #include <net/bpf.h>
62 
63 #include <netinet/in_systm.h>
64 #include <netinet/in.h>
65 #include <netinet/ip.h>
66 #include <netinet/ip6.h>
67 #include <netinet6/ip6_var.h>
68 #include <netinet/udp.h>
69 #include <netinet/tcp.h>
70 #include <netinet/sctp.h>
71 
72 #include <machine/bus.h>
73 #include <machine/resource.h>
74 #include <sys/bus.h>
75 #include <sys/rman.h>
76 
77 #include <dev/virtio/virtio.h>
78 #include <dev/virtio/virtqueue.h>
79 #include <dev/virtio/network/virtio_net.h>
80 #include <dev/virtio/network/if_vtnetvar.h>
81 
82 #include "virtio_if.h"
83 
84 #include "opt_inet.h"
85 #include "opt_inet6.h"
86 
87 static int	vtnet_modevent(module_t, int, void *);
88 
89 static int	vtnet_probe(device_t);
90 static int	vtnet_attach(device_t);
91 static int	vtnet_detach(device_t);
92 static int	vtnet_suspend(device_t);
93 static int	vtnet_resume(device_t);
94 static int	vtnet_shutdown(device_t);
95 static int	vtnet_attach_completed(device_t);
96 static int	vtnet_config_change(device_t);
97 
98 static void	vtnet_negotiate_features(struct vtnet_softc *);
99 static void	vtnet_setup_features(struct vtnet_softc *);
100 static int	vtnet_init_rxq(struct vtnet_softc *, int);
101 static int	vtnet_init_txq(struct vtnet_softc *, int);
102 static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
103 static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
104 static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
105 static void	vtnet_free_rx_filters(struct vtnet_softc *);
106 static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
107 static int	vtnet_setup_interface(struct vtnet_softc *);
108 static int	vtnet_change_mtu(struct vtnet_softc *, int);
109 static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
110 
111 static int	vtnet_rxq_populate(struct vtnet_rxq *);
112 static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
113 static struct mbuf *
114 		vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
115 static int	vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
116 		    struct mbuf *, int);
117 static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
118 static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
119 static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
120 static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
121 		     struct virtio_net_hdr *);
122 static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
123 static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
124 static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
125 static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
126 		    struct virtio_net_hdr *);
127 static int	vtnet_rxq_eof(struct vtnet_rxq *);
128 static void	vtnet_rx_vq_intr(void *);
129 static void	vtnet_rxq_tq_intr(void *, int);
130 
131 static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
132 static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
133 		    int *, int *, int *);
134 static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
135 		    int, struct virtio_net_hdr *);
136 static struct mbuf *
137 		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
138 		    struct virtio_net_hdr *);
139 static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
140 		    struct vtnet_tx_header *);
141 static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
142 #ifdef VTNET_LEGACY_TX
143 static void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
144 static void	vtnet_start(struct ifnet *);
145 #else
146 static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
147 static int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
148 static void	vtnet_txq_tq_deferred(void *, int);
149 #endif
150 static void	vtnet_txq_start(struct vtnet_txq *);
151 static void	vtnet_txq_tq_intr(void *, int);
152 static void	vtnet_txq_eof(struct vtnet_txq *);
153 static void	vtnet_tx_vq_intr(void *);
154 static void	vtnet_tx_start_all(struct vtnet_softc *);
155 
156 #ifndef VTNET_LEGACY_TX
157 static void	vtnet_qflush(struct ifnet *);
158 #endif
159 
160 static int	vtnet_watchdog(struct vtnet_txq *);
161 static void	vtnet_rxq_accum_stats(struct vtnet_rxq *,
162 		    struct vtnet_rxq_stats *);
163 static void	vtnet_txq_accum_stats(struct vtnet_txq *,
164 		    struct vtnet_txq_stats *);
165 static void	vtnet_accumulate_stats(struct vtnet_softc *);
166 static void	vtnet_tick(void *);
167 
168 static void	vtnet_start_taskqueues(struct vtnet_softc *);
169 static void	vtnet_free_taskqueues(struct vtnet_softc *);
170 static void	vtnet_drain_taskqueues(struct vtnet_softc *);
171 
172 static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
173 static void	vtnet_stop_rendezvous(struct vtnet_softc *);
174 static void	vtnet_stop(struct vtnet_softc *);
175 static int	vtnet_virtio_reinit(struct vtnet_softc *);
176 static void	vtnet_init_rx_filters(struct vtnet_softc *);
177 static int	vtnet_init_rx_queues(struct vtnet_softc *);
178 static int	vtnet_init_tx_queues(struct vtnet_softc *);
179 static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
180 static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
181 static int	vtnet_reinit(struct vtnet_softc *);
182 static void	vtnet_init_locked(struct vtnet_softc *);
183 static void	vtnet_init(void *);
184 
185 static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
186 static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
187 		    struct sglist *, int, int);
188 static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
189 static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
190 static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
191 static int	vtnet_set_promisc(struct vtnet_softc *, int);
192 static int	vtnet_set_allmulti(struct vtnet_softc *, int);
193 static void	vtnet_attach_disable_promisc(struct vtnet_softc *);
194 static void	vtnet_rx_filter(struct vtnet_softc *);
195 static void	vtnet_rx_filter_mac(struct vtnet_softc *);
196 static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
197 static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
198 static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
199 static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
200 static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
201 
202 static int	vtnet_is_link_up(struct vtnet_softc *);
203 static void	vtnet_update_link_status(struct vtnet_softc *);
204 static int	vtnet_ifmedia_upd(struct ifnet *);
205 static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
206 static void	vtnet_get_hwaddr(struct vtnet_softc *);
207 static void	vtnet_set_hwaddr(struct vtnet_softc *);
208 static void	vtnet_vlan_tag_remove(struct mbuf *);
209 
210 static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
211 		    struct sysctl_oid_list *, struct vtnet_rxq *);
212 static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
213 		    struct sysctl_oid_list *, struct vtnet_txq *);
214 static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
215 static void	vtnet_setup_sysctl(struct vtnet_softc *);
216 
217 static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
218 static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
219 static int	vtnet_txq_enable_intr(struct vtnet_txq *);
220 static void	vtnet_txq_disable_intr(struct vtnet_txq *);
221 static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
222 static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
223 static void	vtnet_enable_interrupts(struct vtnet_softc *);
224 static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
225 static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
226 static void	vtnet_disable_interrupts(struct vtnet_softc *);
227 
228 static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
229 
230 /* Tunables. */
231 static int vtnet_csum_disable = 0;
232 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
233 static int vtnet_tso_disable = 0;
234 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
235 static int vtnet_lro_disable = 0;
236 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
237 static int vtnet_mq_disable = 0;
238 TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
239 static int vtnet_mq_max_pairs = 0;
240 TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
241 static int vtnet_rx_process_limit = 512;
242 TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
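/*
 * These tunables may be set at boot time from loader.conf(5); the values
 * shown below are only illustrative:
 *
 *	hw.vtnet.csum_disable="1"
 *	hw.vtnet.rx_process_limit="256"
 */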
243 
244 /*
245  * Reducing the number of transmit completed interrupts can improve
246  * performance. To do so, the define below keeps the Tx vq interrupt
247  * disabled and adds calls to vtnet_txeof() in the start and watchdog
248  * paths. The price to pay for this is that the m_free'ing of transmitted
249  * mbufs may be delayed until the watchdog fires.
250  *
251  * BMV: Reintroduce this later as a run-time option, if it makes
252  * sense after the EVENT_IDX feature is supported.
253  *
254  * #define VTNET_TX_INTR_MODERATION
255  */
256 
257 static uma_zone_t vtnet_tx_header_zone;
258 
259 static struct virtio_feature_desc vtnet_feature_desc[] = {
260 	{ VIRTIO_NET_F_CSUM,		"TxChecksum"	},
261 	{ VIRTIO_NET_F_GUEST_CSUM,	"RxChecksum"	},
262 	{ VIRTIO_NET_F_MAC,		"MacAddress"	},
263 	{ VIRTIO_NET_F_GSO,		"TxAllGSO"	},
264 	{ VIRTIO_NET_F_GUEST_TSO4,	"RxTSOv4"	},
265 	{ VIRTIO_NET_F_GUEST_TSO6,	"RxTSOv6"	},
266 	{ VIRTIO_NET_F_GUEST_ECN,	"RxECN"		},
267 	{ VIRTIO_NET_F_GUEST_UFO,	"RxUFO"		},
268 	{ VIRTIO_NET_F_HOST_TSO4,	"TxTSOv4"	},
269 	{ VIRTIO_NET_F_HOST_TSO6,	"TxTSOv6"	},
270 	{ VIRTIO_NET_F_HOST_ECN,	"TxTSOECN"	},
271 	{ VIRTIO_NET_F_HOST_UFO,	"TxUFO"		},
272 	{ VIRTIO_NET_F_MRG_RXBUF,	"MrgRxBuf"	},
273 	{ VIRTIO_NET_F_STATUS,		"Status"	},
274 	{ VIRTIO_NET_F_CTRL_VQ,		"ControlVq"	},
275 	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
276 	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
277 	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
278 	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
279 	{ VIRTIO_NET_F_MQ,		"Multiqueue"	},
280 	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},
281 
282 	{ 0, NULL }
283 };
284 
285 static device_method_t vtnet_methods[] = {
286 	/* Device methods. */
287 	DEVMETHOD(device_probe,			vtnet_probe),
288 	DEVMETHOD(device_attach,		vtnet_attach),
289 	DEVMETHOD(device_detach,		vtnet_detach),
290 	DEVMETHOD(device_suspend,		vtnet_suspend),
291 	DEVMETHOD(device_resume,		vtnet_resume),
292 	DEVMETHOD(device_shutdown,		vtnet_shutdown),
293 
294 	/* VirtIO methods. */
295 	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
296 	DEVMETHOD(virtio_config_change,		vtnet_config_change),
297 
298 	DEVMETHOD_END
299 };
300 
301 static driver_t vtnet_driver = {
302 	"vtnet",
303 	vtnet_methods,
304 	sizeof(struct vtnet_softc)
305 };
306 static devclass_t vtnet_devclass;
307 
308 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
309     vtnet_modevent, 0);
310 MODULE_VERSION(vtnet, 1);
311 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
312 
313 static int
314 vtnet_modevent(module_t mod, int type, void *unused)
315 {
316 	int error;
317 
318 	error = 0;
319 
320 	switch (type) {
321 	case MOD_LOAD:
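		/*
		 * Create the UMA zone backing the per-packet Tx headers
		 * (a virtio_net_hdr plus bookkeeping) used for every
		 * transmitted frame.
		 */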
322 		vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
323 		    sizeof(struct vtnet_tx_header),
324 		    NULL, NULL, NULL, NULL, 0, 0);
325 		break;
326 	case MOD_QUIESCE:
327 	case MOD_UNLOAD:
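		/*
		 * Refuse to quiesce or unload while Tx headers are still
		 * outstanding, i.e. while packets are in flight.
		 */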
328 		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
329 			error = EBUSY;
330 		else if (type == MOD_UNLOAD) {
331 			uma_zdestroy(vtnet_tx_header_zone);
332 			vtnet_tx_header_zone = NULL;
333 		}
334 		break;
335 	case MOD_SHUTDOWN:
336 		break;
337 	default:
338 		error = EOPNOTSUPP;
339 		break;
340 	}
341 
342 	return (error);
343 }
344 
345 static int
346 vtnet_probe(device_t dev)
347 {
348 
349 	if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
350 		return (ENXIO);
351 
352 	device_set_desc(dev, "VirtIO Networking Adapter");
353 
354 	return (BUS_PROBE_DEFAULT);
355 }
356 
357 static int
358 vtnet_attach(device_t dev)
359 {
360 	struct vtnet_softc *sc;
361 	int error;
362 
363 	sc = device_get_softc(dev);
364 	sc->vtnet_dev = dev;
365 
366 	/* Register our feature descriptions. */
367 	virtio_set_feature_desc(dev, vtnet_feature_desc);
368 
369 	VTNET_CORE_LOCK_INIT(sc);
370 	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
371 
372 	vtnet_setup_sysctl(sc);
373 	vtnet_setup_features(sc);
374 
375 	error = vtnet_alloc_rx_filters(sc);
376 	if (error) {
377 		device_printf(dev, "cannot allocate Rx filters\n");
378 		goto fail;
379 	}
380 
381 	error = vtnet_alloc_rxtx_queues(sc);
382 	if (error) {
383 		device_printf(dev, "cannot allocate queues\n");
384 		goto fail;
385 	}
386 
387 	error = vtnet_alloc_virtqueues(sc);
388 	if (error) {
389 		device_printf(dev, "cannot allocate virtqueues\n");
390 		goto fail;
391 	}
392 
393 	error = vtnet_setup_interface(sc);
394 	if (error) {
395 		device_printf(dev, "cannot setup interface\n");
396 		goto fail;
397 	}
398 
399 	error = virtio_setup_intr(dev, INTR_TYPE_NET);
400 	if (error) {
401 		device_printf(dev, "cannot setup virtqueue interrupts\n");
402 		/* BMV: This will crash if called during boot! */
403 		ether_ifdetach(sc->vtnet_ifp);
404 		goto fail;
405 	}
406 
407 	vtnet_start_taskqueues(sc);
408 
409 fail:
410 	if (error)
411 		vtnet_detach(dev);
412 
413 	return (error);
414 }
415 
416 static int
417 vtnet_detach(device_t dev)
418 {
419 	struct vtnet_softc *sc;
420 	struct ifnet *ifp;
421 
422 	sc = device_get_softc(dev);
423 	ifp = sc->vtnet_ifp;
424 
425 	if (device_is_attached(dev)) {
426 		VTNET_CORE_LOCK(sc);
427 		vtnet_stop(sc);
428 		VTNET_CORE_UNLOCK(sc);
429 
430 		callout_drain(&sc->vtnet_tick_ch);
431 		vtnet_drain_taskqueues(sc);
432 
433 		ether_ifdetach(ifp);
434 	}
435 
436 	vtnet_free_taskqueues(sc);
437 
438 	if (sc->vtnet_vlan_attach != NULL) {
439 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
440 		sc->vtnet_vlan_attach = NULL;
441 	}
442 	if (sc->vtnet_vlan_detach != NULL) {
443 		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
444 		sc->vtnet_vlan_detach = NULL;
445 	}
446 
447 	ifmedia_removeall(&sc->vtnet_media);
448 
449 	if (ifp != NULL) {
450 		if_free(ifp);
451 		sc->vtnet_ifp = NULL;
452 	}
453 
454 	vtnet_free_rxtx_queues(sc);
455 	vtnet_free_rx_filters(sc);
456 
457 	if (sc->vtnet_ctrl_vq != NULL)
458 		vtnet_free_ctrl_vq(sc);
459 
460 	VTNET_CORE_LOCK_DESTROY(sc);
461 
462 	return (0);
463 }
464 
465 static int
466 vtnet_suspend(device_t dev)
467 {
468 	struct vtnet_softc *sc;
469 
470 	sc = device_get_softc(dev);
471 
472 	VTNET_CORE_LOCK(sc);
473 	vtnet_stop(sc);
474 	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
475 	VTNET_CORE_UNLOCK(sc);
476 
477 	return (0);
478 }
479 
480 static int
481 vtnet_resume(device_t dev)
482 {
483 	struct vtnet_softc *sc;
484 	struct ifnet *ifp;
485 
486 	sc = device_get_softc(dev);
487 	ifp = sc->vtnet_ifp;
488 
489 	VTNET_CORE_LOCK(sc);
490 	if (ifp->if_flags & IFF_UP)
491 		vtnet_init_locked(sc);
492 	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
493 	VTNET_CORE_UNLOCK(sc);
494 
495 	return (0);
496 }
497 
498 static int
499 vtnet_shutdown(device_t dev)
500 {
501 
502 	/*
503 	 * Suspend already does all of what we need to
504 	 * do here; we just never expect to be resumed.
505 	 */
506 	return (vtnet_suspend(dev));
507 }
508 
509 static int
510 vtnet_attach_completed(device_t dev)
511 {
512 
513 	vtnet_attach_disable_promisc(device_get_softc(dev));
514 
515 	return (0);
516 }
517 
518 static int
519 vtnet_config_change(device_t dev)
520 {
521 	struct vtnet_softc *sc;
522 
523 	sc = device_get_softc(dev);
524 
525 	VTNET_CORE_LOCK(sc);
526 	vtnet_update_link_status(sc);
527 	if (sc->vtnet_link_active != 0)
528 		vtnet_tx_start_all(sc);
529 	VTNET_CORE_UNLOCK(sc);
530 
531 	return (0);
532 }
533 
534 static void
535 vtnet_negotiate_features(struct vtnet_softc *sc)
536 {
537 	device_t dev;
538 	uint64_t mask, features;
539 
540 	dev = sc->vtnet_dev;
541 	mask = 0;
542 
543 	/*
544 	 * TSO and LRO are only available when their corresponding checksum
545 	 * offload feature is also negotiated.
546 	 */
547 	if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
548 		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
549 		mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
550 	}
551 	if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
552 		mask |= VTNET_TSO_FEATURES;
553 	if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
554 		mask |= VTNET_LRO_FEATURES;
555 	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
556 		mask |= VIRTIO_NET_F_MQ;
557 #ifdef VTNET_LEGACY_TX
558 	mask |= VIRTIO_NET_F_MQ;
559 #endif
560 
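	/* Strip the features disabled above before negotiating. */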
561 	features = VTNET_FEATURES & ~mask;
562 	sc->vtnet_features = virtio_negotiate_features(dev, features);
563 
564 	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) == 0)
565 		return;
566 	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF))
567 		return;
568 
569 	/*
570 	 * LRO without mergeable buffers requires special care. This is not
571 	 * ideal because every receive buffer must be large enough to hold
572 	 * the maximum TCP packet, the Ethernet header, and the virtio-net
573 	 * header. This requires up to 34 descriptors with MCLBYTES clusters.
574 	 * If we do not have indirect descriptors, LRO is disabled since the
575 	 * virtqueue will not contain very many receive buffers.
576 	 */
577 	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
578 		device_printf(dev,
579 		    "LRO disabled since neither mergeable buffers nor "
580 		    "indirect descriptors were negotiated\n");
581 
582 		features &= ~VTNET_LRO_FEATURES;
583 		sc->vtnet_features = virtio_negotiate_features(dev, features);
584 	} else
585 		sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
586 }
587 
588 static void
589 vtnet_setup_features(struct vtnet_softc *sc)
590 {
591 	device_t dev;
592 	int max_pairs, max;
593 
594 	dev = sc->vtnet_dev;
595 
596 	vtnet_negotiate_features(sc);
597 
598 	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
599 		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
600 
601 	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
602 		/* This feature should always be negotiated. */
603 		sc->vtnet_flags |= VTNET_FLAG_MAC;
604 	}
605 
606 	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
607 		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
608 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
609 	} else
610 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
611 
612 	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
613 		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
614 
615 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
616 			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
617 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
618 			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
619 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
620 			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
621 	}
622 
623 	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
624 	    sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
625 		max_pairs = virtio_read_dev_config_2(dev,
626 		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
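		/* Fall back to one pair if the host's value is out of range. */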
627 		if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
628 		    max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
629 			max_pairs = 1;
630 	} else
631 		max_pairs = 1;
632 
633 	if (max_pairs > 1) {
634 		/*
635 		 * Limit the maximum number of queue pairs to the number of
636 		 * CPUs or the configured maximum. The actual number of
637 		 * queues that get used may be less.
638 		 */
639 		max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
640 		if (max > 0 && max_pairs > max)
641 			max_pairs = max;
642 		if (max_pairs > mp_ncpus)
643 			max_pairs = mp_ncpus;
644 		if (max_pairs > VTNET_MAX_QUEUE_PAIRS)
645 			max_pairs = VTNET_MAX_QUEUE_PAIRS;
646 		if (max_pairs > 1)
647 			sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
648 	}
649 
650 	sc->vtnet_max_vq_pairs = max_pairs;
651 }
652 
653 static int
654 vtnet_init_rxq(struct vtnet_softc *sc, int id)
655 {
656 	struct vtnet_rxq *rxq;
657 
658 	rxq = &sc->vtnet_rxqs[id];
659 
660 	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
661 	    device_get_nameunit(sc->vtnet_dev), id);
662 	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
663 
664 	rxq->vtnrx_sc = sc;
665 	rxq->vtnrx_id = id;
666 
667 	TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
668 	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
669 	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
670 
671 	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
672 }
673 
674 static int
675 vtnet_init_txq(struct vtnet_softc *sc, int id)
676 {
677 	struct vtnet_txq *txq;
678 
679 	txq = &sc->vtnet_txqs[id];
680 
681 	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
682 	    device_get_nameunit(sc->vtnet_dev), id);
683 	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
684 
685 	txq->vtntx_sc = sc;
686 	txq->vtntx_id = id;
687 
688 #ifndef VTNET_LEGACY_TX
689 	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
690 	    M_NOWAIT, &txq->vtntx_mtx);
691 	if (txq->vtntx_br == NULL)
692 		return (ENOMEM);
693 
694 	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
695 #endif
696 	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
697 	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
698 	    taskqueue_thread_enqueue, &txq->vtntx_tq);
699 	if (txq->vtntx_tq == NULL)
700 		return (ENOMEM);
701 
702 	return (0);
703 }
704 
705 static int
706 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
707 {
708 	int i, npairs, error;
709 
710 	npairs = sc->vtnet_max_vq_pairs;
711 
712 	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
713 	    M_NOWAIT | M_ZERO);
714 	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
715 	    M_NOWAIT | M_ZERO);
716 	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
717 		return (ENOMEM);
718 
719 	for (i = 0; i < npairs; i++) {
720 		error = vtnet_init_rxq(sc, i);
721 		if (error)
722 			return (error);
723 		error = vtnet_init_txq(sc, i);
724 		if (error)
725 			return (error);
726 	}
727 
728 	vtnet_setup_queue_sysctl(sc);
729 
730 	return (0);
731 }
732 
733 static void
734 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
735 {
736 
737 	rxq->vtnrx_sc = NULL;
738 	rxq->vtnrx_id = -1;
739 
740 	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
741 		mtx_destroy(&rxq->vtnrx_mtx);
742 }
743 
744 static void
745 vtnet_destroy_txq(struct vtnet_txq *txq)
746 {
747 
748 	txq->vtntx_sc = NULL;
749 	txq->vtntx_id = -1;
750 
751 #ifndef VTNET_LEGACY_TX
752 	if (txq->vtntx_br != NULL) {
753 		buf_ring_free(txq->vtntx_br, M_DEVBUF);
754 		txq->vtntx_br = NULL;
755 	}
756 #endif
757 
758 	if (mtx_initialized(&txq->vtntx_mtx) != 0)
759 		mtx_destroy(&txq->vtntx_mtx);
760 }
761 
762 static void
763 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
764 {
765 	int i;
766 
767 	if (sc->vtnet_rxqs != NULL) {
768 		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
769 			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
770 		free(sc->vtnet_rxqs, M_DEVBUF);
771 		sc->vtnet_rxqs = NULL;
772 	}
773 
774 	if (sc->vtnet_txqs != NULL) {
775 		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
776 			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
777 		free(sc->vtnet_txqs, M_DEVBUF);
778 		sc->vtnet_txqs = NULL;
779 	}
780 }
781 
782 static int
783 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
784 {
785 
786 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
787 		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
788 		    M_DEVBUF, M_NOWAIT | M_ZERO);
789 		if (sc->vtnet_mac_filter == NULL)
790 			return (ENOMEM);
791 	}
792 
793 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
794 		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
795 		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
796 		if (sc->vtnet_vlan_filter == NULL)
797 			return (ENOMEM);
798 	}
799 
800 	return (0);
801 }
802 
803 static void
804 vtnet_free_rx_filters(struct vtnet_softc *sc)
805 {
806 
807 	if (sc->vtnet_mac_filter != NULL) {
808 		free(sc->vtnet_mac_filter, M_DEVBUF);
809 		sc->vtnet_mac_filter = NULL;
810 	}
811 
812 	if (sc->vtnet_vlan_filter != NULL) {
813 		free(sc->vtnet_vlan_filter, M_DEVBUF);
814 		sc->vtnet_vlan_filter = NULL;
815 	}
816 }
817 
818 static int
819 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
820 {
821 	device_t dev;
822 	struct vq_alloc_info *info;
823 	struct vtnet_rxq *rxq;
824 	struct vtnet_txq *txq;
825 	int i, idx, flags, nvqs, rxsegs, error;
826 
827 	dev = sc->vtnet_dev;
828 	flags = 0;
829 
830 	/*
831 	 * Indirect descriptors are not needed for the Rx virtqueue when
832 	 * mergeable buffers are negotiated. The header is placed inline
833 	 * with the data, not in a separate descriptor, and mbuf clusters
834 	 * are always physically contiguous.
835 	 */
836 	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
837 		rxsegs = 0;
838 	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
839 		rxsegs = VTNET_MAX_RX_SEGS;
840 	else
841 		rxsegs = VTNET_MIN_RX_SEGS;
842 
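	/* One Rx and one Tx virtqueue per pair, plus the control VQ if any. */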
843 	nvqs = sc->vtnet_max_vq_pairs * 2;
844 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
845 		nvqs++;
846 
847 	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
848 	if (info == NULL)
849 		return (ENOMEM);
850 
851 	for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) {
852 		rxq = &sc->vtnet_rxqs[i];
853 		VQ_ALLOC_INFO_INIT(&info[idx], rxsegs,
854 		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
855 		    "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
856 
857 		txq = &sc->vtnet_txqs[i];
858 		VQ_ALLOC_INFO_INIT(&info[idx+1], VTNET_MAX_TX_SEGS,
859 		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
860 		    "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
861 	}
862 
863 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
864 		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
865 		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
866 	}
867 
868 	/*
869 	 * Enable interrupt binding if this is multiqueue. This only matters
870 	 * when per-vq MSIX is available.
871 	 */
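	/* XXX: No binding flag is ORed in yet; this is a no-op. */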
872 	if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
873 		flags |= 0;
874 
875 	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
876 	free(info, M_TEMP);
877 
878 	return (error);
879 }
880 
881 static int
882 vtnet_setup_interface(struct vtnet_softc *sc)
883 {
884 	device_t dev;
885 	struct ifnet *ifp;
886 	int limit;
887 
888 	dev = sc->vtnet_dev;
889 
890 	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
891 	if (ifp == NULL) {
892 		device_printf(dev, "cannot allocate ifnet structure\n");
893 		return (ENOSPC);
894 	}
895 
896 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
897 	if_initbaudrate(ifp, IF_Gbps(10));	/* Approx. */
898 	ifp->if_softc = sc;
899 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
900 	ifp->if_init = vtnet_init;
901 	ifp->if_ioctl = vtnet_ioctl;
902 
903 #ifndef VTNET_LEGACY_TX
904 	ifp->if_transmit = vtnet_txq_mq_start;
905 	ifp->if_qflush = vtnet_qflush;
906 #else
907 	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
908 	ifp->if_start = vtnet_start;
909 	IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
910 	ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
911 	IFQ_SET_READY(&ifp->if_snd);
912 #endif
913 
914 	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
915 	    vtnet_ifmedia_sts);
916 	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
917 	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
918 
919 	/* Read (or generate) the MAC address for the adapter. */
920 	vtnet_get_hwaddr(sc);
921 
922 	ether_ifattach(ifp, sc->vtnet_hwaddr);
923 
924 	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
925 		ifp->if_capabilities |= IFCAP_LINKSTATE;
926 
927 	/* Tell the upper layer(s) we support long frames. */
928 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
929 	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
930 
931 	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
932 		ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
933 
934 		if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
935 			ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
936 			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
937 		} else {
938 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
939 				ifp->if_capabilities |= IFCAP_TSO4;
940 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
941 				ifp->if_capabilities |= IFCAP_TSO6;
942 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
943 				sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
944 		}
945 
946 		if (ifp->if_capabilities & IFCAP_TSO)
947 			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
948 	}
949 
950 	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM))
951 		ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
952 
953 	if (ifp->if_capabilities & IFCAP_HWCSUM) {
954 		/*
955 		 * VirtIO does not support VLAN tagging, but we can fake
956 		 * it by inserting and removing the 802.1Q header during
957 		 * transmit and receive. We are then able to do checksum
958 		 * offloading of VLAN frames.
959 		 */
960 		ifp->if_capabilities |=
961 		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
962 	}
963 
964 	ifp->if_capenable = ifp->if_capabilities;
965 
966 	/*
967 	 * Capabilities after here are not enabled by default.
968 	 */
969 
970 	if (ifp->if_capabilities & IFCAP_RXCSUM) {
971 		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
972 		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
973 			ifp->if_capabilities |= IFCAP_LRO;
974 	}
975 
976 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
977 		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
978 
979 		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
980 		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
981 		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
982 		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
983 	}
984 
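	/* A negative limit means no bound on work done per Rx pass. */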
985 	limit = vtnet_tunable_int(sc, "rx_process_limit",
986 	    vtnet_rx_process_limit);
987 	if (limit < 0)
988 		limit = INT_MAX;
989 	sc->vtnet_rx_process_limit = limit;
990 
991 	return (0);
992 }
993 
994 static int
995 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
996 {
997 	struct ifnet *ifp;
998 	int frame_size, clsize;
999 
1000 	ifp = sc->vtnet_ifp;
1001 
1002 	if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
1003 		return (EINVAL);
1004 
1005 	frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
1006 	    new_mtu;
1007 
1008 	/*
1009 	 * Based on the new MTU (and hence frame size) determine which
1010 	 * cluster size is most appropriate for the receive queues.
1011 	 */
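	/*
	 * With mergeable Rx buffers a frame may span several buffers, so a
	 * page-sized cluster suffices; otherwise each cluster must be able
	 * to hold a full MTU-sized frame.
	 */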
1012 	if (frame_size <= MCLBYTES) {
1013 		clsize = MCLBYTES;
1014 	} else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1015 		/* Avoid going past 9K jumbos. */
1016 		if (frame_size > MJUM9BYTES)
1017 			return (EINVAL);
1018 		clsize = MJUM9BYTES;
1019 	} else
1020 		clsize = MJUMPAGESIZE;
1021 
1022 	ifp->if_mtu = new_mtu;
1023 	sc->vtnet_rx_new_clsize = clsize;
1024 
1025 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1026 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1027 		vtnet_init_locked(sc);
1028 	}
1029 
1030 	return (0);
1031 }
1032 
1033 static int
1034 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1035 {
1036 	struct vtnet_softc *sc;
1037 	struct ifreq *ifr;
1038 	int reinit, mask, error;
1039 
1040 	sc = ifp->if_softc;
1041 	ifr = (struct ifreq *) data;
1042 	error = 0;
1043 
1044 	switch (cmd) {
1045 	case SIOCSIFMTU:
1046 		if (ifp->if_mtu != ifr->ifr_mtu) {
1047 			VTNET_CORE_LOCK(sc);
1048 			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
1049 			VTNET_CORE_UNLOCK(sc);
1050 		}
1051 		break;
1052 
1053 	case SIOCSIFFLAGS:
1054 		VTNET_CORE_LOCK(sc);
1055 		if ((ifp->if_flags & IFF_UP) == 0) {
1056 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1057 				vtnet_stop(sc);
1058 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1059 			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
1060 			    (IFF_PROMISC | IFF_ALLMULTI)) {
1061 				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1062 					vtnet_rx_filter(sc);
1063 				else
1064 					error = ENOTSUP;
1065 			}
1066 		} else
1067 			vtnet_init_locked(sc);
1068 
1069 		if (error == 0)
1070 			sc->vtnet_if_flags = ifp->if_flags;
1071 		VTNET_CORE_UNLOCK(sc);
1072 		break;
1073 
1074 	case SIOCADDMULTI:
1075 	case SIOCDELMULTI:
1076 		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
1077 			break;
1078 		VTNET_CORE_LOCK(sc);
1079 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1080 			vtnet_rx_filter_mac(sc);
1081 		VTNET_CORE_UNLOCK(sc);
1082 		break;
1083 
1084 	case SIOCSIFMEDIA:
1085 	case SIOCGIFMEDIA:
1086 		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1087 		break;
1088 
1089 	case SIOCSIFCAP:
1090 		VTNET_CORE_LOCK(sc);
1091 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1092 
1093 		if (mask & IFCAP_TXCSUM)
1094 			ifp->if_capenable ^= IFCAP_TXCSUM;
1095 		if (mask & IFCAP_TXCSUM_IPV6)
1096 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1097 		if (mask & IFCAP_TSO4)
1098 			ifp->if_capenable ^= IFCAP_TSO4;
1099 		if (mask & IFCAP_TSO6)
1100 			ifp->if_capenable ^= IFCAP_TSO6;
1101 
1102 		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
1103 		    IFCAP_VLAN_HWFILTER)) {
1104 			/* These Rx features require us to renegotiate. */
1105 			reinit = 1;
1106 
1107 			if (mask & IFCAP_RXCSUM)
1108 				ifp->if_capenable ^= IFCAP_RXCSUM;
1109 			if (mask & IFCAP_RXCSUM_IPV6)
1110 				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1111 			if (mask & IFCAP_LRO)
1112 				ifp->if_capenable ^= IFCAP_LRO;
1113 			if (mask & IFCAP_VLAN_HWFILTER)
1114 				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1115 		} else
1116 			reinit = 0;
1117 
1118 		if (mask & IFCAP_VLAN_HWTSO)
1119 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1120 		if (mask & IFCAP_VLAN_HWTAGGING)
1121 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1122 
1123 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1124 			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1125 			vtnet_init_locked(sc);
1126 		}
1127 
1128 		VTNET_CORE_UNLOCK(sc);
1129 		VLAN_CAPABILITIES(ifp);
1130 
1131 		break;
1132 
1133 	default:
1134 		error = ether_ioctl(ifp, cmd, data);
1135 		break;
1136 	}
1137 
1138 	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1139 
1140 	return (error);
1141 }
1142 
1143 static int
1144 vtnet_rxq_populate(struct vtnet_rxq *rxq)
1145 {
1146 	struct virtqueue *vq;
1147 	int nbufs, error;
1148 
1149 	vq = rxq->vtnrx_vq;
1150 	error = ENOSPC;
1151 
1152 	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1153 		error = vtnet_rxq_new_buf(rxq);
1154 		if (error)
1155 			break;
1156 	}
1157 
1158 	if (nbufs > 0) {
1159 		virtqueue_notify(vq);
1160 		/*
1161 		 * EMSGSIZE signifies the virtqueue did not have enough
1162 		 * entries available to hold the last mbuf. This is not
1163 		 * an error.
1164 		 */
1165 		if (error == EMSGSIZE)
1166 			error = 0;
1167 	}
1168 
1169 	return (error);
1170 }
1171 
1172 static void
1173 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1174 {
1175 	struct virtqueue *vq;
1176 	struct mbuf *m;
1177 	int last;
1178 
1179 	vq = rxq->vtnrx_vq;
1180 	last = 0;
1181 
1182 	while ((m = virtqueue_drain(vq, &last)) != NULL)
1183 		m_freem(m);
1184 
1185 	KASSERT(virtqueue_empty(vq),
1186 	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1187 }
1188 
1189 static struct mbuf *
1190 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1191 {
1192 	struct mbuf *m_head, *m_tail, *m;
1193 	int i, clsize;
1194 
1195 	clsize = sc->vtnet_rx_clsize;
1196 
1197 	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1198 	    ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
1199 
1200 	m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
1201 	if (m_head == NULL)
1202 		goto fail;
1203 
1204 	m_head->m_len = clsize;
1205 	m_tail = m_head;
1206 
1207 	/* Allocate the rest of the chain. */
1208 	for (i = 1; i < nbufs; i++) {
1209 		m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
1210 		if (m == NULL)
1211 			goto fail;
1212 
1213 		m->m_len = clsize;
1214 		m_tail->m_next = m;
1215 		m_tail = m;
1216 	}
1217 
1218 	if (m_tailp != NULL)
1219 		*m_tailp = m_tail;
1220 
1221 	return (m_head);
1222 
1223 fail:
1224 	sc->vtnet_stats.mbuf_alloc_failed++;
1225 	m_freem(m_head);
1226 
1227 	return (NULL);
1228 }
1229 
1230 /*
1231  * Slow path for when LRO without mergeable buffers is negotiated.
1232  */
1233 static int
1234 vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1235     int len0)
1236 {
1237 	struct vtnet_softc *sc;
1238 	struct mbuf *m, *m_prev;
1239 	struct mbuf *m_new, *m_tail;
1240 	int len, clsize, nreplace, error;
1241 
1242 	sc = rxq->vtnrx_sc;
1243 	clsize = sc->vtnet_rx_clsize;
1244 
1245 	m_prev = NULL;
1246 	m_tail = NULL;
1247 	nreplace = 0;
1248 
1249 	m = m0;
1250 	len = len0;
1251 
1252 	/*
1253 	 * Since these mbuf chains are so large, we avoid allocating an
1254 	 * entire replacement chain if possible. When the received frame
1255 	 * did not consume the entire chain, the unused mbufs are moved
1256 	 * to the replacement chain.
1257 	 */
1258 	while (len > 0) {
1259 		/*
1260 		 * Something is seriously wrong if we received a frame
1261 		 * larger than the chain. Drop it.
1262 		 */
1263 		if (m == NULL) {
1264 			sc->vtnet_stats.rx_frame_too_large++;
1265 			return (EMSGSIZE);
1266 		}
1267 
1268 		/* We always allocate the same cluster size. */
1269 		KASSERT(m->m_len == clsize,
1270 		    ("%s: mbuf size %d is not the cluster size %d",
1271 		    __func__, m->m_len, clsize));
1272 
1273 		m->m_len = MIN(m->m_len, len);
1274 		len -= m->m_len;
1275 
1276 		m_prev = m;
1277 		m = m->m_next;
1278 		nreplace++;
1279 	}
1280 
1281 	KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
1282 	    ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
1283 	    sc->vtnet_rx_nmbufs));
1284 
1285 	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1286 	if (m_new == NULL) {
1287 		m_prev->m_len = clsize;
1288 		return (ENOBUFS);
1289 	}
1290 
1291 	/*
1292 	 * Move any unused mbufs from the received chain onto the end
1293 	 * of the new chain.
1294 	 */
1295 	if (m_prev->m_next != NULL) {
1296 		m_tail->m_next = m_prev->m_next;
1297 		m_prev->m_next = NULL;
1298 	}
1299 
1300 	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1301 	if (error) {
1302 		/*
1303 		 * BAD! We could not enqueue the replacement mbuf chain. We
1304 		 * must restore the m0 chain to the original state if it was
1305 		 * modified so we can subsequently discard it.
1306 		 *
1307 	 * NOTE: The replacement is supposed to be an identical copy
1308 	 * of the one just dequeued, so this is an unexpected error.
1309 		 */
1310 		sc->vtnet_stats.rx_enq_replacement_failed++;
1311 
1312 		if (m_tail->m_next != NULL) {
1313 			m_prev->m_next = m_tail->m_next;
1314 			m_tail->m_next = NULL;
1315 		}
1316 
1317 		m_prev->m_len = clsize;
1318 		m_freem(m_new);
1319 	}
1320 
1321 	return (error);
1322 }
1323 
1324 static int
1325 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1326 {
1327 	struct vtnet_softc *sc;
1328 	struct mbuf *m_new;
1329 	int error;
1330 
1331 	sc = rxq->vtnrx_sc;
1332 
1333 	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1334 	    ("%s: chained mbuf without LRO_NOMRG", __func__));
1335 
1336 	if (m->m_next == NULL) {
1337 		/* Fast-path for the common case of just one mbuf. */
1338 		if (m->m_len < len)
1339 			return (EINVAL);
1340 
1341 		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1342 		if (m_new == NULL)
1343 			return (ENOBUFS);
1344 
1345 		error = vtnet_rxq_enqueue_buf(rxq, m_new);
1346 		if (error) {
1347 			/*
1348 			 * The new mbuf is supposed to be an identical
1349 			 * copy of the one just dequeued so this is an
1350 			 * unexpected error.
1351 			 */
1352 			m_freem(m_new);
1353 			sc->vtnet_stats.rx_enq_replacement_failed++;
1354 		} else
1355 			m->m_len = len;
1356 	} else
1357 		error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
1358 
1359 	return (error);
1360 }
1361 
1362 static int
1363 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1364 {
1365 	struct sglist sg;
1366 	struct sglist_seg segs[VTNET_MAX_RX_SEGS];
1367 	struct vtnet_softc *sc;
1368 	struct vtnet_rx_header *rxhdr;
1369 	uint8_t *mdata;
1370 	int offset, error;
1371 
1372 	sc = rxq->vtnrx_sc;
1373 	mdata = mtod(m, uint8_t *);
1374 
1375 	VTNET_RXQ_LOCK_ASSERT(rxq);
1376 	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1377 	    ("%s: chained mbuf without LRO_NOMRG", __func__));
1378 	KASSERT(m->m_len == sc->vtnet_rx_clsize,
1379 	    ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
1380 	     sc->vtnet_rx_clsize));
1381 
1382 	sglist_init(&sg, VTNET_MAX_RX_SEGS, segs);
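	/*
	 * Without mergeable buffers, the virtio header occupies the
	 * vtnet_rx_header region at the front of the cluster and the
	 * packet data begins at the padded offset that follows it.
	 */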
1383 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1384 		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1385 		rxhdr = (struct vtnet_rx_header *) mdata;
1386 		sglist_append(&sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1387 		offset = sizeof(struct vtnet_rx_header);
1388 	} else
1389 		offset = 0;
1390 
1391 	sglist_append(&sg, mdata + offset, m->m_len - offset);
1392 	if (m->m_next != NULL) {
1393 		error = sglist_append_mbuf(&sg, m->m_next);
1394 		MPASS(error == 0);
1395 	}
1396 
1397 	error = virtqueue_enqueue(rxq->vtnrx_vq, m, &sg, 0, sg.sg_nseg);
1398 
1399 	return (error);
1400 }
1401 
1402 static int
1403 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1404 {
1405 	struct vtnet_softc *sc;
1406 	struct mbuf *m;
1407 	int error;
1408 
1409 	sc = rxq->vtnrx_sc;
1410 
1411 	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1412 	if (m == NULL)
1413 		return (ENOBUFS);
1414 
1415 	error = vtnet_rxq_enqueue_buf(rxq, m);
1416 	if (error)
1417 		m_freem(m);
1418 
1419 	return (error);
1420 }
1421 
1422 /*
1423  * Use the checksum offset in the VirtIO header to set the
1424  * correct CSUM_* flags.
1425  */
1426 static int
1427 vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
1428     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1429 {
1430 	struct vtnet_softc *sc;
1431 #if defined(INET) || defined(INET6)
1432 	int offset = hdr->csum_start + hdr->csum_offset;
1433 #endif
1434 
1435 	sc = rxq->vtnrx_sc;
1436 
1437 	/* Only do a basic sanity check on the offset. */
1438 	switch (eth_type) {
1439 #if defined(INET)
1440 	case ETHERTYPE_IP:
1441 		if (__predict_false(offset < ip_start + sizeof(struct ip)))
1442 			return (1);
1443 		break;
1444 #endif
1445 #if defined(INET6)
1446 	case ETHERTYPE_IPV6:
1447 		if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
1448 			return (1);
1449 		break;
1450 #endif
1451 	default:
1452 		sc->vtnet_stats.rx_csum_bad_ethtype++;
1453 		return (1);
1454 	}
1455 
1456 	/*
1457 	 * Use the offset to determine the appropriate CSUM_* flags. This is
1458 	 * a bit dirty, but we can get by with it since the checksum offsets
1459 	 * happen to be different. We assume the host does not do IPv4
1460 	 * header checksum offloading.
1461 	 */
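	/* The UDP, SCTP and TCP checksums sit at offsets 6, 8 and 16. */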
1462 	switch (hdr->csum_offset) {
1463 	case offsetof(struct udphdr, uh_sum):
1464 	case offsetof(struct tcphdr, th_sum):
1465 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1466 		m->m_pkthdr.csum_data = 0xFFFF;
1467 		break;
1468 	case offsetof(struct sctphdr, checksum):
1469 		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1470 		break;
1471 	default:
1472 		sc->vtnet_stats.rx_csum_bad_offset++;
1473 		return (1);
1474 	}
1475 
1476 	return (0);
1477 }
1478 
1479 static int
1480 vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
1481     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1482 {
1483 	struct vtnet_softc *sc;
1484 	int offset, proto;
1485 
1486 	sc = rxq->vtnrx_sc;
1487 
1488 	switch (eth_type) {
1489 #if defined(INET)
1490 	case ETHERTYPE_IP: {
1491 		struct ip *ip;
1492 		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
1493 			return (1);
1494 		ip = (struct ip *)(m->m_data + ip_start);
1495 		proto = ip->ip_p;
1496 		offset = ip_start + (ip->ip_hl << 2);
1497 		break;
1498 	}
1499 #endif
1500 #if defined(INET6)
1501 	case ETHERTYPE_IPV6:
1502 		if (__predict_false(m->m_len < ip_start +
1503 		    sizeof(struct ip6_hdr)))
1504 			return (1);
1505 		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
1506 		if (__predict_false(offset < 0))
1507 			return (1);
1508 		break;
1509 #endif
1510 	default:
1511 		sc->vtnet_stats.rx_csum_bad_ethtype++;
1512 		return (1);
1513 	}
1514 
1515 	switch (proto) {
1516 	case IPPROTO_TCP:
1517 		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
1518 			return (1);
1519 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1520 		m->m_pkthdr.csum_data = 0xFFFF;
1521 		break;
1522 	case IPPROTO_UDP:
1523 		if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
1524 			return (1);
1525 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1526 		m->m_pkthdr.csum_data = 0xFFFF;
1527 		break;
1528 	case IPPROTO_SCTP:
1529 		if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
1530 			return (1);
1531 		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1532 		break;
1533 	default:
1534 		/*
1535 		 * For the remaining protocols, FreeBSD does not support
1536 		 * checksum offloading, so the checksum will be recomputed.
1537 		 */
1538 #if 0
1539 		if_printf(sc->vtnet_ifp, "%s: cksum offload of unsupported "
1540 		    "protocol eth_type=%#x proto=%d csum_start=%d "
1541 		    "csum_offset=%d\n", __func__, eth_type, proto,
1542 		    hdr->csum_start, hdr->csum_offset);
1543 #endif
1544 		break;
1545 	}
1546 
1547 	return (0);
1548 }
1549 
1550 /*
1551  * Set the appropriate CSUM_* flags. Unfortunately, the information
1552  * provided is not directly useful to us. The VirtIO header gives the
1553  * offset of the checksum, which is all Linux needs, but this is not
1554  * how FreeBSD does things. We are forced to peek inside the packet
1555  * a bit.
1556  *
1557  * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
1558  * could accept the offsets and let the stack figure it out.
1559  */
1560 static int
1561 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1562     struct virtio_net_hdr *hdr)
1563 {
1564 	struct ether_header *eh;
1565 	struct ether_vlan_header *evh;
1566 	uint16_t eth_type;
1567 	int offset, error;
1568 
1569 	eh = mtod(m, struct ether_header *);
1570 	eth_type = ntohs(eh->ether_type);
1571 	if (eth_type == ETHERTYPE_VLAN) {
1572 		/* BMV: We should handle nested VLAN tags too. */
1573 		evh = mtod(m, struct ether_vlan_header *);
1574 		eth_type = ntohs(evh->evl_proto);
1575 		offset = sizeof(struct ether_vlan_header);
1576 	} else
1577 		offset = sizeof(struct ether_header);
1578 
1579 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1580 		error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
1581 	else
1582 		error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
1583 
1584 	return (error);
1585 }
1586 
1587 static void
1588 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1589 {
1590 	struct mbuf *m;
1591 
1592 	while (--nbufs > 0) {
1593 		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1594 		if (m == NULL)
1595 			break;
1596 		vtnet_rxq_discard_buf(rxq, m);
1597 	}
1598 }
1599 
1600 static void
1601 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1602 {
1603 	int error;
1604 
1605 	/*
1606 	 * Requeue the discarded mbuf. This should always be successful
1607 	 * since it was just dequeued.
1608 	 */
1609 	error = vtnet_rxq_enqueue_buf(rxq, m);
1610 	KASSERT(error == 0,
1611 	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
1612 }
1613 
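/*
 * Dequeue the remaining buffers of a mergeable frame, append them to the
 * head mbuf, and replenish the ring as we go.
 */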
1614 static int
1615 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1616 {
1617 	struct vtnet_softc *sc;
1618 	struct ifnet *ifp;
1619 	struct virtqueue *vq;
1620 	struct mbuf *m, *m_tail;
1621 	int len;
1622 
1623 	sc = rxq->vtnrx_sc;
1624 	vq = rxq->vtnrx_vq;
1625 	ifp = sc->vtnet_ifp;
1626 	m_tail = m_head;
1627 
1628 	while (--nbufs > 0) {
1629 		m = virtqueue_dequeue(vq, &len);
1630 		if (m == NULL) {
1631 			rxq->vtnrx_stats.vrxs_ierrors++;
1632 			goto fail;
1633 		}
1634 
1635 		if (vtnet_rxq_new_buf(rxq) != 0) {
1636 			rxq->vtnrx_stats.vrxs_iqdrops++;
1637 			vtnet_rxq_discard_buf(rxq, m);
1638 			if (nbufs > 1)
1639 				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1640 			goto fail;
1641 		}
1642 
1643 		if (m->m_len < len)
1644 			len = m->m_len;
1645 
1646 		m->m_len = len;
1647 		m->m_flags &= ~M_PKTHDR;
1648 
1649 		m_head->m_pkthdr.len += len;
1650 		m_tail->m_next = m;
1651 		m_tail = m;
1652 	}
1653 
1654 	return (0);
1655 
1656 fail:
1657 	sc->vtnet_stats.rx_mergeable_failed++;
1658 	m_freem(m_head);
1659 
1660 	return (1);
1661 }
1662 
1663 static void
1664 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
1665     struct virtio_net_hdr *hdr)
1666 {
1667 	struct vtnet_softc *sc;
1668 	struct ifnet *ifp;
1669 	struct ether_header *eh;
1670 
1671 	sc = rxq->vtnrx_sc;
1672 	ifp = sc->vtnet_ifp;
1673 
1674 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1675 		eh = mtod(m, struct ether_header *);
1676 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1677 			vtnet_vlan_tag_remove(m);
1678 			/*
1679 			 * With the 802.1Q header removed, update the
1680 			 * checksum starting location accordingly.
1681 			 */
1682 			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1683 				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
1684 		}
1685 	}
1686 
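	/* Tag the mbuf with the Rx queue index as its flowid. */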
1687 	m->m_pkthdr.flowid = rxq->vtnrx_id;
1688 	m->m_flags |= M_FLOWID;
1689 
1690 	/*
1691 	 * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
1692 	 * distinction that Linux does. Need to reevaluate if performing
1693 	 * offloading for the NEEDS_CSUM case is really appropriate.
1694 	 */
1695 	if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
1696 	    VIRTIO_NET_HDR_F_DATA_VALID)) {
1697 		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
1698 			rxq->vtnrx_stats.vrxs_csum++;
1699 		else
1700 			rxq->vtnrx_stats.vrxs_csum_failed++;
1701 	}
1702 
1703 	rxq->vtnrx_stats.vrxs_ipackets++;
1704 	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
1705 
1706 	VTNET_RXQ_UNLOCK(rxq);
1707 	(*ifp->if_input)(ifp, m);
1708 	VTNET_RXQ_LOCK(rxq);
1709 }
1710 
1711 static int
1712 vtnet_rxq_eof(struct vtnet_rxq *rxq)
1713 {
1714 	struct virtio_net_hdr lhdr, *hdr;
1715 	struct vtnet_softc *sc;
1716 	struct ifnet *ifp;
1717 	struct virtqueue *vq;
1718 	struct mbuf *m;
1719 	struct virtio_net_hdr_mrg_rxbuf *mhdr;
1720 	int len, deq, nbufs, adjsz, count;
1721 
1722 	sc = rxq->vtnrx_sc;
1723 	vq = rxq->vtnrx_vq;
1724 	ifp = sc->vtnet_ifp;
1725 	hdr = &lhdr;
1726 	deq = 0;
1727 	count = sc->vtnet_rx_process_limit;
1728 
1729 	VTNET_RXQ_LOCK_ASSERT(rxq);
1730 
1731 	while (count-- > 0) {
1732 		m = virtqueue_dequeue(vq, &len);
1733 		if (m == NULL)
1734 			break;
1735 		deq++;
1736 
1737 		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1738 			rxq->vtnrx_stats.vrxs_ierrors++;
1739 			vtnet_rxq_discard_buf(rxq, m);
1740 			continue;
1741 		}
1742 
1743 		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1744 			nbufs = 1;
1745 			adjsz = sizeof(struct vtnet_rx_header);
1746 			/*
1747 			 * Account for our pad inserted between the header
1748 			 * and the actual start of the frame.
1749 			 */
1750 			len += VTNET_RX_HEADER_PAD;
1751 		} else {
1752 			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1753 			nbufs = mhdr->num_buffers;
1754 			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1755 		}
1756 
1757 		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
1758 			rxq->vtnrx_stats.vrxs_iqdrops++;
1759 			vtnet_rxq_discard_buf(rxq, m);
1760 			if (nbufs > 1)
1761 				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1762 			continue;
1763 		}
1764 
1765 		m->m_pkthdr.len = len;
1766 		m->m_pkthdr.rcvif = ifp;
1767 		m->m_pkthdr.csum_flags = 0;
1768 
1769 		if (nbufs > 1) {
1770 			/* Dequeue the rest of chain. */
1771 			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
1772 				continue;
1773 		}
1774 
1775 		/*
1776 		 * Save a copy of the header before we strip it. For both mergeable
1777 		 * and non-mergeable, the header is at the beginning of the
1778 		 * mbuf data. We no longer need num_buffers, so always use a
1779 		 * regular header.
1780 		 *
1781 		 * BMV: Is this memcpy() expensive? We know the mbuf data is
1782 		 * still valid even after the m_adj().
1783 		 */
1784 		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
1785 		m_adj(m, adjsz);
1786 
1787 		vtnet_rxq_input(rxq, m, hdr);
1788 
1789 		/* Must recheck after dropping the Rx lock. */
1790 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1791 			break;
1792 	}
1793 
1794 	if (deq > 0)
1795 		virtqueue_notify(vq);
1796 
1797 	return (count > 0 ? 0 : EAGAIN);
1798 }
1799 
1800 static void
1801 vtnet_rx_vq_intr(void *xrxq)
1802 {
1803 	struct vtnet_softc *sc;
1804 	struct vtnet_rxq *rxq;
1805 	struct ifnet *ifp;
1806 	int tries, more;
1807 
1808 	rxq = xrxq;
1809 	sc = rxq->vtnrx_sc;
1810 	ifp = sc->vtnet_ifp;
1811 	tries = 0;
1812 
1813 	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
1814 		/*
1815 		 * Ignore this interrupt. Either this is a spurious interrupt
1816 		 * or multiqueue without per-VQ MSIX so every queue needs to
1817 		 * be polled (a brain dead configuration we could try harder
1818 		 * to avoid).
1819 		 */
1820 		vtnet_rxq_disable_intr(rxq);
1821 		return;
1822 	}
1823 
1824 	VTNET_RXQ_LOCK(rxq);
1825 
1826 again:
1827 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1828 		VTNET_RXQ_UNLOCK(rxq);
1829 		return;
1830 	}
1831 
1832 	more = vtnet_rxq_eof(rxq);
1833 	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1834 		if (!more)
1835 			vtnet_rxq_disable_intr(rxq);
1836 		/*
1837 		 * This is an occasional condition or race (when !more),
1838 		 * so retry a few times before scheduling the taskqueue.
1839 		 */
1840 		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
1841 			goto again;
1842 
1843 		VTNET_RXQ_UNLOCK(rxq);
1844 		rxq->vtnrx_stats.vrxs_rescheduled++;
1845 		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1846 	} else
1847 		VTNET_RXQ_UNLOCK(rxq);
1848 }
1849 
1850 static void
1851 vtnet_rxq_tq_intr(void *xrxq, int pending)
1852 {
1853 	struct vtnet_softc *sc;
1854 	struct vtnet_rxq *rxq;
1855 	struct ifnet *ifp;
1856 	int more;
1857 
1858 	rxq = xrxq;
1859 	sc = rxq->vtnrx_sc;
1860 	ifp = sc->vtnet_ifp;
1861 
1862 	VTNET_RXQ_LOCK(rxq);
1863 
1864 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1865 		VTNET_RXQ_UNLOCK(rxq);
1866 		return;
1867 	}
1868 
1869 	more = vtnet_rxq_eof(rxq);
1870 	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1871 		if (!more)
1872 			vtnet_rxq_disable_intr(rxq);
1873 		rxq->vtnrx_stats.vrxs_rescheduled++;
1874 		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1875 	}
1876 
1877 	VTNET_RXQ_UNLOCK(rxq);
1878 }
1879 
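/* Drain the Tx virtqueue, freeing any pending mbufs and their headers. */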
1880 static void
1881 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
1882 {
1883 	struct virtqueue *vq;
1884 	struct vtnet_tx_header *txhdr;
1885 	int last;
1886 
1887 	vq = txq->vtntx_vq;
1888 	last = 0;
1889 
1890 	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
1891 		m_freem(txhdr->vth_mbuf);
1892 		uma_zfree(vtnet_tx_header_zone, txhdr);
1893 	}
1894 
1895 	KASSERT(virtqueue_empty(vq),
1896 	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
1897 }
1898 
1899 /*
1900  * BMV: Much of this can go away once we finally have offsets in
1901  * the mbuf packet header. Bug andre@.
1902  */
1903 static int
1904 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
1905     int *etype, int *proto, int *start)
1906 {
1907 	struct vtnet_softc *sc;
1908 	struct ether_vlan_header *evh;
1909 	int offset;
1910 
1911 	sc = txq->vtntx_sc;
1912 
1913 	evh = mtod(m, struct ether_vlan_header *);
1914 	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1915 		/* BMV: We should handle nested VLAN tags too. */
1916 		*etype = ntohs(evh->evl_proto);
1917 		offset = sizeof(struct ether_vlan_header);
1918 	} else {
1919 		*etype = ntohs(evh->evl_encap_proto);
1920 		offset = sizeof(struct ether_header);
1921 	}
1922 
1923 	switch (*etype) {
1924 #if defined(INET)
1925 	case ETHERTYPE_IP: {
1926 		struct ip *ip, iphdr;
1927 		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
1928 			m_copydata(m, offset, sizeof(struct ip),
1929 			    (caddr_t) &iphdr);
1930 			ip = &iphdr;
1931 		} else
1932 			ip = (struct ip *)(m->m_data + offset);
1933 		*proto = ip->ip_p;
1934 		*start = offset + (ip->ip_hl << 2);
1935 		break;
1936 	}
1937 #endif
1938 #if defined(INET6)
1939 	case ETHERTYPE_IPV6:
1940 		*proto = -1;
1941 		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
1942 		/* Assert the network stack sent us a valid packet. */
1943 		KASSERT(*start > offset,
1944 		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
1945 		    *start, offset, *proto));
1946 		break;
1947 #endif
1948 	default:
1949 		sc->vtnet_stats.tx_csum_bad_ethtype++;
1950 		return (EINVAL);
1951 	}
1952 
1953 	return (0);
1954 }
1955 
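/*
 * Fill in the TSO fields of the virtio header: hdr_len covers the
 * Ethernet, IP, and TCP headers, gso_size is the TCP segment size, and
 * gso_type selects TCPv4 or TCPv6, with ECN only if it was negotiated.
 */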
1956 static int
1957 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
1958     int offset, struct virtio_net_hdr *hdr)
1959 {
1960 	static struct timeval lastecn;
1961 	static int curecn;
1962 	struct vtnet_softc *sc;
1963 	struct tcphdr *tcp, tcphdr;
1964 
1965 	sc = txq->vtntx_sc;
1966 
1967 	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
1968 		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
1969 		tcp = &tcphdr;
1970 	} else
1971 		tcp = (struct tcphdr *)(m->m_data + offset);
1972 
1973 	hdr->hdr_len = offset + (tcp->th_off << 2);
1974 	hdr->gso_size = m->m_pkthdr.tso_segsz;
1975 	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
1976 	    VIRTIO_NET_HDR_GSO_TCPV6;
1977 
1978 	if (tcp->th_flags & TH_CWR) {
1979 		/*
1980 		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
1981 		 * ECN support is not on a per-interface basis, but globally via
1982 		 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
1983 		 */
1984 		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
1985 			if (ppsratecheck(&lastecn, &curecn, 1))
1986 				if_printf(sc->vtnet_ifp,
1987 				    "TSO with ECN not negotiated with host\n");
1988 			return (ENOTSUP);
1989 		}
1990 		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1991 	}
1992 
1993 	txq->vtntx_stats.vtxs_tso++;
1994 
1995 	return (0);
1996 }
1997 
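/*
 * Set up the requested checksum and TSO offloads in the virtio header.
 * On failure the mbuf is freed and NULL is returned.
 */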
1998 static struct mbuf *
1999 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2000     struct virtio_net_hdr *hdr)
2001 {
2002 	struct vtnet_softc *sc;
2003 	int flags, etype, csum_start, proto, error;
2004 
2005 	sc = txq->vtntx_sc;
2006 	flags = m->m_pkthdr.csum_flags;
2007 
2008 	error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2009 	if (error)
2010 		goto drop;
2011 
2012 	if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
2013 	    (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
2014 		/*
2015 		 * We could compare the IP protocol vs the CSUM_ flag too,
2016 		 * but that really should not be necessary.
2017 		 */
2018 		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2019 		hdr->csum_start = csum_start;
2020 		hdr->csum_offset = m->m_pkthdr.csum_data;
2021 		txq->vtntx_stats.vtxs_csum++;
2022 	}
2023 
2024 	if (flags & CSUM_TSO) {
2025 		if (__predict_false(proto != IPPROTO_TCP)) {
2026 			/* Likely failed to correctly parse the mbuf. */
2027 			sc->vtnet_stats.tx_tso_not_tcp++;
2028 			goto drop;
2029 		}
2030 
2031 		KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
2032 		    ("%s: mbuf %p TSO without checksum offload %#x",
2033 		    __func__, m, flags));
2034 
2035 		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2036 		if (error)
2037 			goto drop;
2038 	}
2039 
2040 	return (m);
2041 
2042 drop:
2043 	m_freem(m);
2044 	return (NULL);
2045 }
2046 
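/*
 * Enqueue a packet on the Tx virtqueue: the first sglist segment is the
 * virtio header, followed by the mbuf chain. If the chain needs too many
 * segments, collapse it once before failing with ENOBUFS.
 */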
2047 static int
2048 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2049     struct vtnet_tx_header *txhdr)
2050 {
2051 	struct sglist sg;
2052 	struct sglist_seg segs[VTNET_MAX_TX_SEGS];
2053 	struct vtnet_softc *sc;
2054 	struct virtqueue *vq;
2055 	struct mbuf *m;
2056 	int collapsed, error;
2057 
2058 	vq = txq->vtntx_vq;
2059 	sc = txq->vtntx_sc;
2060 	m = *m_head;
2061 	collapsed = 0;
2062 
2063 	sglist_init(&sg, VTNET_MAX_TX_SEGS, segs);
2064 	error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2065 	KASSERT(error == 0 && sg.sg_nseg == 1,
2066 	    ("%s: error %d adding header to sglist", __func__, error));
2067 
2068 again:
2069 	error = sglist_append_mbuf(&sg, m);
2070 	if (error) {
2071 		if (collapsed)
2072 			goto fail;
2073 
2074 		m = m_collapse(m, M_NOWAIT, VTNET_MAX_TX_SEGS - 1);
2075 		if (m == NULL)
2076 			goto fail;
2077 
2078 		*m_head = m;
2079 		collapsed = 1;
2080 		txq->vtntx_stats.vtxs_collapsed++;
2081 		goto again;
2082 	}
2083 
2084 	txhdr->vth_mbuf = m;
2085 	error = virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0);
2086 
2087 	return (error);
2088 
2089 fail:
2090 	m_freem(*m_head);
2091 	*m_head = NULL;
2092 
2093 	return (ENOBUFS);
2094 }
2095 
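/*
 * Encapsulate an outbound mbuf: allocate a Tx header, insert any VLAN
 * tag in software, apply checksum/TSO offloads, and enqueue the result.
 */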
2096 static int
2097 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head)
2098 {
2099 	struct vtnet_softc *sc;
2100 	struct vtnet_tx_header *txhdr;
2101 	struct virtio_net_hdr *hdr;
2102 	struct mbuf *m;
2103 	int error;
2104 
2105 	sc = txq->vtntx_sc;
2106 	m = *m_head;
2107 	M_ASSERTPKTHDR(m);
2108 
2109 	txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO);
2110 	if (txhdr == NULL) {
2111 		m_freem(m);
2112 		*m_head = NULL;
2113 		return (ENOMEM);
2114 	}
2115 
2116 	/*
2117 	 * Always use the non-mergeable header, regardless of whether the
2118 	 * feature was negotiated. For transmit, num_buffers is always zero.
2119 	 * The vtnet_hdr_size is used to enqueue the correct header size.
2120 	 */
2121 	hdr = &txhdr->vth_uhdr.hdr;
2122 
2123 	if (m->m_flags & M_VLANTAG) {
2124 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2125 		if ((*m_head = m) == NULL) {
2126 			error = ENOBUFS;
2127 			goto fail;
2128 		}
2129 		m->m_flags &= ~M_VLANTAG;
2130 	}
2131 
2132 	if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2133 		m = vtnet_txq_offload(txq, m, hdr);
2134 		if ((*m_head = m) == NULL) {
2135 			error = ENOBUFS;
2136 			goto fail;
2137 		}
2138 	}
2139 
2140 	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2141 	if (error == 0)
2142 		return (0);
2143 
2144 fail:
2145 	uma_zfree(vtnet_tx_header_zone, txhdr);
2146 
2147 	return (error);
2148 }
2149 
2150 #ifdef VTNET_LEGACY_TX
2151 
2152 static void
2153 vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
2154 {
2155 	struct vtnet_softc *sc;
2156 	struct virtqueue *vq;
2157 	struct mbuf *m0;
2158 	int enq;
2159 
2160 	sc = txq->vtntx_sc;
2161 	vq = txq->vtntx_vq;
2162 	enq = 0;
2163 
2164 	VTNET_TXQ_LOCK_ASSERT(txq);
2165 
2166 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2167 	    sc->vtnet_link_active == 0)
2168 		return;
2169 
2170 	vtnet_txq_eof(txq);
2171 
2172 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2173 		if (virtqueue_full(vq))
2174 			break;
2175 
2176 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
2177 		if (m0 == NULL)
2178 			break;
2179 
2180 		if (vtnet_txq_encap(txq, &m0) != 0) {
2181 			if (m0 != NULL)
2182 				IFQ_DRV_PREPEND(&ifp->if_snd, m0);
2183 			break;
2184 		}
2185 
2186 		enq++;
2187 		ETHER_BPF_MTAP(ifp, m0);
2188 	}
2189 
2190 	if (enq > 0) {
2191 		virtqueue_notify(vq);
2192 		txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2193 	}
2194 }
2195 
2196 static void
2197 vtnet_start(struct ifnet *ifp)
2198 {
2199 	struct vtnet_softc *sc;
2200 	struct vtnet_txq *txq;
2201 
2202 	sc = ifp->if_softc;
2203 	txq = &sc->vtnet_txqs[0];
2204 
2205 	VTNET_TXQ_LOCK(txq);
2206 	vtnet_start_locked(txq, ifp);
2207 	VTNET_TXQ_UNLOCK(txq);
2208 }
2209 
2210 #else /* !VTNET_LEGACY_TX */
2211 
2212 static int
2213 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2214 {
2215 	struct vtnet_softc *sc;
2216 	struct virtqueue *vq;
2217 	struct buf_ring *br;
2218 	struct ifnet *ifp;
2219 	int enq, error;
2220 
2221 	sc = txq->vtntx_sc;
2222 	vq = txq->vtntx_vq;
2223 	br = txq->vtntx_br;
2224 	ifp = sc->vtnet_ifp;
2225 	enq = 0;
2226 	error = 0;
2227 
2228 	VTNET_TXQ_LOCK_ASSERT(txq);
2229 
2230 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2231 	    sc->vtnet_link_active == 0) {
2232 		if (m != NULL)
2233 			error = drbr_enqueue(ifp, br, m);
2234 		return (error);
2235 	}
2236 
2237 	if (m != NULL) {
2238 		error = drbr_enqueue(ifp, br, m);
2239 		if (error)
2240 			return (error);
2241 	}
2242 
2243 	vtnet_txq_eof(txq);
2244 
2245 	while ((m = drbr_peek(ifp, br)) != NULL) {
2246 		if (virtqueue_full(vq)) {
2247 			drbr_putback(ifp, br, m);
2248 			error = ENOBUFS;
2249 			break;
2250 		}
2251 
2252 		error = vtnet_txq_encap(txq, &m);
2253 		if (error) {
2254 			if (m != NULL)
2255 				drbr_putback(ifp, br, m);
2256 			else
2257 				drbr_advance(ifp, br);
2258 			break;
2259 		}
2260 		drbr_advance(ifp, br);
2261 
2262 		enq++;
2263 		ETHER_BPF_MTAP(ifp, m);
2264 	}
2265 
2266 	if (enq > 0) {
2267 		virtqueue_notify(vq);
2268 		txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2269 	}
2270 
2271 	return (error);
2272 }
2273 
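/*
 * Multiqueue transmit entry point. Select the Tx queue from the mbuf's
 * flow ID when present, otherwise from the current CPU, and defer to
 * the taskqueue if the queue lock is contended.
 */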
2274 static int
2275 vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2276 {
2277 	struct vtnet_softc *sc;
2278 	struct vtnet_txq *txq;
2279 	int i, npairs, error;
2280 
2281 	sc = ifp->if_softc;
2282 	npairs = sc->vtnet_act_vq_pairs;
2283 
2284 	if (m->m_flags & M_FLOWID)
2285 		i = m->m_pkthdr.flowid % npairs;
2286 	else
2287 		i = curcpu % npairs;
2288 
2289 	txq = &sc->vtnet_txqs[i];
2290 
2291 	if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2292 		error = vtnet_txq_mq_start_locked(txq, m);
2293 		VTNET_TXQ_UNLOCK(txq);
2294 	} else {
2295 		error = drbr_enqueue(ifp, txq->vtntx_br, m);
2296 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2297 	}
2298 
2299 	return (error);
2300 }
2301 
2302 static void
2303 vtnet_txq_tq_deferred(void *xtxq, int pending)
2304 {
2305 	struct vtnet_softc *sc;
2306 	struct vtnet_txq *txq;
2307 
2308 	txq = xtxq;
2309 	sc = txq->vtntx_sc;
2310 
2311 	VTNET_TXQ_LOCK(txq);
2312 	if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2313 		vtnet_txq_mq_start_locked(txq, NULL);
2314 	VTNET_TXQ_UNLOCK(txq);
2315 }
2316 
2317 #endif /* VTNET_LEGACY_TX */
2318 
2319 static void
2320 vtnet_txq_start(struct vtnet_txq *txq)
2321 {
2322 	struct vtnet_softc *sc;
2323 	struct ifnet *ifp;
2324 
2325 	sc = txq->vtntx_sc;
2326 	ifp = sc->vtnet_ifp;
2327 
2328 #ifdef VTNET_LEGACY_TX
2329 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2330 		vtnet_start_locked(txq, ifp);
2331 #else
2332 	if (!drbr_empty(ifp, txq->vtntx_br))
2333 		vtnet_txq_mq_start_locked(txq, NULL);
2334 #endif
2335 }
2336 
2337 static void
2338 vtnet_txq_tq_intr(void *xtxq, int pending)
2339 {
2340 	struct vtnet_softc *sc;
2341 	struct vtnet_txq *txq;
2342 	struct ifnet *ifp;
2343 
2344 	txq = xtxq;
2345 	sc = txq->vtntx_sc;
2346 	ifp = sc->vtnet_ifp;
2347 
2348 	VTNET_TXQ_LOCK(txq);
2349 
2350 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2351 		VTNET_TXQ_UNLOCK(txq);
2352 		return;
2353 	}
2354 
2355 	vtnet_txq_eof(txq);
2356 
2357 	vtnet_txq_start(txq);
2358 
2359 	if (vtnet_txq_enable_intr(txq) != 0) {
2360 		vtnet_txq_disable_intr(txq);
2361 		txq->vtntx_stats.vtxs_rescheduled++;
2362 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2363 	}
2364 
2365 	VTNET_TXQ_UNLOCK(txq);
2366 }
2367 
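/*
 * Reclaim completed Tx buffers, updating the statistics, and clear the
 * watchdog once the virtqueue is empty.
 */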
2368 static void
2369 vtnet_txq_eof(struct vtnet_txq *txq)
2370 {
2371 	struct virtqueue *vq;
2372 	struct vtnet_tx_header *txhdr;
2373 	struct mbuf *m;
2374 
2375 	vq = txq->vtntx_vq;
2376 	VTNET_TXQ_LOCK_ASSERT(txq);
2377 
2378 	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2379 		m = txhdr->vth_mbuf;
2380 
2381 		txq->vtntx_stats.vtxs_opackets++;
2382 		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2383 		if (m->m_flags & M_MCAST)
2384 			txq->vtntx_stats.vtxs_omcasts++;
2385 
2386 		m_freem(m);
2387 		uma_zfree(vtnet_tx_header_zone, txhdr);
2388 	}
2389 
2390 	if (virtqueue_empty(vq))
2391 		txq->vtntx_watchdog = 0;
2392 }
2393 
2394 static void
2395 vtnet_tx_vq_intr(void *xtxq)
2396 {
2397 	struct vtnet_softc *sc;
2398 	struct vtnet_txq *txq;
2399 	struct ifnet *ifp;
2400 	int tries;
2401 
2402 	txq = xtxq;
2403 	sc = txq->vtntx_sc;
2404 	ifp = sc->vtnet_ifp;
2405 	tries = 0;
2406 
2407 	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2408 		/*
2409 		 * Ignore this interrupt. Either it is spurious, or this is a
2410 		 * multiqueue configuration without per-VQ MSIX, so every queue
2411 		 * needs to be polled (a brain-dead configuration we could try
2412 		 * harder to avoid).
2413 		 */
2414 		vtnet_txq_disable_intr(txq);
2415 		return;
2416 	}
2417 
2418 	VTNET_TXQ_LOCK(txq);
2419 
2420 again:
2421 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2422 		VTNET_TXQ_UNLOCK(txq);
2423 		return;
2424 	}
2425 
2426 	vtnet_txq_eof(txq);
2427 
2428 	vtnet_txq_start(txq);
2429 
2430 	if (vtnet_txq_enable_intr(txq) != 0) {
2431 		vtnet_txq_disable_intr(txq);
2432 		/*
2433 		 * This is an occasional race, so retry a few times
2434 		 * before scheduling the taskqueue.
2435 		 */
2436 		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
2437 			goto again;
2438 
2439 		VTNET_TXQ_UNLOCK(txq);
2440 		txq->vtntx_stats.vtxs_rescheduled++;
2441 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2442 	} else
2443 		VTNET_TXQ_UNLOCK(txq);
2444 }
2445 
2446 static void
2447 vtnet_tx_start_all(struct vtnet_softc *sc)
2448 {
2449 	struct vtnet_txq *txq;
2450 	int i;
2451 
2452 	VTNET_CORE_LOCK_ASSERT(sc);
2453 
2454 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2455 		txq = &sc->vtnet_txqs[i];
2456 
2457 		VTNET_TXQ_LOCK(txq);
2458 		vtnet_txq_start(txq);
2459 		VTNET_TXQ_UNLOCK(txq);
2460 	}
2461 }
2462 
2463 #ifndef VTNET_LEGACY_TX
2464 static void
2465 vtnet_qflush(struct ifnet *ifp)
2466 {
2467 	struct vtnet_softc *sc;
2468 	struct vtnet_txq *txq;
2469 	struct mbuf *m;
2470 	int i;
2471 
2472 	sc = ifp->if_softc;
2473 
2474 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2475 		txq = &sc->vtnet_txqs[i];
2476 
2477 		VTNET_TXQ_LOCK(txq);
2478 		while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2479 			m_freem(m);
2480 		VTNET_TXQ_UNLOCK(txq);
2481 	}
2482 
2483 	if_qflush(ifp);
2484 }
2485 #endif
2486 
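/*
 * Return nonzero if the queue's transmit watchdog has expired; the
 * caller then reinitializes the interface.
 */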
2487 static int
2488 vtnet_watchdog(struct vtnet_txq *txq)
2489 {
2490 	struct vtnet_softc *sc;
2491 
2492 	sc = txq->vtntx_sc;
2493 
2494 	VTNET_TXQ_LOCK(txq);
2495 	if (sc->vtnet_flags & VTNET_FLAG_EVENT_IDX)
2496 		vtnet_txq_eof(txq);
2497 	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2498 		VTNET_TXQ_UNLOCK(txq);
2499 		return (0);
2500 	}
2501 	VTNET_TXQ_UNLOCK(txq);
2502 
2503 	if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n",
2504 	    txq->vtntx_id);
2505 	return (1);
2506 }
2507 
2508 static void
2509 vtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum)
2510 {
2511 	struct vtnet_rxq_stats *st;
2512 
2513 	st = &rxq->vtnrx_stats;
2514 
2515 	accum->vrxs_ipackets += st->vrxs_ipackets;
2516 	accum->vrxs_ibytes += st->vrxs_ibytes;
2517 	accum->vrxs_iqdrops += st->vrxs_iqdrops;
	accum->vrxs_ierrors += st->vrxs_ierrors;
2518 	accum->vrxs_csum += st->vrxs_csum;
2519 	accum->vrxs_csum_failed += st->vrxs_csum_failed;
2520 	accum->vrxs_rescheduled += st->vrxs_rescheduled;
2521 }
2522 
2523 static void
2524 vtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats *accum)
2525 {
2526 	struct vtnet_txq_stats *st;
2527 
2528 	st = &txq->vtntx_stats;
2529 
2530 	accum->vtxs_opackets += st->vtxs_opackets;
2531 	accum->vtxs_obytes += st->vtxs_obytes;
	accum->vtxs_omcasts += st->vtxs_omcasts;
2532 	accum->vtxs_csum += st->vtxs_csum;
2533 	accum->vtxs_tso += st->vtxs_tso;
2534 	accum->vtxs_collapsed += st->vtxs_collapsed;
2535 	accum->vtxs_rescheduled += st->vtxs_rescheduled;
2536 }
2537 
2538 static void
2539 vtnet_accumulate_stats(struct vtnet_softc *sc)
2540 {
2541 	struct ifnet *ifp;
2542 	struct vtnet_statistics *st;
2543 	struct vtnet_rxq_stats rxaccum;
2544 	struct vtnet_txq_stats txaccum;
2545 	int i;
2546 
2547 	ifp = sc->vtnet_ifp;
2548 	st = &sc->vtnet_stats;
2549 	bzero(&rxaccum, sizeof(struct vtnet_rxq_stats));
2550 	bzero(&txaccum, sizeof(struct vtnet_txq_stats));
2551 
2552 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2553 		vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum);
2554 		vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum);
2555 	}
2556 
2557 	st->rx_csum_offloaded = rxaccum.vrxs_csum;
2558 	st->rx_csum_failed = rxaccum.vrxs_csum_failed;
2559 	st->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
2560 	st->tx_csum_offloaded = txaccum.vtxs_csum;
2561 	st->tx_tso_offloaded = txaccum.vtxs_tso;
2562 	st->tx_task_rescheduled = txaccum.vtxs_rescheduled;
2563 
2564 	/*
2565 	 * With the exception of if_ierrors, these ifnet statistics are
2566 	 * only updated in the driver, so just set them to our accumulated
2567 	 * values. if_ierrors is updated in ether_input() for malformed
2568 	 * frames that we should have already discarded.
2569 	 */
2570 	ifp->if_ipackets = rxaccum.vrxs_ipackets;
2571 	ifp->if_iqdrops = rxaccum.vrxs_iqdrops;
2572 	ifp->if_ierrors = rxaccum.vrxs_ierrors;
2573 	ifp->if_opackets = txaccum.vtxs_opackets;
2574 #ifndef VTNET_LEGACY_TX
2575 	ifp->if_obytes = txaccum.vtxs_obytes;
2576 	ifp->if_omcasts = txaccum.vtxs_omcasts;
2577 #endif
2578 }
2579 
2580 static void
2581 vtnet_tick(void *xsc)
2582 {
2583 	struct vtnet_softc *sc;
2584 	struct ifnet *ifp;
2585 	int i, timedout;
2586 
2587 	sc = xsc;
2588 	ifp = sc->vtnet_ifp;
2589 	timedout = 0;
2590 
2591 	VTNET_CORE_LOCK_ASSERT(sc);
2592 	vtnet_accumulate_stats(sc);
2593 
2594 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
2595 		timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
2596 
2597 	if (timedout != 0) {
2598 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2599 		vtnet_init_locked(sc);
2600 	} else
2601 		callout_schedule(&sc->vtnet_tick_ch, hz);
2602 }
2603 
2604 static void
2605 vtnet_start_taskqueues(struct vtnet_softc *sc)
2606 {
2607 	device_t dev;
2608 	struct vtnet_rxq *rxq;
2609 	struct vtnet_txq *txq;
2610 	int i, error;
2611 
2612 	dev = sc->vtnet_dev;
2613 
2614 	/*
2615 	 * Errors here are very difficult to recover from: we cannot
2616 	 * easily fail because, if this happens during boot, we will hang
2617 	 * when freeing any successfully started taskqueues since the
2618 	 * scheduler isn't up yet.
2619 	 *
2620 	 * Most drivers just ignore the return value; it only fails
2621 	 * with ENOMEM, so an error is not likely.
2622 	 */
2623 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2624 		rxq = &sc->vtnet_rxqs[i];
2625 		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
2626 		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
2627 		if (error) {
2628 			device_printf(dev, "failed to start rx taskq %d\n",
2629 			    rxq->vtnrx_id);
2630 		}
2631 
2632 		txq = &sc->vtnet_txqs[i];
2633 		error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
2634 		    "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
2635 		if (error) {
2636 			device_printf(dev, "failed to start tx taskq %d\n",
2637 			    txq->vtntx_id);
2638 		}
2639 	}
2640 }
2641 
2642 static void
2643 vtnet_free_taskqueues(struct vtnet_softc *sc)
2644 {
2645 	struct vtnet_rxq *rxq;
2646 	struct vtnet_txq *txq;
2647 	int i;
2648 
2649 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2650 		rxq = &sc->vtnet_rxqs[i];
2651 		if (rxq->vtnrx_tq != NULL) {
2652 			taskqueue_free(rxq->vtnrx_tq);
2653 			rxq->vtnrx_tq = NULL;
2654 		}
2655 
2656 		txq = &sc->vtnet_txqs[i];
2657 		if (txq->vtntx_tq != NULL) {
2658 			taskqueue_free(txq->vtntx_tq);
2659 			txq->vtntx_tq = NULL;
2660 		}
2661 	}
2662 }
2663 
2664 static void
2665 vtnet_drain_taskqueues(struct vtnet_softc *sc)
2666 {
2667 	struct vtnet_rxq *rxq;
2668 	struct vtnet_txq *txq;
2669 	int i;
2670 
2671 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2672 		rxq = &sc->vtnet_rxqs[i];
2673 		if (rxq->vtnrx_tq != NULL)
2674 			taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2675 
2676 		txq = &sc->vtnet_txqs[i];
2677 		if (txq->vtntx_tq != NULL) {
2678 			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
2679 #ifndef VTNET_LEGACY_TX
2680 			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
2681 #endif
2682 		}
2683 	}
2684 }
2685 
2686 static void
2687 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
2688 {
2689 	struct vtnet_rxq *rxq;
2690 	struct vtnet_txq *txq;
2691 	int i;
2692 
2693 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2694 		rxq = &sc->vtnet_rxqs[i];
2695 		vtnet_rxq_free_mbufs(rxq);
2696 
2697 		txq = &sc->vtnet_txqs[i];
2698 		vtnet_txq_free_mbufs(txq);
2699 	}
2700 }
2701 
2702 static void
2703 vtnet_stop_rendezvous(struct vtnet_softc *sc)
2704 {
2705 	struct vtnet_rxq *rxq;
2706 	struct vtnet_txq *txq;
2707 	int i;
2708 
2709 	/*
2710 	 * Lock and unlock the per-queue mutex so we know the stop
2711 	 * state is visible. Doing only the active queues should be
2712 	 * sufficient, but it does not cost much extra to do all the
2713 	 * queues. Note we hold the core mutex here too.
2714 	 */
2715 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2716 		rxq = &sc->vtnet_rxqs[i];
2717 		VTNET_RXQ_LOCK(rxq);
2718 		VTNET_RXQ_UNLOCK(rxq);
2719 
2720 		txq = &sc->vtnet_txqs[i];
2721 		VTNET_TXQ_LOCK(txq);
2722 		VTNET_TXQ_UNLOCK(txq);
2723 	}
2724 }
2725 
2726 static void
2727 vtnet_stop(struct vtnet_softc *sc)
2728 {
2729 	device_t dev;
2730 	struct ifnet *ifp;
2731 
2732 	dev = sc->vtnet_dev;
2733 	ifp = sc->vtnet_ifp;
2734 
2735 	VTNET_CORE_LOCK_ASSERT(sc);
2736 
2737 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2738 	sc->vtnet_link_active = 0;
2739 	callout_stop(&sc->vtnet_tick_ch);
2740 
2741 	/* Only advisory. */
2742 	vtnet_disable_interrupts(sc);
2743 
2744 	/*
2745 	 * Stop the host adapter. This resets it to the pre-initialized
2746 	 * state. It will not generate any interrupts until after it is
2747 	 * reinitialized.
2748 	 */
2749 	virtio_stop(dev);
2750 	vtnet_stop_rendezvous(sc);
2751 
2752 	/* Free any mbufs left in the virtqueues. */
2753 	vtnet_drain_rxtx_queues(sc);
2754 }
2755 
2756 static int
2757 vtnet_virtio_reinit(struct vtnet_softc *sc)
2758 {
2759 	device_t dev;
2760 	struct ifnet *ifp;
2761 	uint64_t features;
2762 	int mask, error;
2763 
2764 	dev = sc->vtnet_dev;
2765 	ifp = sc->vtnet_ifp;
2766 	features = sc->vtnet_features;
2767 
2768 	mask = 0;
2769 #if defined(INET)
2770 	mask |= IFCAP_RXCSUM;
2771 #endif
2772 #if defined(INET6)
2773 	mask |= IFCAP_RXCSUM_IPV6;
2774 #endif
2775 
2776 	/*
2777 	 * Re-negotiate with the host, removing any disabled receive
2778 	 * features. Transmit features are disabled only on our side
2779 	 * via if_capenable and if_hwassist.
2780 	 */
2781 
2782 	if (ifp->if_capabilities & mask) {
2783 		/*
2784 		 * We require both IPv4 and IPv6 offloading to be enabled
2785 		 * in order to negotiate it: VirtIO does not distinguish
2786 		 * between the two.
2787 		 */
2788 		if ((ifp->if_capenable & mask) != mask)
2789 			features &= ~VIRTIO_NET_F_GUEST_CSUM;
2790 	}
2791 
2792 	if (ifp->if_capabilities & IFCAP_LRO) {
2793 		if ((ifp->if_capenable & IFCAP_LRO) == 0)
2794 			features &= ~VTNET_LRO_FEATURES;
2795 	}
2796 
2797 	if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2798 		if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2799 			features &= ~VIRTIO_NET_F_CTRL_VLAN;
2800 	}
2801 
2802 	error = virtio_reinit(dev, features);
2803 	if (error)
2804 		device_printf(dev, "virtio reinit error %d\n", error);
2805 
2806 	return (error);
2807 }
2808 
2809 static void
2810 vtnet_init_rx_filters(struct vtnet_softc *sc)
2811 {
2812 	struct ifnet *ifp;
2813 
2814 	ifp = sc->vtnet_ifp;
2815 
2816 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2817 		/* Restore promiscuous and all-multicast modes. */
2818 		vtnet_rx_filter(sc);
2819 		/* Restore filtered MAC addresses. */
2820 		vtnet_rx_filter_mac(sc);
2821 	}
2822 
2823 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2824 		vtnet_rx_filter_vlan(sc);
2825 }
2826 
2827 static int
2828 vtnet_init_rx_queues(struct vtnet_softc *sc)
2829 {
2830 	device_t dev;
2831 	struct vtnet_rxq *rxq;
2832 	int i, clsize, error;
2833 
2834 	dev = sc->vtnet_dev;
2835 
2836 	/*
2837 	 * Use the new cluster size if one has been set (via an MTU
2838 	 * change). Otherwise, use the standard 2K clusters.
2839 	 *
2840 	 * BMV: It might make sense to use page-sized clusters as
2841 	 * the default (depending on the features negotiated).
2842 	 */
2843 	if (sc->vtnet_rx_new_clsize != 0) {
2844 		clsize = sc->vtnet_rx_new_clsize;
2845 		sc->vtnet_rx_new_clsize = 0;
2846 	} else
2847 		clsize = MCLBYTES;
2848 
2849 	sc->vtnet_rx_clsize = clsize;
2850 	sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
2851 
2852 	/* The first segment is reserved for the header. */
2853 	KASSERT(sc->vtnet_rx_nmbufs < VTNET_MAX_RX_SEGS,
2854 	    ("%s: too many rx mbufs %d", __func__, sc->vtnet_rx_nmbufs));
2855 
2856 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2857 		rxq = &sc->vtnet_rxqs[i];
2858 
2859 		/* Hold the lock to satisfy asserts. */
2860 		VTNET_RXQ_LOCK(rxq);
2861 		error = vtnet_rxq_populate(rxq);
2862 		VTNET_RXQ_UNLOCK(rxq);
2863 
2864 		if (error) {
2865 			device_printf(dev,
2866 			    "cannot allocate mbufs for Rx queue %d\n", i);
2867 			return (error);
2868 		}
2869 	}
2870 
2871 	return (0);
2872 }
2873 
2874 static int
2875 vtnet_init_tx_queues(struct vtnet_softc *sc)
2876 {
2877 	struct vtnet_txq *txq;
2878 	int i;
2879 
2880 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2881 		txq = &sc->vtnet_txqs[i];
2882 		txq->vtntx_watchdog = 0;
2883 	}
2884 
2885 	return (0);
2886 }
2887 
2888 static int
2889 vtnet_init_rxtx_queues(struct vtnet_softc *sc)
2890 {
2891 	int error;
2892 
2893 	error = vtnet_init_rx_queues(sc);
2894 	if (error)
2895 		return (error);
2896 
2897 	error = vtnet_init_tx_queues(sc);
2898 	if (error)
2899 		return (error);
2900 
2901 	return (0);
2902 }
2903 
2904 static void
2905 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
2906 {
2907 	device_t dev;
2908 	int npairs;
2909 
2910 	dev = sc->vtnet_dev;
2911 
2912 	if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
2913 		MPASS(sc->vtnet_max_vq_pairs == 1);
2914 		sc->vtnet_act_vq_pairs = 1;
2915 		return;
2916 	}
2917 
2918 	/* BMV: Just use the maximum configured for now. */
2919 	npairs = sc->vtnet_max_vq_pairs;
2920 
2921 	if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
2922 		device_printf(dev,
2923 		    "cannot set active queue pairs to %d\n", npairs);
2924 		npairs = 1;
2925 	}
2926 
2927 	sc->vtnet_act_vq_pairs = npairs;
2928 }
2929 
2930 static int
2931 vtnet_reinit(struct vtnet_softc *sc)
2932 {
2933 	device_t dev;
2934 	struct ifnet *ifp;
2935 	int error;
2936 
2937 	dev = sc->vtnet_dev;
2938 	ifp = sc->vtnet_ifp;
2939 
2940 	/* Use the current MAC address. */
2941 	bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
2942 	vtnet_set_hwaddr(sc);
2943 
2944 	vtnet_set_active_vq_pairs(sc);
2945 
2946 	ifp->if_hwassist = 0;
2947 	if (ifp->if_capenable & IFCAP_TXCSUM)
2948 		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
2949 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2950 		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
2951 	if (ifp->if_capenable & IFCAP_TSO4)
2952 		ifp->if_hwassist |= CSUM_TSO;
2953 	if (ifp->if_capenable & IFCAP_TSO6)
2954 		ifp->if_hwassist |= CSUM_TSO; /* No CSUM_TSO_IPV6. */
2955 
2956 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
2957 		vtnet_init_rx_filters(sc);
2958 
2959 	error = vtnet_init_rxtx_queues(sc);
2960 	if (error)
2961 		return (error);
2962 
2963 	vtnet_enable_interrupts(sc);
2964 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2965 
2966 	return (0);
2967 }
2968 
2969 static void
2970 vtnet_init_locked(struct vtnet_softc *sc)
2971 {
2972 	device_t dev;
2973 	struct ifnet *ifp;
2974 
2975 	dev = sc->vtnet_dev;
2976 	ifp = sc->vtnet_ifp;
2977 
2978 	VTNET_CORE_LOCK_ASSERT(sc);
2979 
2980 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2981 		return;
2982 
2983 	vtnet_stop(sc);
2984 
2985 	/* Reinitialize with the host. */
2986 	if (vtnet_virtio_reinit(sc) != 0)
2987 		goto fail;
2988 
2989 	if (vtnet_reinit(sc) != 0)
2990 		goto fail;
2991 
2992 	virtio_reinit_complete(dev);
2993 
2994 	vtnet_update_link_status(sc);
2995 	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
2996 
2997 	return;
2998 
2999 fail:
3000 	vtnet_stop(sc);
3001 }
3002 
3003 static void
3004 vtnet_init(void *xsc)
3005 {
3006 	struct vtnet_softc *sc;
3007 
3008 	sc = xsc;
3009 
3010 	VTNET_CORE_LOCK(sc);
3011 	vtnet_init_locked(sc);
3012 	VTNET_CORE_UNLOCK(sc);
3013 }
3014 
3015 static void
3016 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3017 {
3018 	struct virtqueue *vq;
3019 
3020 	vq = sc->vtnet_ctrl_vq;
3021 
3022 	/*
3023 	 * The control virtqueue is only polled and therefore it should
3024 	 * already be empty.
3025 	 */
3026 	KASSERT(virtqueue_empty(vq),
3027 	    ("%s: ctrl vq %p not empty", __func__, vq));
3028 }
3029 
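/*
 * Execute a command on the control virtqueue. The virtqueue is polled
 * for the result, so the core lock must be held across the call.
 */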
3030 static void
3031 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3032     struct sglist *sg, int readable, int writable)
3033 {
3034 	struct virtqueue *vq;
3035 
3036 	vq = sc->vtnet_ctrl_vq;
3037 
3038 	VTNET_CORE_LOCK_ASSERT(sc);
3039 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
3040 	    ("%s: CTRL_VQ feature not negotiated", __func__));
3041 
3042 	if (!virtqueue_empty(vq))
3043 		return;
3044 	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
3045 		return;
3046 
3047 	/*
3048 	 * Poll for the response, but the command is likely already
3049 	 * done when we return from the notify.
3050 	 */
3051 	virtqueue_notify(vq);
3052 	virtqueue_poll(vq, NULL);
3053 }
3054 
3055 static int
3056 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3057 {
3058 	struct virtio_net_ctrl_hdr hdr;
3059 	struct sglist_seg segs[3];
3060 	struct sglist sg;
3061 	uint8_t ack;
3062 	int error;
3063 
3064 	hdr.class = VIRTIO_NET_CTRL_MAC;
3065 	hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3066 	ack = VIRTIO_NET_ERR;
3067 
3068 	sglist_init(&sg, 3, segs);
3069 	error = 0;
3070 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3071 	error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
3072 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3073 	KASSERT(error == 0 && sg.sg_nseg == 3,
3074 	    ("%s: error %d adding set MAC msg to sglist", __func__, error));
3075 
3076 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3077 
3078 	return (ack == VIRTIO_NET_OK ? 0 : EIO);
3079 }
3080 
3081 static int
3082 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3083 {
3084 	struct sglist_seg segs[3];
3085 	struct sglist sg;
3086 	struct {
3087 		struct virtio_net_ctrl_hdr hdr;
3088 		uint8_t pad1;
3089 		struct virtio_net_ctrl_mq mq;
3090 		uint8_t pad2;
3091 		uint8_t ack;
3092 	} s;
3093 	int error;
3094 
3095 	s.hdr.class = VIRTIO_NET_CTRL_MQ;
3096 	s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3097 	s.mq.virtqueue_pairs = npairs;
3098 	s.ack = VIRTIO_NET_ERR;
3099 
3100 	sglist_init(&sg, 3, segs);
3101 	error = 0;
3102 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3103 	error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3104 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3105 	KASSERT(error == 0 && sg.sg_nseg == 3,
3106 	    ("%s: error %d adding MQ message to sglist", __func__, error));
3107 
3108 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3109 
3110 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3111 }
3112 
3113 static int
3114 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
3115 {
3116 	struct sglist_seg segs[3];
3117 	struct sglist sg;
3118 	struct {
3119 		struct virtio_net_ctrl_hdr hdr;
3120 		uint8_t pad1;
3121 		uint8_t onoff;
3122 		uint8_t pad2;
3123 		uint8_t ack;
3124 	} s;
3125 	int error;
3126 
3127 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3128 	    ("%s: CTRL_RX feature not negotiated", __func__));
3129 
3130 	s.hdr.class = VIRTIO_NET_CTRL_RX;
3131 	s.hdr.cmd = cmd;
3132 	s.onoff = !!on;
3133 	s.ack = VIRTIO_NET_ERR;
3134 
3135 	sglist_init(&sg, 3, segs);
3136 	error = 0;
3137 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3138 	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3139 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3140 	KASSERT(error == 0 && sg.sg_nseg == 3,
3141 	    ("%s: error %d adding Rx message to sglist", __func__, error));
3142 
3143 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3144 
3145 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3146 }
3147 
3148 static int
3149 vtnet_set_promisc(struct vtnet_softc *sc, int on)
3150 {
3151 
3152 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3153 }
3154 
3155 static int
3156 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
3157 {
3158 
3159 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3160 }
3161 
3162 /*
3163  * The device defaults to promiscuous mode for backwards compatibility.
3164  * Turn it off at attach time if possible.
3165  */
3166 static void
3167 vtnet_attach_disable_promisc(struct vtnet_softc *sc)
3168 {
3169 	struct ifnet *ifp;
3170 
3171 	ifp = sc->vtnet_ifp;
3172 
3173 	VTNET_CORE_LOCK(sc);
3174 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) {
3175 		ifp->if_flags |= IFF_PROMISC;
3176 	} else if (vtnet_set_promisc(sc, 0) != 0) {
3177 		ifp->if_flags |= IFF_PROMISC;
3178 		device_printf(sc->vtnet_dev,
3179 		    "cannot disable default promiscuous mode\n");
3180 	}
3181 	VTNET_CORE_UNLOCK(sc);
3182 }
3183 
3184 static void
3185 vtnet_rx_filter(struct vtnet_softc *sc)
3186 {
3187 	device_t dev;
3188 	struct ifnet *ifp;
3189 
3190 	dev = sc->vtnet_dev;
3191 	ifp = sc->vtnet_ifp;
3192 
3193 	VTNET_CORE_LOCK_ASSERT(sc);
3194 
3195 	if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
3196 		device_printf(dev, "cannot %s promiscuous mode\n",
3197 		    ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
3198 
3199 	if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
3200 		device_printf(dev, "cannot %s all-multicast mode\n",
3201 		    ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
3202 }
3203 
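/*
 * Program the host's unicast and multicast MAC filter tables from the
 * interface address lists, falling back to promiscuous or all-multicast
 * mode if either table would overflow.
 */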
3204 static void
3205 vtnet_rx_filter_mac(struct vtnet_softc *sc)
3206 {
3207 	struct virtio_net_ctrl_hdr hdr;
3208 	struct vtnet_mac_filter *filter;
3209 	struct sglist_seg segs[4];
3210 	struct sglist sg;
3211 	struct ifnet *ifp;
3212 	struct ifaddr *ifa;
3213 	struct ifmultiaddr *ifma;
3214 	int ucnt, mcnt, promisc, allmulti, error;
3215 	uint8_t ack;
3216 
3217 	ifp = sc->vtnet_ifp;
3218 	filter = sc->vtnet_mac_filter;
3219 	ucnt = 0;
3220 	mcnt = 0;
3221 	promisc = 0;
3222 	allmulti = 0;
3223 
3224 	VTNET_CORE_LOCK_ASSERT(sc);
3225 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3226 	    ("%s: CTRL_RX feature not negotiated", __func__));
3227 
3228 	/* Unicast MAC addresses: */
3229 	if_addr_rlock(ifp);
3230 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3231 		if (ifa->ifa_addr->sa_family != AF_LINK)
3232 			continue;
3233 		else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3234 		    sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3235 			continue;
3236 		else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
3237 			promisc = 1;
3238 			break;
3239 		}
3240 
3241 		bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3242 		    &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
3243 		ucnt++;
3244 	}
3245 	if_addr_runlock(ifp);
3246 
3247 	if (promisc != 0) {
3248 		filter->vmf_unicast.nentries = 0;
3249 		if_printf(ifp, "more than %d MAC addresses assigned, "
3250 		    "falling back to promiscuous mode\n",
3251 		    VTNET_MAX_MAC_ENTRIES);
3252 	} else
3253 		filter->vmf_unicast.nentries = ucnt;
3254 
3255 	/* Multicast MAC addresses: */
3256 	if_maddr_rlock(ifp);
3257 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3258 		if (ifma->ifma_addr->sa_family != AF_LINK)
3259 			continue;
3260 		else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
3261 			allmulti = 1;
3262 			break;
3263 		}
3264 
3265 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3266 		    &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
3267 		mcnt++;
3268 	}
3269 	if_maddr_runlock(ifp);
3270 
3271 	if (allmulti != 0) {
3272 		filter->vmf_multicast.nentries = 0;
3273 		if_printf(ifp, "more than %d multicast MAC addresses "
3274 		    "assigned, falling back to all-multicast mode\n",
3275 		    VTNET_MAX_MAC_ENTRIES);
3276 	} else
3277 		filter->vmf_multicast.nentries = mcnt;
3278 
3279 	if (promisc != 0 && allmulti != 0)
3280 		goto out;
3281 
3282 	hdr.class = VIRTIO_NET_CTRL_MAC;
3283 	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3284 	ack = VIRTIO_NET_ERR;
3285 
3286 	sglist_init(&sg, 4, segs);
3287 	error = 0;
3288 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3289 	error |= sglist_append(&sg, &filter->vmf_unicast,
3290 	    sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
3291 	error |= sglist_append(&sg, &filter->vmf_multicast,
3292 	    sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
3293 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3294 	KASSERT(error == 0 && sg.sg_nseg == 4,
3295 	    ("%s: error %d adding MAC filter msg to sglist", __func__, error));
3296 
3297 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3298 
3299 	if (ack != VIRTIO_NET_OK)
3300 		if_printf(ifp, "error setting host MAC filter table\n");
3301 
3302 out:
3303 	if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
3304 		if_printf(ifp, "cannot enable promiscuous mode\n");
3305 	if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
3306 		if_printf(ifp, "cannot enable all-multicast mode\n");
3307 }
3308 
3309 static int
3310 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3311 {
3312 	struct sglist_seg segs[3];
3313 	struct sglist sg;
3314 	struct {
3315 		struct virtio_net_ctrl_hdr hdr;
3316 		uint8_t pad1;
3317 		uint16_t tag;
3318 		uint8_t pad2;
3319 		uint8_t ack;
3320 	} s;
3321 	int error;
3322 
3323 	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3324 	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3325 	s.tag = tag;
3326 	s.ack = VIRTIO_NET_ERR;
3327 
3328 	sglist_init(&sg, 3, segs);
3329 	error = 0;
3330 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3331 	error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3332 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3333 	KASSERT(error == 0 && sg.sg_nseg == 3,
3334 	    ("%s: error %d adding VLAN message to sglist", __func__, error));
3335 
3336 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3337 
3338 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3339 }
3340 
3341 static void
3342 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3343 {
3344 	uint32_t w;
3345 	uint16_t tag;
3346 	int i, bit;
3347 
3348 	VTNET_CORE_LOCK_ASSERT(sc);
3349 	KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
3350 	    ("%s: VLAN_FILTER feature not negotiated", __func__));
3351 
3352 	/* Enable the filter for each configured VLAN. */
3353 	for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3354 		w = sc->vtnet_vlan_filter[i];
3355 
3356 		while ((bit = ffs(w) - 1) != -1) {
3357 			w &= ~(1 << bit);
3358 			tag = sizeof(w) * CHAR_BIT * i + bit;
3359 
3360 			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3361 				device_printf(sc->vtnet_dev,
3362 				    "cannot enable VLAN %d filter\n", tag);
3363 			}
3364 		}
3365 	}
3366 }
3367 
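/*
 * Update the local VLAN filter bitmap (32 tags per word) and, when
 * hardware VLAN filtering is enabled, the host's filter table too.
 */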
3368 static void
3369 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3370 {
3371 	struct ifnet *ifp;
3372 	int idx, bit;
3373 
3374 	ifp = sc->vtnet_ifp;
3375 	idx = (tag >> 5) & 0x7F;
3376 	bit = tag & 0x1F;
3377 
3378 	if (tag == 0 || tag > 4095)
3379 		return;
3380 
3381 	VTNET_CORE_LOCK(sc);
3382 
3383 	if (add)
3384 		sc->vtnet_vlan_filter[idx] |= (1 << bit);
3385 	else
3386 		sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3387 
3388 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
3389 	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3390 		device_printf(sc->vtnet_dev,
3391 		    "cannot %s VLAN %d %s the host filter table\n",
3392 		    add ? "add" : "remove", tag, add ? "to" : "from");
3393 	}
3394 
3395 	VTNET_CORE_UNLOCK(sc);
3396 }
3397 
3398 static void
3399 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3400 {
3401 
3402 	if (ifp->if_softc != arg)
3403 		return;
3404 
3405 	vtnet_update_vlan_filter(arg, 1, tag);
3406 }
3407 
3408 static void
3409 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3410 {
3411 
3412 	if (ifp->if_softc != arg)
3413 		return;
3414 
3415 	vtnet_update_vlan_filter(arg, 0, tag);
3416 }
3417 
3418 static int
3419 vtnet_is_link_up(struct vtnet_softc *sc)
3420 {
3421 	device_t dev;
3422 	struct ifnet *ifp;
3423 	uint16_t status;
3424 
3425 	dev = sc->vtnet_dev;
3426 	ifp = sc->vtnet_ifp;
3427 
3428 	if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
3429 		status = VIRTIO_NET_S_LINK_UP;
3430 	else
3431 		status = virtio_read_dev_config_2(dev,
3432 		    offsetof(struct virtio_net_config, status));
3433 
3434 	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3435 }
3436 
3437 static void
3438 vtnet_update_link_status(struct vtnet_softc *sc)
3439 {
3440 	struct ifnet *ifp;
3441 	int link;
3442 
3443 	ifp = sc->vtnet_ifp;
3444 
3445 	VTNET_CORE_LOCK_ASSERT(sc);
3446 	link = vtnet_is_link_up(sc);
3447 
3448 	/* Notify if the link status has changed. */
3449 	if (link != 0 && sc->vtnet_link_active == 0) {
3450 		sc->vtnet_link_active = 1;
3451 		if_link_state_change(ifp, LINK_STATE_UP);
3452 	} else if (link == 0 && sc->vtnet_link_active != 0) {
3453 		sc->vtnet_link_active = 0;
3454 		if_link_state_change(ifp, LINK_STATE_DOWN);
3455 	}
3456 }
3457 
3458 static int
3459 vtnet_ifmedia_upd(struct ifnet *ifp)
3460 {
3461 	struct vtnet_softc *sc;
3462 	struct ifmedia *ifm;
3463 
3464 	sc = ifp->if_softc;
3465 	ifm = &sc->vtnet_media;
3466 
3467 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3468 		return (EINVAL);
3469 
3470 	return (0);
3471 }
3472 
3473 static void
3474 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3475 {
3476 	struct vtnet_softc *sc;
3477 
3478 	sc = ifp->if_softc;
3479 
3480 	ifmr->ifm_status = IFM_AVALID;
3481 	ifmr->ifm_active = IFM_ETHER;
3482 
3483 	VTNET_CORE_LOCK(sc);
3484 	if (vtnet_is_link_up(sc) != 0) {
3485 		ifmr->ifm_status |= IFM_ACTIVE;
3486 		ifmr->ifm_active |= VTNET_MEDIATYPE;
3487 	} else
3488 		ifmr->ifm_active |= IFM_NONE;
3489 	VTNET_CORE_UNLOCK(sc);
3490 }
3491 
3492 static void
3493 vtnet_set_hwaddr(struct vtnet_softc *sc)
3494 {
3495 	device_t dev;
3496 	int i;
3497 
3498 	dev = sc->vtnet_dev;
3499 
3500 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
3501 		if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
3502 			device_printf(dev, "unable to set MAC address\n");
3503 	} else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
3504 		for (i = 0; i < ETHER_ADDR_LEN; i++) {
3505 			virtio_write_dev_config_1(dev,
3506 			    offsetof(struct virtio_net_config, mac) + i,
3507 			    sc->vtnet_hwaddr[i]);
3508 		}
3509 	}
3510 }
3511 
3512 static void
3513 vtnet_get_hwaddr(struct vtnet_softc *sc)
3514 {
3515 	device_t dev;
3516 	int i;
3517 
3518 	dev = sc->vtnet_dev;
3519 
3520 	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
3521 		/*
3522 		 * Generate a random locally administered unicast address.
3523 		 *
3524 		 * It would be nice to generate the same MAC address across
3525 		 * reboots, but it seems all the hosts currently available
3526 		 * support the MAC feature, so this isn't too important.
3527 		 */
3528 		sc->vtnet_hwaddr[0] = 0xB2;
3529 		arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
3530 		vtnet_set_hwaddr(sc);
3531 		return;
3532 	}
3533 
3534 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
3535 		sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev,
3536 		    offsetof(struct virtio_net_config, mac) + i);
3537 	}
3538 }
3539 
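/*
 * Move the 802.1Q tag from the frame into the mbuf packet header and
 * strip the encapsulation from the frame.
 */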
3540 static void
3541 vtnet_vlan_tag_remove(struct mbuf *m)
3542 {
3543 	struct ether_vlan_header *evh;
3544 
3545 	evh = mtod(m, struct ether_vlan_header *);
3546 	m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
3547 	m->m_flags |= M_VLANTAG;
3548 
3549 	/* Strip the 802.1Q header. */
3550 	bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
3551 	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
3552 	m_adj(m, ETHER_VLAN_ENCAP_LEN);
3553 }
3554 
3555 static void
3556 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
3557     struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
3558 {
3559 	struct sysctl_oid *node;
3560 	struct sysctl_oid_list *list;
3561 	struct vtnet_rxq_stats *stats;
3562 	char namebuf[16];
3563 
3564 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
3565 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3566 	    CTLFLAG_RD, NULL, "Receive Queue");
3567 	list = SYSCTL_CHILDREN(node);
3568 
3569 	stats = &rxq->vtnrx_stats;
3570 
3571 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3572 	    &stats->vrxs_ipackets, "Receive packets");
3573 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3574 	    &stats->vrxs_ibytes, "Receive bytes");
3575 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3576 	    &stats->vrxs_iqdrops, "Receive drops");
3577 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3578 	    &stats->vrxs_ierrors, "Receive errors");
3579 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3580 	    &stats->vrxs_csum, "Receive checksum offloaded");
3581 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
3582 	    &stats->vrxs_csum_failed, "Receive checksum offload failed");
3583 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3584 	    &stats->vrxs_rescheduled,
3585 	    "Receive interrupt handler rescheduled");
3586 }
3587 
3588 static void
3589 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
3590     struct sysctl_oid_list *child, struct vtnet_txq *txq)
3591 {
3592 	struct sysctl_oid *node;
3593 	struct sysctl_oid_list *list;
3594 	struct vtnet_txq_stats *stats;
3595 	char namebuf[16];
3596 
3597 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
3598 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3599 	    CTLFLAG_RD, NULL, "Transmit Queue");
3600 	list = SYSCTL_CHILDREN(node);
3601 
3602 	stats = &txq->vtntx_stats;
3603 
3604 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3605 	    &stats->vtxs_opackets, "Transmit packets");
3606 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3607 	    &stats->vtxs_obytes, "Transmit bytes");
3608 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3609 	    &stats->vtxs_omcasts, "Transmit multicasts");
3610 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3611 	    &stats->vtxs_csum, "Transmit checksum offloaded");
3612 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3613 	    &stats->vtxs_tso, "Transmit segmentation offloaded");
3614 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "collapsed", CTLFLAG_RD,
3615 	    &stats->vtxs_collapsed, "Transmit mbufs collapsed");
3616 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3617 	    &stats->vtxs_rescheduled,
3618 	    "Transmit interrupt handler rescheduled");
3619 }
3620 
3621 static void
3622 vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
3623 {
3624 	device_t dev;
3625 	struct sysctl_ctx_list *ctx;
3626 	struct sysctl_oid *tree;
3627 	struct sysctl_oid_list *child;
3628 	int i;
3629 
3630 	dev = sc->vtnet_dev;
3631 	ctx = device_get_sysctl_ctx(dev);
3632 	tree = device_get_sysctl_tree(dev);
3633 	child = SYSCTL_CHILDREN(tree);
3634 
3635 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3636 		vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
3637 		vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
3638 	}
3639 }
3640 
3641 static void
3642 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
3643     struct sysctl_oid_list *child, struct vtnet_softc *sc)
3644 {
3645 	struct vtnet_statistics *stats;
3646 
3647 	stats = &sc->vtnet_stats;
3648 
3649 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
3650 	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
3651 	    "Mbuf cluster allocation failures");
3652 
3653 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
3654 	    CTLFLAG_RD, &stats->rx_frame_too_large,
3655 	    "Received frame larger than the mbuf chain");
3656 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
3657 	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
3658 	    "Enqueuing the replacement receive mbuf failed");
3659 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
3660 	    CTLFLAG_RD, &stats->rx_mergeable_failed,
3661 	    "Mergeable buffers receive failures");
3662 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
3663 	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
3664 	    "Received checksum offloaded buffer with unsupported "
3665 	    "Ethernet type");
3666 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
3667 	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
3668 	    "Received checksum offloaded buffer with incorrect IP protocol");
3669 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
3670 	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
3671 	    "Received checksum offloaded buffer with incorrect offset");
3672 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
3673 	    CTLFLAG_RD, &stats->rx_csum_bad_proto,
3674 	    "Received checksum offloaded buffer with incorrect protocol");
3675 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
3676 	    CTLFLAG_RD, &stats->rx_csum_failed,
3677 	    "Received buffer checksum offload failed");
3678 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
3679 	    CTLFLAG_RD, &stats->rx_csum_offloaded,
3680 	    "Received buffer checksum offload succeeded");
3681 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
3682 	    CTLFLAG_RD, &stats->rx_task_rescheduled,
3683 	    "Times the receive interrupt task rescheduled itself");
3684 
3685 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
3686 	    CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
3687 	    "Aborted transmit of checksum offloaded buffer with unknown "
3688 	    "Ethernet type");
3689 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
3690 	    CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
3691 	    "Aborted transmit of TSO buffer with unknown Ethernet type");
3692 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
3693 	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
3694 	    "Aborted transmit of TSO buffer with non TCP protocol");
3695 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
3696 	    CTLFLAG_RD, &stats->tx_csum_offloaded,
3697 	    "Offloaded checksum of transmitted buffer");
3698 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
3699 	    CTLFLAG_RD, &stats->tx_tso_offloaded,
3700 	    "Segmentation offload of transmitted buffer");
3701 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
3702 	    CTLFLAG_RD, &stats->tx_task_rescheduled,
3703 	    "Times the transmit interrupt task rescheduled itself");
3704 }
3705 
3706 static void
3707 vtnet_setup_sysctl(struct vtnet_softc *sc)
3708 {
3709 	device_t dev;
3710 	struct sysctl_ctx_list *ctx;
3711 	struct sysctl_oid *tree;
3712 	struct sysctl_oid_list *child;
3713 
3714 	dev = sc->vtnet_dev;
3715 	ctx = device_get_sysctl_ctx(dev);
3716 	tree = device_get_sysctl_tree(dev);
3717 	child = SYSCTL_CHILDREN(tree);
3718 
3719 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
3720 	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
3721 	    "Maximum number of supported virtqueue pairs");
3722 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
3723 	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
3724 	    "Number of active virtqueue pairs");
3725 
3726 	vtnet_setup_stat_sysctl(ctx, child, sc);
3727 }
3728 
3729 static int
3730 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
3731 {
3732 
3733 	return (virtqueue_enable_intr(rxq->vtnrx_vq));
3734 }
3735 
3736 static void
3737 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
3738 {
3739 
3740 	virtqueue_disable_intr(rxq->vtnrx_vq);
3741 }
3742 
3743 static int
3744 vtnet_txq_enable_intr(struct vtnet_txq *txq)
3745 {
3746 
3747 	return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG));
3748 }
3749 
3750 static void
3751 vtnet_txq_disable_intr(struct vtnet_txq *txq)
3752 {
3753 
3754 	virtqueue_disable_intr(txq->vtntx_vq);
3755 }
3756 
3757 static void
3758 vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
3759 {
3760 	int i;
3761 
3762 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3763 		vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
3764 }
3765 
3766 static void
3767 vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
3768 {
3769 	int i;
3770 
3771 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3772 		vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
3773 }
3774 
3775 static void
3776 vtnet_enable_interrupts(struct vtnet_softc *sc)
3777 {
3778 
3779 	vtnet_enable_rx_interrupts(sc);
3780 	vtnet_enable_tx_interrupts(sc);
3781 }
3782 
3783 static void
3784 vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
3785 {
3786 	int i;
3787 
3788 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3789 		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
3790 }
3791 
3792 static void
3793 vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
3794 {
3795 	int i;
3796 
3797 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3798 		vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
3799 }
3800 
3801 static void
3802 vtnet_disable_interrupts(struct vtnet_softc *sc)
3803 {
3804 
3805 	vtnet_disable_rx_interrupts(sc);
3806 	vtnet_disable_tx_interrupts(sc);
3807 }
3808 
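/* Fetch a per-device hw.vtnet.<unit>.<knob> tunable, returning a default. */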
3809 static int
3810 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
3811 {
3812 	char path[64];
3813 
3814 	snprintf(path, sizeof(path),
3815 	    "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
3816 	TUNABLE_INT_FETCH(path, &def);
3817 
3818 	return (def);
3819 }
3820