xref: /freebsd/sys/dev/virtio/network/if_vtnet.c (revision 3fc9e2c36555140de248a0b4def91bbfa44d7c2c)
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* Driver for VirtIO network devices. */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/sockio.h>
36 #include <sys/mbuf.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/socket.h>
40 #include <sys/sysctl.h>
41 #include <sys/random.h>
42 #include <sys/sglist.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/taskqueue.h>
46 #include <sys/smp.h>
47 #include <machine/smp.h>
48 
49 #include <vm/uma.h>
50 
51 #include <net/ethernet.h>
52 #include <net/if.h>
53 #include <net/if_arp.h>
54 #include <net/if_dl.h>
55 #include <net/if_types.h>
56 #include <net/if_media.h>
57 #include <net/if_vlan_var.h>
58 
59 #include <net/bpf.h>
60 
61 #include <netinet/in_systm.h>
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip6.h>
65 #include <netinet6/ip6_var.h>
66 #include <netinet/udp.h>
67 #include <netinet/tcp.h>
68 #include <netinet/sctp.h>
69 
70 #include <machine/bus.h>
71 #include <machine/resource.h>
72 #include <sys/bus.h>
73 #include <sys/rman.h>
74 
75 #include <dev/virtio/virtio.h>
76 #include <dev/virtio/virtqueue.h>
77 #include <dev/virtio/network/virtio_net.h>
78 #include <dev/virtio/network/if_vtnetvar.h>
79 
80 #include "virtio_if.h"
81 
82 #include "opt_inet.h"
83 #include "opt_inet6.h"
84 
85 static int	vtnet_modevent(module_t, int, void *);
86 
87 static int	vtnet_probe(device_t);
88 static int	vtnet_attach(device_t);
89 static int	vtnet_detach(device_t);
90 static int	vtnet_suspend(device_t);
91 static int	vtnet_resume(device_t);
92 static int	vtnet_shutdown(device_t);
93 static int	vtnet_attach_completed(device_t);
94 static int	vtnet_config_change(device_t);
95 
96 static void	vtnet_negotiate_features(struct vtnet_softc *);
97 static void	vtnet_setup_features(struct vtnet_softc *);
98 static int	vtnet_init_rxq(struct vtnet_softc *, int);
99 static int	vtnet_init_txq(struct vtnet_softc *, int);
100 static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
101 static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
102 static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
103 static void	vtnet_free_rx_filters(struct vtnet_softc *);
104 static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
105 static int	vtnet_setup_interface(struct vtnet_softc *);
106 static int	vtnet_change_mtu(struct vtnet_softc *, int);
107 static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
108 
109 static int	vtnet_rxq_populate(struct vtnet_rxq *);
110 static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
111 static struct mbuf *
112 		vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
113 static int	vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
114 		    struct mbuf *, int);
115 static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
116 static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
117 static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
118 static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
119 		     struct virtio_net_hdr *);
120 static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
121 static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
122 static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
123 static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
124 		    struct virtio_net_hdr *);
125 static int	vtnet_rxq_eof(struct vtnet_rxq *);
126 static void	vtnet_rx_vq_intr(void *);
127 static void	vtnet_rxq_tq_intr(void *, int);
128 
129 static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
130 static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
131 		    int *, int *, int *);
132 static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
133 		    int, struct virtio_net_hdr *);
134 static struct mbuf *
135 		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
136 		    struct virtio_net_hdr *);
137 static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
138 		    struct vtnet_tx_header *);
139 static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
140 #ifdef VTNET_LEGACY_TX
141 static void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
142 static void	vtnet_start(struct ifnet *);
143 #else
144 static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
145 static int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
146 static void	vtnet_txq_tq_deferred(void *, int);
147 #endif
148 static void	vtnet_txq_tq_intr(void *, int);
149 static void	vtnet_txq_eof(struct vtnet_txq *);
150 static void	vtnet_tx_vq_intr(void *);
151 static void	vtnet_tx_start_all(struct vtnet_softc *);
152 
153 #ifndef VTNET_LEGACY_TX
154 static void	vtnet_qflush(struct ifnet *);
155 #endif
156 
157 static int	vtnet_watchdog(struct vtnet_txq *);
158 static void	vtnet_rxq_accum_stats(struct vtnet_rxq *,
159 		    struct vtnet_rxq_stats *);
160 static void	vtnet_txq_accum_stats(struct vtnet_txq *,
161 		    struct vtnet_txq_stats *);
162 static void	vtnet_accumulate_stats(struct vtnet_softc *);
163 static void	vtnet_tick(void *);
164 
165 static void	vtnet_start_taskqueues(struct vtnet_softc *);
166 static void	vtnet_free_taskqueues(struct vtnet_softc *);
167 static void	vtnet_drain_taskqueues(struct vtnet_softc *);
168 
169 static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
170 static void	vtnet_stop_rendezvous(struct vtnet_softc *);
171 static void	vtnet_stop(struct vtnet_softc *);
172 static int	vtnet_virtio_reinit(struct vtnet_softc *);
173 static void	vtnet_init_rx_filters(struct vtnet_softc *);
174 static int	vtnet_init_rx_queues(struct vtnet_softc *);
175 static int	vtnet_init_tx_queues(struct vtnet_softc *);
176 static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
177 static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
178 static int	vtnet_reinit(struct vtnet_softc *);
179 static void	vtnet_init_locked(struct vtnet_softc *);
180 static void	vtnet_init(void *);
181 
182 static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
183 static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
184 		    struct sglist *, int, int);
185 static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
186 static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
187 static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
188 static int	vtnet_set_promisc(struct vtnet_softc *, int);
189 static int	vtnet_set_allmulti(struct vtnet_softc *, int);
190 static void	vtnet_attach_disable_promisc(struct vtnet_softc *);
191 static void	vtnet_rx_filter(struct vtnet_softc *);
192 static void	vtnet_rx_filter_mac(struct vtnet_softc *);
193 static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
194 static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
195 static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
196 static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
197 static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
198 
199 static int	vtnet_is_link_up(struct vtnet_softc *);
200 static void	vtnet_update_link_status(struct vtnet_softc *);
201 static int	vtnet_ifmedia_upd(struct ifnet *);
202 static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
203 static void	vtnet_get_hwaddr(struct vtnet_softc *);
204 static void	vtnet_set_hwaddr(struct vtnet_softc *);
205 static void	vtnet_vlan_tag_remove(struct mbuf *);
206 
207 static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
208 		    struct sysctl_oid_list *, struct vtnet_rxq *);
209 static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
210 		    struct sysctl_oid_list *, struct vtnet_txq *);
211 static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
212 static void	vtnet_setup_sysctl(struct vtnet_softc *);
213 
214 static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
215 static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
216 static int	vtnet_txq_enable_intr(struct vtnet_txq *);
217 static void	vtnet_txq_disable_intr(struct vtnet_txq *);
218 static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
219 static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
220 static void	vtnet_enable_interrupts(struct vtnet_softc *);
221 static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
222 static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
223 static void	vtnet_disable_interrupts(struct vtnet_softc *);
224 
225 static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
226 
227 /* Tunables. */
228 static int vtnet_csum_disable = 0;
229 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
230 static int vtnet_tso_disable = 0;
231 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
232 static int vtnet_lro_disable = 0;
233 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
234 static int vtnet_mq_disable = 0;
235 TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
236 static int vtnet_mq_max_pairs = 0;
237 TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
238 static int vtnet_rx_process_limit = 512;
239 TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
240 
/*
 * Reducing the number of transmit completed interrupts can improve
 * performance. To do so, the define below keeps the Tx vq interrupt
 * disabled and adds calls to vtnet_txq_eof() in the start and watchdog
 * paths. The price to pay for this is that the m_free'ing of transmitted
 * mbufs may be delayed until the watchdog fires.
 *
 * BMV: Reintroduce this later as a run-time option, if it makes
 * sense after the EVENT_IDX feature is supported.
 *
 * #define VTNET_TX_INTR_MODERATION
 */
253 
254 static uma_zone_t vtnet_tx_header_zone;
255 
256 static struct virtio_feature_desc vtnet_feature_desc[] = {
257 	{ VIRTIO_NET_F_CSUM,		"TxChecksum"	},
258 	{ VIRTIO_NET_F_GUEST_CSUM,	"RxChecksum"	},
259 	{ VIRTIO_NET_F_MAC,		"MacAddress"	},
260 	{ VIRTIO_NET_F_GSO,		"TxAllGSO"	},
261 	{ VIRTIO_NET_F_GUEST_TSO4,	"RxTSOv4"	},
262 	{ VIRTIO_NET_F_GUEST_TSO6,	"RxTSOv6"	},
263 	{ VIRTIO_NET_F_GUEST_ECN,	"RxECN"		},
264 	{ VIRTIO_NET_F_GUEST_UFO,	"RxUFO"		},
265 	{ VIRTIO_NET_F_HOST_TSO4,	"TxTSOv4"	},
266 	{ VIRTIO_NET_F_HOST_TSO6,	"TxTSOv6"	},
267 	{ VIRTIO_NET_F_HOST_ECN,	"TxTSOECN"	},
268 	{ VIRTIO_NET_F_HOST_UFO,	"TxUFO"		},
269 	{ VIRTIO_NET_F_MRG_RXBUF,	"MrgRxBuf"	},
270 	{ VIRTIO_NET_F_STATUS,		"Status"	},
271 	{ VIRTIO_NET_F_CTRL_VQ,		"ControlVq"	},
272 	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
273 	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
274 	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
275 	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
276 	{ VIRTIO_NET_F_MQ,		"Multiqueue"	},
277 	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},
278 
279 	{ 0, NULL }
280 };
281 
282 static device_method_t vtnet_methods[] = {
283 	/* Device methods. */
284 	DEVMETHOD(device_probe,			vtnet_probe),
285 	DEVMETHOD(device_attach,		vtnet_attach),
286 	DEVMETHOD(device_detach,		vtnet_detach),
287 	DEVMETHOD(device_suspend,		vtnet_suspend),
288 	DEVMETHOD(device_resume,		vtnet_resume),
289 	DEVMETHOD(device_shutdown,		vtnet_shutdown),
290 
291 	/* VirtIO methods. */
292 	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
293 	DEVMETHOD(virtio_config_change,		vtnet_config_change),
294 
295 	DEVMETHOD_END
296 };
297 
298 static driver_t vtnet_driver = {
299 	"vtnet",
300 	vtnet_methods,
301 	sizeof(struct vtnet_softc)
302 };
303 static devclass_t vtnet_devclass;
304 
305 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
306     vtnet_modevent, 0);
307 MODULE_VERSION(vtnet, 1);
308 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
309 
310 static int
311 vtnet_modevent(module_t mod, int type, void *unused)
312 {
313 	int error;
314 
315 	error = 0;
316 
317 	switch (type) {
318 	case MOD_LOAD:
319 		vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
320 		    sizeof(struct vtnet_tx_header),
321 		    NULL, NULL, NULL, NULL, 0, 0);
322 		break;
323 	case MOD_QUIESCE:
324 	case MOD_UNLOAD:
325 		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
326 			error = EBUSY;
327 		else if (type == MOD_UNLOAD) {
328 			uma_zdestroy(vtnet_tx_header_zone);
329 			vtnet_tx_header_zone = NULL;
330 		}
331 		break;
332 	case MOD_SHUTDOWN:
333 		break;
334 	default:
335 		error = EOPNOTSUPP;
336 		break;
337 	}
338 
339 	return (error);
340 }
341 
342 static int
343 vtnet_probe(device_t dev)
344 {
345 
346 	if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
347 		return (ENXIO);
348 
349 	device_set_desc(dev, "VirtIO Networking Adapter");
350 
351 	return (BUS_PROBE_DEFAULT);
352 }
353 
354 static int
355 vtnet_attach(device_t dev)
356 {
357 	struct vtnet_softc *sc;
358 	int error;
359 
360 	sc = device_get_softc(dev);
361 	sc->vtnet_dev = dev;
362 
363 	/* Register our feature descriptions. */
364 	virtio_set_feature_desc(dev, vtnet_feature_desc);
365 
366 	VTNET_CORE_LOCK_INIT(sc);
367 	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
368 
369 	vtnet_setup_sysctl(sc);
370 	vtnet_setup_features(sc);
371 
372 	error = vtnet_alloc_rx_filters(sc);
373 	if (error) {
374 		device_printf(dev, "cannot allocate Rx filters\n");
375 		goto fail;
376 	}
377 
378 	error = vtnet_alloc_rxtx_queues(sc);
379 	if (error) {
380 		device_printf(dev, "cannot allocate queues\n");
381 		goto fail;
382 	}
383 
384 	error = vtnet_alloc_virtqueues(sc);
385 	if (error) {
386 		device_printf(dev, "cannot allocate virtqueues\n");
387 		goto fail;
388 	}
389 
390 	error = vtnet_setup_interface(sc);
391 	if (error) {
392 		device_printf(dev, "cannot setup interface\n");
393 		goto fail;
394 	}
395 
396 	error = virtio_setup_intr(dev, INTR_TYPE_NET);
397 	if (error) {
398 		device_printf(dev, "cannot setup virtqueue interrupts\n");
399 		/* BMV: This will crash if during boot! */
400 		ether_ifdetach(sc->vtnet_ifp);
401 		goto fail;
402 	}
403 
404 	vtnet_start_taskqueues(sc);
405 
406 fail:
407 	if (error)
408 		vtnet_detach(dev);
409 
410 	return (error);
411 }
412 
413 static int
414 vtnet_detach(device_t dev)
415 {
416 	struct vtnet_softc *sc;
417 	struct ifnet *ifp;
418 
419 	sc = device_get_softc(dev);
420 	ifp = sc->vtnet_ifp;
421 
422 	if (device_is_attached(dev)) {
423 		VTNET_CORE_LOCK(sc);
424 		vtnet_stop(sc);
425 		VTNET_CORE_UNLOCK(sc);
426 
427 		callout_drain(&sc->vtnet_tick_ch);
428 		vtnet_drain_taskqueues(sc);
429 
430 		ether_ifdetach(ifp);
431 	}
432 
433 	vtnet_free_taskqueues(sc);
434 
435 	if (sc->vtnet_vlan_attach != NULL) {
436 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
437 		sc->vtnet_vlan_attach = NULL;
438 	}
439 	if (sc->vtnet_vlan_detach != NULL) {
440 		EVENTHANDLER_DEREGISTER(vlan_unconfg, sc->vtnet_vlan_detach);
441 		sc->vtnet_vlan_detach = NULL;
442 	}
443 
444 	ifmedia_removeall(&sc->vtnet_media);
445 
446 	if (ifp != NULL) {
447 		if_free(ifp);
448 		sc->vtnet_ifp = NULL;
449 	}
450 
451 	vtnet_free_rxtx_queues(sc);
452 	vtnet_free_rx_filters(sc);
453 
454 	if (sc->vtnet_ctrl_vq != NULL)
455 		vtnet_free_ctrl_vq(sc);
456 
457 	VTNET_CORE_LOCK_DESTROY(sc);
458 
459 	return (0);
460 }
461 
462 static int
463 vtnet_suspend(device_t dev)
464 {
465 	struct vtnet_softc *sc;
466 
467 	sc = device_get_softc(dev);
468 
469 	VTNET_CORE_LOCK(sc);
470 	vtnet_stop(sc);
471 	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
472 	VTNET_CORE_UNLOCK(sc);
473 
474 	return (0);
475 }
476 
477 static int
478 vtnet_resume(device_t dev)
479 {
480 	struct vtnet_softc *sc;
481 	struct ifnet *ifp;
482 
483 	sc = device_get_softc(dev);
484 	ifp = sc->vtnet_ifp;
485 
486 	VTNET_CORE_LOCK(sc);
487 	if (ifp->if_flags & IFF_UP)
488 		vtnet_init_locked(sc);
489 	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
490 	VTNET_CORE_UNLOCK(sc);
491 
492 	return (0);
493 }
494 
495 static int
496 vtnet_shutdown(device_t dev)
497 {
498 
499 	/*
500 	 * Suspend already does all of what we need to
501 	 * do here; we just never expect to be resumed.
502 	 */
503 	return (vtnet_suspend(dev));
504 }
505 
506 static int
507 vtnet_attach_completed(device_t dev)
508 {
509 
510 	vtnet_attach_disable_promisc(device_get_softc(dev));
511 
512 	return (0);
513 }
514 
515 static int
516 vtnet_config_change(device_t dev)
517 {
518 	struct vtnet_softc *sc;
519 
520 	sc = device_get_softc(dev);
521 
522 	VTNET_CORE_LOCK(sc);
523 	vtnet_update_link_status(sc);
524 	if (sc->vtnet_link_active != 0)
525 		vtnet_tx_start_all(sc);
526 	VTNET_CORE_UNLOCK(sc);
527 
528 	return (0);
529 }
530 
531 static void
532 vtnet_negotiate_features(struct vtnet_softc *sc)
533 {
534 	device_t dev;
535 	uint64_t mask, features;
536 
537 	dev = sc->vtnet_dev;
538 	mask = 0;
539 
540 	/*
541 	 * TSO and LRO are only available when their corresponding checksum
542 	 * offload feature is also negotiated.
543 	 */
544 	if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
545 		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
546 		mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
547 	}
548 	if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
549 		mask |= VTNET_TSO_FEATURES;
550 	if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
551 		mask |= VTNET_LRO_FEATURES;
552 	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
553 		mask |= VIRTIO_NET_F_MQ;
554 #ifdef VTNET_LEGACY_TX
555 	mask |= VIRTIO_NET_F_MQ;
556 #endif
557 
558 	features = VTNET_FEATURES & ~mask;
559 	sc->vtnet_features = virtio_negotiate_features(dev, features);
560 
561 	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) == 0)
562 		return;
563 	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF))
564 		return;
565 
566 	/*
567 	 * LRO without mergeable buffers requires special care. This is not
568 	 * ideal because every receive buffer must be large enough to hold
569 	 * the maximum TCP packet, the Ethernet header, and the header. This
570 	 * requires up to 34 descriptors with MCLBYTES clusters. If we do
571 	 * not have indirect descriptors, LRO is disabled since the virtqueue
572 	 * will not contain very many receive buffers.
573 	 */
574 	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
575 		device_printf(dev,
576 		    "LRO disabled due to both mergeable buffers and indirect "
577 		    "descriptors not negotiated\n");
578 
579 		features &= ~VTNET_LRO_FEATURES;
580 		sc->vtnet_features = virtio_negotiate_features(dev, features);
581 	} else
582 		sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
583 }
584 
585 static void
586 vtnet_setup_features(struct vtnet_softc *sc)
587 {
588 	device_t dev;
589 	int max_pairs, max;
590 
591 	dev = sc->vtnet_dev;
592 
593 	vtnet_negotiate_features(sc);
594 
595 	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
596 		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
597 
598 	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
599 		/* This feature should always be negotiated. */
600 		sc->vtnet_flags |= VTNET_FLAG_MAC;
601 	}
602 
603 	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
604 		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
605 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
606 	} else
607 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
608 
609 	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
610 		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
611 
612 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
613 			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
614 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
615 			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
616 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
617 			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
618 	}
619 
620 	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
621 	    sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
622 		max_pairs = virtio_read_dev_config_2(dev,
623 		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
624 		if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
625 		    max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
626 			max_pairs = 1;
627 	} else
628 		max_pairs = 1;
629 
630 	if (max_pairs > 1) {
631 		/*
632 		 * Limit the maximum number of queue pairs to the number of
633 		 * CPUs or the configured maximum. The actual number of
634 		 * queues that get used may be less.
635 		 */
636 		max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
637 		if (max > 0 && max_pairs > max)
638 			max_pairs = max;
639 		if (max_pairs > mp_ncpus)
640 			max_pairs = mp_ncpus;
641 		if (max_pairs > VTNET_MAX_QUEUE_PAIRS)
642 			max_pairs = VTNET_MAX_QUEUE_PAIRS;
643 		if (max_pairs > 1)
644 			sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
645 	}
646 
647 	sc->vtnet_max_vq_pairs = max_pairs;
648 }
649 
650 static int
651 vtnet_init_rxq(struct vtnet_softc *sc, int id)
652 {
653 	struct vtnet_rxq *rxq;
654 
655 	rxq = &sc->vtnet_rxqs[id];
656 
657 	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
658 	    device_get_nameunit(sc->vtnet_dev), id);
659 	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
660 
661 	rxq->vtnrx_sc = sc;
662 	rxq->vtnrx_id = id;
663 
664 	TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
665 	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
666 	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
667 
668 	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
669 }
670 
671 static int
672 vtnet_init_txq(struct vtnet_softc *sc, int id)
673 {
674 	struct vtnet_txq *txq;
675 
676 	txq = &sc->vtnet_txqs[id];
677 
678 	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
679 	    device_get_nameunit(sc->vtnet_dev), id);
680 	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
681 
682 	txq->vtntx_sc = sc;
683 	txq->vtntx_id = id;
684 
685 #ifndef VTNET_LEGACY_TX
686 	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
687 	    M_NOWAIT, &txq->vtntx_mtx);
688 	if (txq->vtntx_br == NULL)
689 		return (ENOMEM);
690 
691 	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
692 #endif
693 	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
694 	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
695 	    taskqueue_thread_enqueue, &txq->vtntx_tq);
696 	if (txq->vtntx_tq == NULL)
697 		return (ENOMEM);
698 
699 	return (0);
700 }
701 
702 static int
703 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
704 {
705 	int i, npairs, error;
706 
707 	npairs = sc->vtnet_max_vq_pairs;
708 
709 	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
710 	    M_NOWAIT | M_ZERO);
711 	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
712 	    M_NOWAIT | M_ZERO);
713 	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
714 		return (ENOMEM);
715 
716 	for (i = 0; i < npairs; i++) {
717 		error = vtnet_init_rxq(sc, i);
718 		if (error)
719 			return (error);
720 		error = vtnet_init_txq(sc, i);
721 		if (error)
722 			return (error);
723 	}
724 
725 	vtnet_setup_queue_sysctl(sc);
726 
727 	return (0);
728 }
729 
730 static void
731 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
732 {
733 
734 	rxq->vtnrx_sc = NULL;
735 	rxq->vtnrx_id = -1;
736 
737 	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
738 		mtx_destroy(&rxq->vtnrx_mtx);
739 }
740 
741 static void
742 vtnet_destroy_txq(struct vtnet_txq *txq)
743 {
744 
745 	txq->vtntx_sc = NULL;
746 	txq->vtntx_id = -1;
747 
748 #ifndef VTNET_LEGACY_TX
749 	if (txq->vtntx_br != NULL) {
750 		buf_ring_free(txq->vtntx_br, M_DEVBUF);
751 		txq->vtntx_br = NULL;
752 	}
753 #endif
754 
755 	if (mtx_initialized(&txq->vtntx_mtx) != 0)
756 		mtx_destroy(&txq->vtntx_mtx);
757 }
758 
759 static void
760 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
761 {
762 	int i;
763 
764 	if (sc->vtnet_rxqs != NULL) {
765 		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
766 			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
767 		free(sc->vtnet_rxqs, M_DEVBUF);
768 		sc->vtnet_rxqs = NULL;
769 	}
770 
771 	if (sc->vtnet_txqs != NULL) {
772 		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
773 			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
774 		free(sc->vtnet_txqs, M_DEVBUF);
775 		sc->vtnet_txqs = NULL;
776 	}
777 }
778 
779 static int
780 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
781 {
782 
783 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
784 		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
785 		    M_DEVBUF, M_NOWAIT | M_ZERO);
786 		if (sc->vtnet_mac_filter == NULL)
787 			return (ENOMEM);
788 	}
789 
790 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
791 		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
792 		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
793 		if (sc->vtnet_vlan_filter == NULL)
794 			return (ENOMEM);
795 	}
796 
797 	return (0);
798 }
799 
800 static void
801 vtnet_free_rx_filters(struct vtnet_softc *sc)
802 {
803 
804 	if (sc->vtnet_mac_filter != NULL) {
805 		free(sc->vtnet_mac_filter, M_DEVBUF);
806 		sc->vtnet_mac_filter = NULL;
807 	}
808 
809 	if (sc->vtnet_vlan_filter != NULL) {
810 		free(sc->vtnet_vlan_filter, M_DEVBUF);
811 		sc->vtnet_vlan_filter = NULL;
812 	}
813 }
814 
815 static int
816 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
817 {
818 	device_t dev;
819 	struct vq_alloc_info *info;
820 	struct vtnet_rxq *rxq;
821 	struct vtnet_txq *txq;
822 	int i, idx, flags, nvqs, rxsegs, error;
823 
824 	dev = sc->vtnet_dev;
825 	flags = 0;
826 
827 	/*
828 	 * Indirect descriptors are not needed for the Rx virtqueue when
829 	 * mergeable buffers are negotiated. The header is placed inline
830 	 * with the data, not in a separate descriptor, and mbuf clusters
831 	 * are always physically contiguous.
832 	 */
833 	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
834 		rxsegs = 0;
835 	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
836 		rxsegs = VTNET_MAX_RX_SEGS;
837 	else
838 		rxsegs = VTNET_MIN_RX_SEGS;
839 
840 	nvqs = sc->vtnet_max_vq_pairs * 2;
841 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
842 		nvqs++;
843 
844 	info = malloc(sizeof(struct vq_alloc_info) * nvqs , M_TEMP, M_NOWAIT);
845 	if (info == NULL)
846 		return (ENOMEM);
847 
848 	for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) {
849 		rxq = &sc->vtnet_rxqs[i];
850 		VQ_ALLOC_INFO_INIT(&info[idx], rxsegs,
851 		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
852 		    "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
853 
854 		txq = &sc->vtnet_txqs[i];
855 		VQ_ALLOC_INFO_INIT(&info[idx+1], VTNET_MAX_TX_SEGS,
856 		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
857 		    "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
858 	}
859 
860 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
861 		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
862 		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
863 	}
864 
865 	/*
866 	 * Enable interrupt binding if this is multiqueue. This only matters
867 	 * when per-vq MSIX is available.
868 	 */
869 	if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
870 		flags |= 0;
871 
872 	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
873 	free(info, M_TEMP);
874 
875 	return (error);
876 }
877 
878 static int
879 vtnet_setup_interface(struct vtnet_softc *sc)
880 {
881 	device_t dev;
882 	struct ifnet *ifp;
883 	int limit;
884 
885 	dev = sc->vtnet_dev;
886 
887 	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
888 	if (ifp == NULL) {
889 		device_printf(dev, "cannot allocate ifnet structure\n");
890 		return (ENOSPC);
891 	}
892 
893 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
894 	if_initbaudrate(ifp, IF_Gbps(10));	/* Approx. */
895 	ifp->if_softc = sc;
896 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
897 	ifp->if_init = vtnet_init;
898 	ifp->if_ioctl = vtnet_ioctl;
899 
900 #ifndef VTNET_LEGACY_TX
901 	ifp->if_transmit = vtnet_txq_mq_start;
902 	ifp->if_qflush = vtnet_qflush;
903 #else
904 	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
905 	ifp->if_start = vtnet_start;
906 	IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
907 	ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
908 	IFQ_SET_READY(&ifp->if_snd);
909 #endif
910 
911 	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
912 	    vtnet_ifmedia_sts);
913 	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
914 	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
915 
916 	/* Read (or generate) the MAC address for the adapter. */
917 	vtnet_get_hwaddr(sc);
918 
919 	ether_ifattach(ifp, sc->vtnet_hwaddr);
920 
921 	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
922 		ifp->if_capabilities |= IFCAP_LINKSTATE;
923 
924 	/* Tell the upper layer(s) we support long frames. */
925 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
926 	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
927 
928 	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
929 		ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
930 
931 		if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
932 			ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
933 			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
934 		} else {
935 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
936 				ifp->if_capabilities |= IFCAP_TSO4;
937 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
938 				ifp->if_capabilities |= IFCAP_TSO6;
939 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
940 				sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
941 		}
942 
943 		if (ifp->if_capabilities & IFCAP_TSO)
944 			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
945 	}
946 
947 	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM))
948 		ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
949 
950 	if (ifp->if_capabilities & IFCAP_HWCSUM) {
951 		/*
952 		 * VirtIO does not support VLAN tagging, but we can fake
953 		 * it by inserting and removing the 802.1Q header during
954 		 * transmit and receive. We are then able to do checksum
955 		 * offloading of VLAN frames.
956 		 */
957 		ifp->if_capabilities |=
958 		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
959 	}
960 
961 	ifp->if_capenable = ifp->if_capabilities;
962 
963 	/*
964 	 * Capabilities after here are not enabled by default.
965 	 */
966 
967 	if (ifp->if_capabilities & IFCAP_RXCSUM) {
968 		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
969 		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
970 			ifp->if_capabilities |= IFCAP_LRO;
971 	}
972 
973 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
974 		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
975 
976 		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
977 		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
978 		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
979 		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
980 	}
981 
982 	limit = vtnet_tunable_int(sc, "rx_process_limit",
983 	    vtnet_rx_process_limit);
984 	if (limit < 0)
985 		limit = INT_MAX;
986 	sc->vtnet_rx_process_limit = limit;
987 
988 	return (0);
989 }
990 
991 static int
992 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
993 {
994 	struct ifnet *ifp;
995 	int frame_size, clsize;
996 
997 	ifp = sc->vtnet_ifp;
998 
999 	if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
1000 		return (EINVAL);
1001 
1002 	frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
1003 	    new_mtu;
1004 
1005 	/*
1006 	 * Based on the new MTU (and hence frame size) determine which
1007 	 * cluster size is most appropriate for the receive queues.
1008 	 */
1009 	if (frame_size <= MCLBYTES) {
1010 		clsize = MCLBYTES;
1011 	} else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1012 		/* Avoid going past 9K jumbos. */
1013 		if (frame_size > MJUM9BYTES)
1014 			return (EINVAL);
1015 		clsize = MJUM9BYTES;
1016 	} else
1017 		clsize = MJUMPAGESIZE;
1018 
1019 	ifp->if_mtu = new_mtu;
1020 	sc->vtnet_rx_new_clsize = clsize;
1021 
1022 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1023 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1024 		vtnet_init_locked(sc);
1025 	}
1026 
1027 	return (0);
1028 }
1029 
/*
 * Interface ioctl handler.
 *
 * Handles MTU, interface flag, multicast list, media and capability
 * requests itself; every other command is handed to ether_ioctl().
 * Each path that touches driver state takes and releases the core lock
 * on its own, and the lock is asserted not to be owned before returning.
 *
 * Returns 0 on success or an errno value.
 */
static int
vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vtnet_softc *sc;
	struct ifreq *ifr;
	int reinit, mask, error;

	sc = ifp->if_softc;
	ifr = (struct ifreq *) data;
	error = 0;

	switch (cmd) {
	case SIOCSIFMTU:
		/* Nothing to do when the requested MTU is the current one. */
		if (ifp->if_mtu != ifr->ifr_mtu) {
			VTNET_CORE_LOCK(sc);
			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
			VTNET_CORE_UNLOCK(sc);
		}
		break;

	case SIOCSIFFLAGS:
		VTNET_CORE_LOCK(sc);
		if ((ifp->if_flags & IFF_UP) == 0) {
			/* Marked down: stop the interface if it is running. */
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				vtnet_stop(sc);
		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			/*
			 * Up and already running: only a change of the
			 * promiscuous or all-multicast flag needs action,
			 * and that is refused with ENOTSUP unless
			 * VTNET_FLAG_CTRL_RX is set.
			 */
			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
			    (IFF_PROMISC | IFF_ALLMULTI)) {
				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
					vtnet_rx_filter(sc);
				else
					error = ENOTSUP;
			}
		} else
			vtnet_init_locked(sc);

		/* Remember the flags only when the request was honored. */
		if (error == 0)
			sc->vtnet_if_flags = ifp->if_flags;
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* Without VTNET_FLAG_CTRL_RX this is a successful no-op. */
		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
			break;
		VTNET_CORE_LOCK(sc);
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			vtnet_rx_filter_mac(sc);
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
		break;

	case SIOCSIFCAP:
		VTNET_CORE_LOCK(sc);
		/* Bits whose requested state differs from the enabled state. */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;

		if (mask & IFCAP_TXCSUM)
			ifp->if_capenable ^= IFCAP_TXCSUM;
		if (mask & IFCAP_TXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
		if (mask & IFCAP_TSO4)
			ifp->if_capenable ^= IFCAP_TSO4;
		if (mask & IFCAP_TSO6)
			ifp->if_capenable ^= IFCAP_TSO6;

		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
		    IFCAP_VLAN_HWFILTER)) {
			/* These Rx features require us to renegotiate. */
			reinit = 1;

			if (mask & IFCAP_RXCSUM)
				ifp->if_capenable ^= IFCAP_RXCSUM;
			if (mask & IFCAP_RXCSUM_IPV6)
				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
			if (mask & IFCAP_LRO)
				ifp->if_capenable ^= IFCAP_LRO;
			if (mask & IFCAP_VLAN_HWFILTER)
				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
		} else
			reinit = 0;

		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;

		/* Restart a running interface when an Rx feature changed. */
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			vtnet_init_locked(sc);
		}

		VTNET_CORE_UNLOCK(sc);
		/* Invoked only after the core lock has been dropped. */
		VLAN_CAPABILITIES(ifp);

		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);

	return (error);
}
1139 
1140 static int
1141 vtnet_rxq_populate(struct vtnet_rxq *rxq)
1142 {
1143 	struct virtqueue *vq;
1144 	int nbufs, error;
1145 
1146 	vq = rxq->vtnrx_vq;
1147 	error = ENOSPC;
1148 
1149 	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1150 		error = vtnet_rxq_new_buf(rxq);
1151 		if (error)
1152 			break;
1153 	}
1154 
1155 	if (nbufs > 0) {
1156 		virtqueue_notify(vq);
1157 		/*
1158 		 * EMSGSIZE signifies the virtqueue did not have enough
1159 		 * entries available to hold the last mbuf. This is not
1160 		 * an error.
1161 		 */
1162 		if (error == EMSGSIZE)
1163 			error = 0;
1164 	}
1165 
1166 	return (error);
1167 }
1168 
1169 static void
1170 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1171 {
1172 	struct virtqueue *vq;
1173 	struct mbuf *m;
1174 	int last;
1175 
1176 	vq = rxq->vtnrx_vq;
1177 	last = 0;
1178 
1179 	while ((m = virtqueue_drain(vq, &last)) != NULL)
1180 		m_freem(m);
1181 
1182 	KASSERT(virtqueue_empty(vq),
1183 	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1184 }
1185 
1186 static struct mbuf *
1187 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1188 {
1189 	struct mbuf *m_head, *m_tail, *m;
1190 	int i, clsize;
1191 
1192 	clsize = sc->vtnet_rx_clsize;
1193 
1194 	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1195 	    ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
1196 
1197 	m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
1198 	if (m_head == NULL)
1199 		goto fail;
1200 
1201 	m_head->m_len = clsize;
1202 	m_tail = m_head;
1203 
1204 	/* Allocate the rest of the chain. */
1205 	for (i = 1; i < nbufs; i++) {
1206 		m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
1207 		if (m == NULL)
1208 			goto fail;
1209 
1210 		m->m_len = clsize;
1211 		m_tail->m_next = m;
1212 		m_tail = m;
1213 	}
1214 
1215 	if (m_tailp != NULL)
1216 		*m_tailp = m_tail;
1217 
1218 	return (m_head);
1219 
1220 fail:
1221 	sc->vtnet_stats.mbuf_alloc_failed++;
1222 	m_freem(m_head);
1223 
1224 	return (NULL);
1225 }
1226 
/*
 * Slow path for when LRO without mergeable buffers is negotiated.
 *
 * m0 is the chain just dequeued and len0 the number of bytes received
 * into it. The m_len of each mbuf covered by the frame is trimmed to the
 * bytes it holds, a replacement chain of the same number of mbufs is
 * allocated, any unused trailing mbufs of m0 are moved onto it, and the
 * replacement is enqueued.
 *
 * Returns 0 on success. Returns EMSGSIZE if the frame is longer than the
 * chain, ENOBUFS if the replacement cannot be allocated, or the error
 * from vtnet_rxq_enqueue_buf(). On the ENOBUFS and enqueue-failure
 * paths the last trimmed mbuf gets its m_len reset to the cluster size.
 */
static int
vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
    int len0)
{
	struct vtnet_softc *sc;
	struct mbuf *m, *m_prev;
	struct mbuf *m_new, *m_tail;
	int len, clsize, nreplace, error;

	sc = rxq->vtnrx_sc;
	clsize = sc->vtnet_rx_clsize;

	m_prev = NULL;
	m_tail = NULL;
	nreplace = 0;

	m = m0;
	len = len0;

	/*
	 * Since these mbuf chains are so large, we avoid allocating an
	 * entire replacement chain if possible. When the received frame
	 * did not consume the entire chain, the unused mbufs are moved
	 * to the replacement chain.
	 */
	while (len > 0) {
		/*
		 * Something is seriously wrong if we received a frame
		 * larger than the chain. Drop it.
		 */
		if (m == NULL) {
			sc->vtnet_stats.rx_frame_too_large++;
			return (EMSGSIZE);
		}

		/* We always allocate the same cluster size. */
		KASSERT(m->m_len == clsize,
		    ("%s: mbuf size %d is not the cluster size %d",
		    __func__, m->m_len, clsize));

		/* Only the last mbuf of the frame can end up shorter. */
		m->m_len = MIN(m->m_len, len);
		len -= m->m_len;

		m_prev = m;
		m = m->m_next;
		nreplace++;
	}

	KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
	    ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
	    sc->vtnet_rx_nmbufs));

	/*
	 * NOTE(review): m_prev is dereferenced below and is only set inside
	 * the loop, so this relies on len0 > 0 -- confirm against callers.
	 */
	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
	if (m_new == NULL) {
		/* Undo the trim of the last mbuf. */
		m_prev->m_len = clsize;
		return (ENOBUFS);
	}

	/*
	 * Move any unused mbufs from the received chain onto the end
	 * of the new chain.
	 */
	if (m_prev->m_next != NULL) {
		m_tail->m_next = m_prev->m_next;
		m_prev->m_next = NULL;
	}

	error = vtnet_rxq_enqueue_buf(rxq, m_new);
	if (error) {
		/*
		 * BAD! We could not enqueue the replacement mbuf chain. We
		 * must restore the m0 chain to the original state if it was
		 * modified so we can subsequently discard it.
		 *
		 * NOTE: The replacement is supposed to be an identical copy
		 * to the one just dequeued so this is an unexpected error.
		 */
		sc->vtnet_stats.rx_enq_replacement_failed++;

		/* Hand the borrowed tail mbufs back to the m0 chain. */
		if (m_tail->m_next != NULL) {
			m_prev->m_next = m_tail->m_next;
			m_tail->m_next = NULL;
		}

		m_prev->m_len = clsize;
		m_freem(m_new);
	}

	return (error);
}
1320 
1321 static int
1322 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1323 {
1324 	struct vtnet_softc *sc;
1325 	struct mbuf *m_new;
1326 	int error;
1327 
1328 	sc = rxq->vtnrx_sc;
1329 
1330 	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1331 	    ("%s: chained mbuf without LRO_NOMRG", __func__));
1332 
1333 	if (m->m_next == NULL) {
1334 		/* Fast-path for the common case of just one mbuf. */
1335 		if (m->m_len < len)
1336 			return (EINVAL);
1337 
1338 		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1339 		if (m_new == NULL)
1340 			return (ENOBUFS);
1341 
1342 		error = vtnet_rxq_enqueue_buf(rxq, m_new);
1343 		if (error) {
1344 			/*
1345 			 * The new mbuf is suppose to be an identical
1346 			 * copy of the one just dequeued so this is an
1347 			 * unexpected error.
1348 			 */
1349 			m_freem(m_new);
1350 			sc->vtnet_stats.rx_enq_replacement_failed++;
1351 		} else
1352 			m->m_len = len;
1353 	} else
1354 		error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
1355 
1356 	return (error);
1357 }
1358 
1359 static int
1360 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1361 {
1362 	struct sglist sg;
1363 	struct sglist_seg segs[VTNET_MAX_RX_SEGS];
1364 	struct vtnet_softc *sc;
1365 	struct vtnet_rx_header *rxhdr;
1366 	uint8_t *mdata;
1367 	int offset, error;
1368 
1369 	sc = rxq->vtnrx_sc;
1370 	mdata = mtod(m, uint8_t *);
1371 
1372 	VTNET_RXQ_LOCK_ASSERT(rxq);
1373 	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1374 	    ("%s: chained mbuf without LRO_NOMRG", __func__));
1375 	KASSERT(m->m_len == sc->vtnet_rx_clsize,
1376 	    ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
1377 	     sc->vtnet_rx_clsize));
1378 
1379 	sglist_init(&sg, VTNET_MAX_RX_SEGS, segs);
1380 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1381 		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1382 		rxhdr = (struct vtnet_rx_header *) mdata;
1383 		sglist_append(&sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1384 		offset = sizeof(struct vtnet_rx_header);
1385 	} else
1386 		offset = 0;
1387 
1388 	sglist_append(&sg, mdata + offset, m->m_len - offset);
1389 	if (m->m_next != NULL) {
1390 		error = sglist_append_mbuf(&sg, m->m_next);
1391 		MPASS(error == 0);
1392 	}
1393 
1394 	error = virtqueue_enqueue(rxq->vtnrx_vq, m, &sg, 0, sg.sg_nseg);
1395 
1396 	return (error);
1397 }
1398 
1399 static int
1400 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1401 {
1402 	struct vtnet_softc *sc;
1403 	struct mbuf *m;
1404 	int error;
1405 
1406 	sc = rxq->vtnrx_sc;
1407 
1408 	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1409 	if (m == NULL)
1410 		return (ENOBUFS);
1411 
1412 	error = vtnet_rxq_enqueue_buf(rxq, m);
1413 	if (error)
1414 		m_freem(m);
1415 
1416 	return (error);
1417 }
1418 
1419 /*
1420  * Use the checksum offset in the VirtIO header to set the
1421  * correct CSUM_* flags.
1422  */
1423 static int
1424 vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
1425     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1426 {
1427 	struct vtnet_softc *sc;
1428 #if defined(INET) || defined(INET6)
1429 	int offset = hdr->csum_start + hdr->csum_offset;
1430 #endif
1431 
1432 	sc = rxq->vtnrx_sc;
1433 
1434 	/* Only do a basic sanity check on the offset. */
1435 	switch (eth_type) {
1436 #if defined(INET)
1437 	case ETHERTYPE_IP:
1438 		if (__predict_false(offset < ip_start + sizeof(struct ip)))
1439 			return (1);
1440 		break;
1441 #endif
1442 #if defined(INET6)
1443 	case ETHERTYPE_IPV6:
1444 		if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
1445 			return (1);
1446 		break;
1447 #endif
1448 	default:
1449 		sc->vtnet_stats.rx_csum_bad_ethtype++;
1450 		return (1);
1451 	}
1452 
1453 	/*
1454 	 * Use the offset to determine the appropriate CSUM_* flags. This is
1455 	 * a bit dirty, but we can get by with it since the checksum offsets
1456 	 * happen to be different. We assume the host host does not do IPv4
1457 	 * header checksum offloading.
1458 	 */
1459 	switch (hdr->csum_offset) {
1460 	case offsetof(struct udphdr, uh_sum):
1461 	case offsetof(struct tcphdr, th_sum):
1462 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1463 		m->m_pkthdr.csum_data = 0xFFFF;
1464 		break;
1465 	case offsetof(struct sctphdr, checksum):
1466 		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1467 		break;
1468 	default:
1469 		sc->vtnet_stats.rx_csum_bad_offset++;
1470 		return (1);
1471 	}
1472 
1473 	return (0);
1474 }
1475 
1476 static int
1477 vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
1478     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1479 {
1480 	struct vtnet_softc *sc;
1481 	int offset, proto;
1482 
1483 	sc = rxq->vtnrx_sc;
1484 
1485 	switch (eth_type) {
1486 #if defined(INET)
1487 	case ETHERTYPE_IP: {
1488 		struct ip *ip;
1489 		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
1490 			return (1);
1491 		ip = (struct ip *)(m->m_data + ip_start);
1492 		proto = ip->ip_p;
1493 		offset = ip_start + (ip->ip_hl << 2);
1494 		break;
1495 	}
1496 #endif
1497 #if defined(INET6)
1498 	case ETHERTYPE_IPV6:
1499 		if (__predict_false(m->m_len < ip_start +
1500 		    sizeof(struct ip6_hdr)))
1501 			return (1);
1502 		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
1503 		if (__predict_false(offset < 0))
1504 			return (1);
1505 		break;
1506 #endif
1507 	default:
1508 		sc->vtnet_stats.rx_csum_bad_ethtype++;
1509 		return (1);
1510 	}
1511 
1512 	switch (proto) {
1513 	case IPPROTO_TCP:
1514 		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
1515 			return (1);
1516 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1517 		m->m_pkthdr.csum_data = 0xFFFF;
1518 		break;
1519 	case IPPROTO_UDP:
1520 		if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
1521 			return (1);
1522 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1523 		m->m_pkthdr.csum_data = 0xFFFF;
1524 		break;
1525 	case IPPROTO_SCTP:
1526 		if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
1527 			return (1);
1528 		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1529 		break;
1530 	default:
1531 		/*
1532 		 * For the remaining protocols, FreeBSD does not support
1533 		 * checksum offloading, so the checksum will be recomputed.
1534 		 */
1535 #if 0
1536 		if_printf(sc->vtnet_ifp, "cksum offload of unsupported "
1537 		    "protocol eth_type=%#x proto=%d csum_start=%d "
1538 		    "csum_offset=%d\n", __func__, eth_type, proto,
1539 		    hdr->csum_start, hdr->csum_offset);
1540 #endif
1541 		break;
1542 	}
1543 
1544 	return (0);
1545 }
1546 
1547 /*
1548  * Set the appropriate CSUM_* flags. Unfortunately, the information
1549  * provided is not directly useful to us. The VirtIO header gives the
1550  * offset of the checksum, which is all Linux needs, but this is not
1551  * how FreeBSD does things. We are forced to peek inside the packet
1552  * a bit.
1553  *
1554  * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
1555  * could accept the offsets and let the stack figure it out.
1556  */
1557 static int
1558 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1559     struct virtio_net_hdr *hdr)
1560 {
1561 	struct ether_header *eh;
1562 	struct ether_vlan_header *evh;
1563 	uint16_t eth_type;
1564 	int offset, error;
1565 
1566 	eh = mtod(m, struct ether_header *);
1567 	eth_type = ntohs(eh->ether_type);
1568 	if (eth_type == ETHERTYPE_VLAN) {
1569 		/* BMV: We should handle nested VLAN tags too. */
1570 		evh = mtod(m, struct ether_vlan_header *);
1571 		eth_type = ntohs(evh->evl_proto);
1572 		offset = sizeof(struct ether_vlan_header);
1573 	} else
1574 		offset = sizeof(struct ether_header);
1575 
1576 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1577 		error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
1578 	else
1579 		error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
1580 
1581 	return (error);
1582 }
1583 
1584 static void
1585 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1586 {
1587 	struct mbuf *m;
1588 
1589 	while (--nbufs > 0) {
1590 		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1591 		if (m == NULL)
1592 			break;
1593 		vtnet_rxq_discard_buf(rxq, m);
1594 	}
1595 }
1596 
/*
 * Discard a dequeued receive buffer by putting it straight back on the
 * receive virtqueue. A failure to requeue is only caught by the
 * assertion; the error is otherwise ignored.
 */
static void
vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	int error;

	/*
	 * Requeue the discarded mbuf. This should always be successful
	 * since it was just dequeued.
	 */
	error = vtnet_rxq_enqueue_buf(rxq, m);
	KASSERT(error == 0,
	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
}
1610 
1611 static int
1612 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1613 {
1614 	struct vtnet_softc *sc;
1615 	struct ifnet *ifp;
1616 	struct virtqueue *vq;
1617 	struct mbuf *m, *m_tail;
1618 	int len;
1619 
1620 	sc = rxq->vtnrx_sc;
1621 	vq = rxq->vtnrx_vq;
1622 	ifp = sc->vtnet_ifp;
1623 	m_tail = m_head;
1624 
1625 	while (--nbufs > 0) {
1626 		m = virtqueue_dequeue(vq, &len);
1627 		if (m == NULL) {
1628 			rxq->vtnrx_stats.vrxs_ierrors++;
1629 			goto fail;
1630 		}
1631 
1632 		if (vtnet_rxq_new_buf(rxq) != 0) {
1633 			rxq->vtnrx_stats.vrxs_iqdrops++;
1634 			vtnet_rxq_discard_buf(rxq, m);
1635 			if (nbufs > 1)
1636 				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1637 			goto fail;
1638 		}
1639 
1640 		if (m->m_len < len)
1641 			len = m->m_len;
1642 
1643 		m->m_len = len;
1644 		m->m_flags &= ~M_PKTHDR;
1645 
1646 		m_head->m_pkthdr.len += len;
1647 		m_tail->m_next = m;
1648 		m_tail = m;
1649 	}
1650 
1651 	return (0);
1652 
1653 fail:
1654 	sc->vtnet_stats.rx_mergeable_failed++;
1655 	m_freem(m_head);
1656 
1657 	return (1);
1658 }
1659 
1660 static void
1661 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
1662     struct virtio_net_hdr *hdr)
1663 {
1664 	struct vtnet_softc *sc;
1665 	struct ifnet *ifp;
1666 	struct ether_header *eh;
1667 
1668 	sc = rxq->vtnrx_sc;
1669 	ifp = sc->vtnet_ifp;
1670 
1671 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1672 		eh = mtod(m, struct ether_header *);
1673 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1674 			vtnet_vlan_tag_remove(m);
1675 			/*
1676 			 * With the 802.1Q header removed, update the
1677 			 * checksum starting location accordingly.
1678 			 */
1679 			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1680 				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
1681 		}
1682 	}
1683 
1684 	m->m_pkthdr.flowid = rxq->vtnrx_id;
1685 	m->m_flags |= M_FLOWID;
1686 
1687 	/*
1688 	 * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
1689 	 * distinction that Linux does. Need to reevaluate if performing
1690 	 * offloading for the NEEDS_CSUM case is really appropriate.
1691 	 */
1692 	if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
1693 	    VIRTIO_NET_HDR_F_DATA_VALID)) {
1694 		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
1695 			rxq->vtnrx_stats.vrxs_csum++;
1696 		else
1697 			rxq->vtnrx_stats.vrxs_csum_failed++;
1698 	}
1699 
1700 	rxq->vtnrx_stats.vrxs_ipackets++;
1701 	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
1702 
1703 	/* VTNET_RXQ_UNLOCK(rxq); */
1704 	(*ifp->if_input)(ifp, m);
1705 	/* VTNET_RXQ_LOCK(rxq); */
1706 }
1707 
/*
 * Process received frames from the receive virtqueue, bounded by
 * vtnet_rx_process_limit loop iterations. The Rx queue lock must be held.
 *
 * For each dequeued buffer: frames too short to hold the virtio header
 * plus an Ethernet header are requeued and counted as input errors; a
 * replacement buffer is enqueued; with mergeable buffers the rest of the
 * frame is collected; the virtio header is copied out and stripped; and
 * the frame is passed to vtnet_rxq_input(). The host is notified if any
 * buffer was dequeued.
 *
 * Returns 0 if the loop ended with budget remaining, EAGAIN otherwise.
 */
static int
vtnet_rxq_eof(struct vtnet_rxq *rxq)
{
	struct virtio_net_hdr lhdr, *hdr;
	struct vtnet_softc *sc;
	struct ifnet *ifp;
	struct virtqueue *vq;
	struct mbuf *m;
	struct virtio_net_hdr_mrg_rxbuf *mhdr;
	int len, deq, nbufs, adjsz, count;

	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
	ifp = sc->vtnet_ifp;
	hdr = &lhdr;
	deq = 0;
	count = sc->vtnet_rx_process_limit;

	VTNET_RXQ_LOCK_ASSERT(rxq);

	while (count-- > 0) {
		m = virtqueue_dequeue(vq, &len);
		if (m == NULL)
			break;
		deq++;

		/* Too short to be a frame: put the buffer straight back. */
		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
			rxq->vtnrx_stats.vrxs_ierrors++;
			vtnet_rxq_discard_buf(rxq, m);
			continue;
		}

		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
			nbufs = 1;
			adjsz = sizeof(struct vtnet_rx_header);
			/*
			 * Account for our pad inserted between the header
			 * and the actual start of the frame.
			 */
			len += VTNET_RX_HEADER_PAD;
		} else {
			/* The header says how many buffers the frame spans. */
			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
			nbufs = mhdr->num_buffers;
			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
		}

		/*
		 * Without a replacement the frame is dropped and all of its
		 * buffers are returned to the queue.
		 */
		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
			rxq->vtnrx_stats.vrxs_iqdrops++;
			vtnet_rxq_discard_buf(rxq, m);
			if (nbufs > 1)
				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
			continue;
		}

		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = ifp;
		m->m_pkthdr.csum_flags = 0;

		if (nbufs > 1) {
			/* Dequeue the rest of chain. */
			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
				continue;
		}

		/*
		 * Save copy of header before we strip it. For both mergeable
		 * and non-mergeable, the header is at the beginning of the
		 * mbuf data. We no longer need num_buffers, so always use a
		 * regular header.
		 *
		 * BMV: Is this memcpy() expensive? We know the mbuf data is
		 * still valid even after the m_adj().
		 */
		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
		m_adj(m, adjsz);

		vtnet_rxq_input(rxq, m, hdr);
	}

	if (deq > 0)
		virtqueue_notify(vq);

	return (count > 0 ? 0 : EAGAIN);
}
1792 
/*
 * Receive virtqueue interrupt handler; xrxq is the struct vtnet_rxq.
 *
 * Received frames are processed under the Rx queue lock. If more work
 * remains, or re-enabling the interrupt reports a nonzero result, the
 * pass is repeated up to VTNET_INTR_DISABLE_RETRIES times before the
 * rest is deferred to the queue's taskqueue.
 */
static void
vtnet_rx_vq_intr(void *xrxq)
{
	struct vtnet_softc *sc;
	struct vtnet_rxq *rxq;
	struct ifnet *ifp;
	int tries, more;

	rxq = xrxq;
	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;
	tries = 0;

	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
		/*
		 * Ignore this interrupt. Either this is a spurious interrupt
		 * or multiqueue without per-VQ MSIX so every queue needs to
		 * be polled (a brain dead configuration we could try harder
		 * to avoid).
		 */
		vtnet_rxq_disable_intr(rxq);
		return;
	}

again:
	VTNET_RXQ_LOCK(rxq);

	/* Nothing to do when the interface is not running. */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		VTNET_RXQ_UNLOCK(rxq);
		return;
	}

	more = vtnet_rxq_eof(rxq);
	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
		/* The interrupt was just enabled above; turn it off again. */
		if (!more)
			vtnet_rxq_disable_intr(rxq);
		/*
		 * This is an occasional condition or race (when !more),
		 * so retry a few times before scheduling the taskqueue.
		 */
		rxq->vtnrx_stats.vrxs_rescheduled++;
		VTNET_RXQ_UNLOCK(rxq);
		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
			goto again;
		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
	} else
		VTNET_RXQ_UNLOCK(rxq);
}
1841 
1842 static void
1843 vtnet_rxq_tq_intr(void *xrxq, int pending)
1844 {
1845 	struct vtnet_softc *sc;
1846 	struct vtnet_rxq *rxq;
1847 	struct ifnet *ifp;
1848 	int more;
1849 
1850 	rxq = xrxq;
1851 	sc = rxq->vtnrx_sc;
1852 	ifp = sc->vtnet_ifp;
1853 
1854 	VTNET_RXQ_LOCK(rxq);
1855 
1856 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1857 		VTNET_RXQ_UNLOCK(rxq);
1858 		return;
1859 	}
1860 
1861 	more = vtnet_rxq_eof(rxq);
1862 	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1863 		if (!more)
1864 			vtnet_rxq_disable_intr(rxq);
1865 		rxq->vtnrx_stats.vrxs_rescheduled++;
1866 		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1867 	}
1868 
1869 	VTNET_RXQ_UNLOCK(rxq);
1870 }
1871 
1872 static void
1873 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
1874 {
1875 	struct virtqueue *vq;
1876 	struct vtnet_tx_header *txhdr;
1877 	int last;
1878 
1879 	vq = txq->vtntx_vq;
1880 	last = 0;
1881 
1882 	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
1883 		m_freem(txhdr->vth_mbuf);
1884 		uma_zfree(vtnet_tx_header_zone, txhdr);
1885 	}
1886 
1887 	KASSERT(virtqueue_empty(vq),
1888 	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
1889 }
1890 
1891 /*
1892  * BMV: Much of this can go away once we finally have offsets in
1893  * the mbuf packet header. Bug andre@.
1894  */
1895 static int
1896 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
1897     int *etype, int *proto, int *start)
1898 {
1899 	struct vtnet_softc *sc;
1900 	struct ether_vlan_header *evh;
1901 	int offset;
1902 
1903 	sc = txq->vtntx_sc;
1904 
1905 	evh = mtod(m, struct ether_vlan_header *);
1906 	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1907 		/* BMV: We should handle nested VLAN tags too. */
1908 		*etype = ntohs(evh->evl_proto);
1909 		offset = sizeof(struct ether_vlan_header);
1910 	} else {
1911 		*etype = ntohs(evh->evl_encap_proto);
1912 		offset = sizeof(struct ether_header);
1913 	}
1914 
1915 	switch (*etype) {
1916 #if defined(INET)
1917 	case ETHERTYPE_IP: {
1918 		struct ip *ip, iphdr;
1919 		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
1920 			m_copydata(m, offset, sizeof(struct ip),
1921 			    (caddr_t) &iphdr);
1922 			ip = &iphdr;
1923 		} else
1924 			ip = (struct ip *)(m->m_data + offset);
1925 		*proto = ip->ip_p;
1926 		*start = offset + (ip->ip_hl << 2);
1927 		break;
1928 	}
1929 #endif
1930 #if defined(INET6)
1931 	case ETHERTYPE_IPV6:
1932 		*proto = -1;
1933 		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
1934 		/* Assert the network stack sent us a valid packet. */
1935 		KASSERT(*start > offset,
1936 		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
1937 		    *start, offset, *proto));
1938 		break;
1939 #endif
1940 	default:
1941 		sc->vtnet_stats.tx_csum_bad_ethtype++;
1942 		return (EINVAL);
1943 	}
1944 
1945 	return (0);
1946 }
1947 
1948 static int
1949 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
1950     int offset, struct virtio_net_hdr *hdr)
1951 {
1952 	static struct timeval lastecn;
1953 	static int curecn;
1954 	struct vtnet_softc *sc;
1955 	struct tcphdr *tcp, tcphdr;
1956 
1957 	sc = txq->vtntx_sc;
1958 
1959 	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
1960 		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
1961 		tcp = &tcphdr;
1962 	} else
1963 		tcp = (struct tcphdr *)(m->m_data + offset);
1964 
1965 	hdr->hdr_len = offset + (tcp->th_off << 2);
1966 	hdr->gso_size = m->m_pkthdr.tso_segsz;
1967 	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
1968 	    VIRTIO_NET_HDR_GSO_TCPV6;
1969 
1970 	if (tcp->th_flags & TH_CWR) {
1971 		/*
1972 		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
1973 		 * ECN support is not on a per-interface basis, but globally via
1974 		 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
1975 		 */
1976 		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
1977 			if (ppsratecheck(&lastecn, &curecn, 1))
1978 				if_printf(sc->vtnet_ifp,
1979 				    "TSO with ECN not negotiated with host\n");
1980 			return (ENOTSUP);
1981 		}
1982 		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1983 	}
1984 
1985 	txq->vtntx_stats.vtxs_tso++;
1986 
1987 	return (0);
1988 }
1989 
1990 static struct mbuf *
1991 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
1992     struct virtio_net_hdr *hdr)
1993 {
1994 	struct vtnet_softc *sc;
1995 	int flags, etype, csum_start, proto, error;
1996 
1997 	sc = txq->vtntx_sc;
1998 	flags = m->m_pkthdr.csum_flags;
1999 
2000 	error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2001 	if (error)
2002 		goto drop;
2003 
2004 	if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
2005 	    (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
2006 		/*
2007 		 * We could compare the IP protocol vs the CSUM_ flag too,
2008 		 * but that really should not be necessary.
2009 		 */
2010 		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2011 		hdr->csum_start = csum_start;
2012 		hdr->csum_offset = m->m_pkthdr.csum_data;
2013 		txq->vtntx_stats.vtxs_csum++;
2014 	}
2015 
2016 	if (flags & CSUM_TSO) {
2017 		if (__predict_false(proto != IPPROTO_TCP)) {
2018 			/* Likely failed to correctly parse the mbuf. */
2019 			sc->vtnet_stats.tx_tso_not_tcp++;
2020 			goto drop;
2021 		}
2022 
2023 		KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
2024 		    ("%s: mbuf %p TSO without checksum offload", __func__, m));
2025 
2026 		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2027 		if (error)
2028 			goto drop;
2029 	}
2030 
2031 	return (m);
2032 
2033 drop:
2034 	m_freem(m);
2035 	return (NULL);
2036 }
2037 
/*
 * Build a scatter/gather list from the transmit header followed by the
 * mbuf chain *m_head and enqueue it on the transmit virtqueue as
 * readable segments, with txhdr as the cookie.
 *
 * If the chain does not fit in the list, it is collapsed once with
 * m_collapse() and the append is retried; *m_head is updated to the
 * collapsed chain.
 *
 * Returns the result of virtqueue_enqueue(); on an enqueue error *m_head
 * is left intact. If the chain cannot be made to fit, it is freed,
 * *m_head is set to NULL and ENOBUFS is returned.
 */
static int
vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
    struct vtnet_tx_header *txhdr)
{
	struct sglist sg;
	struct sglist_seg segs[VTNET_MAX_TX_SEGS];
	struct vtnet_softc *sc;
	struct virtqueue *vq;
	struct mbuf *m;
	int collapsed, error;

	vq = txq->vtntx_vq;
	sc = txq->vtntx_sc;
	m = *m_head;
	collapsed = 0;

	/* The virtio header always occupies the first segment. */
	sglist_init(&sg, VTNET_MAX_TX_SEGS, segs);
	error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
	KASSERT(error == 0 && sg.sg_nseg == 1,
	    ("%s: error %d adding header to sglist", __func__, error));

again:
	error = sglist_append_mbuf(&sg, m);
	if (error) {
		/* Only one collapse attempt is made. */
		if (collapsed)
			goto fail;

		/* One segment is reserved for the header added above. */
		m = m_collapse(m, M_NOWAIT, VTNET_MAX_TX_SEGS - 1);
		if (m == NULL)
			goto fail;

		*m_head = m;
		collapsed = 1;
		txq->vtntx_stats.vtxs_collapsed++;
		goto again;
	}

	/* Remember the chain so it can be freed via the header later. */
	txhdr->vth_mbuf = m;
	error = virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0);

	return (error);

fail:
	m_freem(*m_head);
	*m_head = NULL;

	return (ENOBUFS);
}
2086 
/*
 * Prepare the packet *m_head for transmission and enqueue it.
 *
 * A zeroed transmit header is allocated, a pending VLAN tag is inserted
 * into the frame, the offload fields are filled in when checksum or TSO
 * offload was requested, and the result is handed to
 * vtnet_txq_enqueue_buf().
 *
 * Returns 0 on success. On failure an errno value is returned and the
 * transmit header is released; *m_head is NULL if the packet was
 * consumed along the way and still points at the packet otherwise.
 */
static int
vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head)
{
	struct vtnet_softc *sc;
	struct vtnet_tx_header *txhdr;
	struct virtio_net_hdr *hdr;
	struct mbuf *m;
	int error;

	sc = txq->vtntx_sc;
	m = *m_head;
	M_ASSERTPKTHDR(m);

	/* Without a header the packet cannot be sent; drop it. */
	txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO);
	if (txhdr == NULL) {
		m_freem(m);
		*m_head = NULL;
		return (ENOMEM);
	}

	/*
	 * Always use the non-mergeable header, regardless if the feature
	 * was negotiated. For transmit, num_buffers is always zero. The
	 * vtnet_hdr_size is used to enqueue the correct header size.
	 */
	hdr = &txhdr->vth_uhdr.hdr;

	if (m->m_flags & M_VLANTAG) {
		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
		if ((*m_head = m) == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		/* The tag now lives in the frame itself. */
		m->m_flags &= ~M_VLANTAG;
	}

	if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
		/* A NULL return means the mbuf was already freed. */
		m = vtnet_txq_offload(txq, m, hdr);
		if ((*m_head = m) == NULL) {
			error = ENOBUFS;
			goto fail;
		}
	}

	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
	if (error == 0)
		return (0);

fail:
	uma_zfree(vtnet_tx_header_zone, txhdr);

	return (error);
}
2140 
2141 #ifdef VTNET_LEGACY_TX
2142 
2143 static void
2144 vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
2145 {
2146 	struct vtnet_softc *sc;
2147 	struct virtqueue *vq;
2148 	struct mbuf *m0;
2149 	int enq;
2150 
2151 	sc = txq->vtntx_sc;
2152 	vq = txq->vtntx_vq;
2153 	enq = 0;
2154 
2155 	VTNET_TXQ_LOCK_ASSERT(txq);
2156 
2157 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2158 	    sc->vtnet_link_active == 0)
2159 		return;
2160 
2161 	vtnet_txq_eof(txq);
2162 
2163 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2164 		if (virtqueue_full(vq))
2165 			break;
2166 
2167 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
2168 		if (m0 == NULL)
2169 			break;
2170 
2171 		if (vtnet_txq_encap(txq, &m0) != 0) {
2172 			if (m0 != NULL)
2173 				IFQ_DRV_PREPEND(&ifp->if_snd, m0);
2174 			break;
2175 		}
2176 
2177 		enq++;
2178 		ETHER_BPF_MTAP(ifp, m0);
2179 	}
2180 
2181 	if (enq > 0) {
2182 		virtqueue_notify(vq);
2183 		txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2184 	}
2185 }
2186 
2187 static void
2188 vtnet_start(struct ifnet *ifp)
2189 {
2190 	struct vtnet_softc *sc;
2191 	struct vtnet_txq *txq;
2192 
2193 	sc = ifp->if_softc;
2194 	txq = &sc->vtnet_txqs[0];
2195 
2196 	VTNET_TXQ_LOCK(txq);
2197 	vtnet_start_locked(txq, ifp);
2198 	VTNET_TXQ_UNLOCK(txq);
2199 }
2200 
2201 #else /* !VTNET_LEGACY_TX */
2202 
2203 static int
2204 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2205 {
2206 	struct vtnet_softc *sc;
2207 	struct virtqueue *vq;
2208 	struct buf_ring *br;
2209 	struct ifnet *ifp;
2210 	int enq, error;
2211 
2212 	sc = txq->vtntx_sc;
2213 	vq = txq->vtntx_vq;
2214 	br = txq->vtntx_br;
2215 	ifp = sc->vtnet_ifp;
2216 	enq = 0;
2217 	error = 0;
2218 
2219 	VTNET_TXQ_LOCK_ASSERT(txq);
2220 
2221 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2222 	    sc->vtnet_link_active == 0) {
2223 		if (m != NULL)
2224 			error = drbr_enqueue(ifp, br, m);
2225 		return (error);
2226 	}
2227 
2228 	if (m != NULL) {
2229 		error = drbr_enqueue(ifp, br, m);
2230 		if (error)
2231 			return (error);
2232 	}
2233 
2234 	vtnet_txq_eof(txq);
2235 
2236 	while ((m = drbr_peek(ifp, br)) != NULL) {
2237 		error = vtnet_txq_encap(txq, &m);
2238 		if (error) {
2239 			if (m != NULL)
2240 				drbr_putback(ifp, br, m);
2241 			else
2242 				drbr_advance(ifp, br);
2243 			break;
2244 		}
2245 		drbr_advance(ifp, br);
2246 
2247 		enq++;
2248 		ETHER_BPF_MTAP(ifp, m);
2249 	}
2250 
2251 	if (enq > 0) {
2252 		virtqueue_notify(vq);
2253 		txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2254 	}
2255 
2256 	return (error);
2257 }
2258 
2259 static int
2260 vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2261 {
2262 	struct vtnet_softc *sc;
2263 	struct vtnet_txq *txq;
2264 	int i, npairs, error;
2265 
2266 	sc = ifp->if_softc;
2267 	npairs = sc->vtnet_act_vq_pairs;
2268 
2269 	if (m->m_flags & M_FLOWID)
2270 		i = m->m_pkthdr.flowid % npairs;
2271 	else
2272 		i = curcpu % npairs;
2273 
2274 	txq = &sc->vtnet_txqs[i];
2275 
2276 	if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2277 		error = vtnet_txq_mq_start_locked(txq, m);
2278 		VTNET_TXQ_UNLOCK(txq);
2279 	} else {
2280 		error = drbr_enqueue(ifp, txq->vtntx_br, m);
2281 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2282 	}
2283 
2284 	return (error);
2285 }
2286 
2287 static void
2288 vtnet_txq_tq_deferred(void *xtxq, int pending)
2289 {
2290 	struct vtnet_softc *sc;
2291 	struct vtnet_txq *txq;
2292 
2293 	txq = xtxq;
2294 	sc = txq->vtntx_sc;
2295 
2296 	VTNET_TXQ_LOCK(txq);
2297 	if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2298 		vtnet_txq_mq_start_locked(txq, NULL);
2299 	VTNET_TXQ_UNLOCK(txq);
2300 }
2301 
2302 #endif /* VTNET_LEGACY_TX */
2303 
2304 static void
2305 vtnet_txq_tq_intr(void *xtxq, int pending)
2306 {
2307 	struct vtnet_softc *sc;
2308 	struct vtnet_txq *txq;
2309 	struct ifnet *ifp;
2310 
2311 	txq = xtxq;
2312 	sc = txq->vtntx_sc;
2313 	ifp = sc->vtnet_ifp;
2314 
2315 	VTNET_TXQ_LOCK(txq);
2316 
2317 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2318 		VTNET_TXQ_UNLOCK(txq);
2319 		return;
2320 	}
2321 
2322 	vtnet_txq_eof(txq);
2323 
2324 #ifdef VTNET_LEGACY_TX
2325 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2326 		vtnet_start_locked(txq, ifp);
2327 #else
2328 	if (!drbr_empty(ifp, txq->vtntx_br))
2329 		vtnet_txq_mq_start_locked(txq, NULL);
2330 #endif
2331 
2332 	if (vtnet_txq_enable_intr(txq) != 0) {
2333 		vtnet_txq_disable_intr(txq);
2334 		txq->vtntx_stats.vtxs_rescheduled++;
2335 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2336 	}
2337 
2338 	VTNET_TXQ_UNLOCK(txq);
2339 }
2340 
2341 static void
2342 vtnet_txq_eof(struct vtnet_txq *txq)
2343 {
2344 	struct virtqueue *vq;
2345 	struct vtnet_tx_header *txhdr;
2346 	struct mbuf *m;
2347 
2348 	vq = txq->vtntx_vq;
2349 	VTNET_TXQ_LOCK_ASSERT(txq);
2350 
2351 	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2352 		m = txhdr->vth_mbuf;
2353 
2354 		txq->vtntx_stats.vtxs_opackets++;
2355 		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2356 		if (m->m_flags & M_MCAST)
2357 			txq->vtntx_stats.vtxs_omcasts++;
2358 
2359 		m_freem(m);
2360 		uma_zfree(vtnet_tx_header_zone, txhdr);
2361 	}
2362 
2363 	if (virtqueue_empty(vq))
2364 		txq->vtntx_watchdog = 0;
2365 }
2366 
2367 static void
2368 vtnet_tx_vq_intr(void *xtxq)
2369 {
2370 	struct vtnet_softc *sc;
2371 	struct vtnet_txq *txq;
2372 	struct ifnet *ifp;
2373 	int tries;
2374 
2375 	txq = xtxq;
2376 	sc = txq->vtntx_sc;
2377 	ifp = sc->vtnet_ifp;
2378 	tries = 0;
2379 
2380 	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2381 		/*
2382 		 * Ignore this interrupt. Either this is a spurious interrupt
2383 		 * or multiqueue without per-VQ MSIX so every queue needs to
2384 		 * be polled (a brain dead configuration we could try harder
2385 		 * to avoid).
2386 		 */
2387 		vtnet_txq_disable_intr(txq);
2388 		return;
2389 	}
2390 
2391 again:
2392 	VTNET_TXQ_LOCK(txq);
2393 
2394 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2395 		VTNET_TXQ_UNLOCK(txq);
2396 		return;
2397 	}
2398 
2399 	vtnet_txq_eof(txq);
2400 
2401 #ifdef VTNET_LEGACY_TX
2402 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2403 		vtnet_start_locked(txq, ifp);
2404 #else
2405 	if (!drbr_empty(ifp, txq->vtntx_br))
2406 		vtnet_txq_mq_start_locked(txq, NULL);
2407 #endif
2408 
2409 	if (vtnet_txq_enable_intr(txq) != 0) {
2410 		vtnet_txq_disable_intr(txq);
2411 		/*
2412 		 * This is an occasional race, so retry a few times
2413 		 * before scheduling the taskqueue.
2414 		 */
2415 		VTNET_TXQ_UNLOCK(txq);
2416 		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
2417 			goto again;
2418 		txq->vtntx_stats.vtxs_rescheduled++;
2419 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2420 	} else
2421 		VTNET_TXQ_UNLOCK(txq);
2422 }
2423 
2424 static void
2425 vtnet_tx_start_all(struct vtnet_softc *sc)
2426 {
2427 	struct ifnet *ifp;
2428 	struct vtnet_txq *txq;
2429 	int i;
2430 
2431 	ifp = sc->vtnet_ifp;
2432 	VTNET_CORE_LOCK_ASSERT(sc);
2433 
2434 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2435 		txq = &sc->vtnet_txqs[i];
2436 
2437 		VTNET_TXQ_LOCK(txq);
2438 #ifdef VTNET_LEGACY_TX
2439 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2440 			vtnet_start_locked(txq, ifp);
2441 #else
2442 		if (!drbr_empty(ifp, txq->vtntx_br))
2443 			vtnet_txq_mq_start_locked(txq, NULL);
2444 #endif
2445 		VTNET_TXQ_UNLOCK(txq);
2446 	}
2447 }
2448 
2449 #ifndef VTNET_LEGACY_TX
2450 static void
2451 vtnet_qflush(struct ifnet *ifp)
2452 {
2453 	struct vtnet_softc *sc;
2454 	struct vtnet_txq *txq;
2455 	struct mbuf *m;
2456 	int i;
2457 
2458 	sc = ifp->if_softc;
2459 
2460 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2461 		txq = &sc->vtnet_txqs[i];
2462 
2463 		VTNET_TXQ_LOCK(txq);
2464 		while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2465 			m_freem(m);
2466 		VTNET_TXQ_UNLOCK(txq);
2467 	}
2468 
2469 	if_qflush(ifp);
2470 }
2471 #endif
2472 
2473 static int
2474 vtnet_watchdog(struct vtnet_txq *txq)
2475 {
2476 	struct vtnet_softc *sc;
2477 
2478 	sc = txq->vtntx_sc;
2479 
2480 	VTNET_TXQ_LOCK(txq);
2481 	if (sc->vtnet_flags & VTNET_FLAG_EVENT_IDX)
2482 		vtnet_txq_eof(txq);
2483 	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2484 		VTNET_TXQ_UNLOCK(txq);
2485 		return (0);
2486 	}
2487 	VTNET_TXQ_UNLOCK(txq);
2488 
2489 	if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n",
2490 	    txq->vtntx_id);
2491 	return (1);
2492 }
2493 
2494 static void
2495 vtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum)
2496 {
2497 	struct vtnet_rxq_stats *st;
2498 
2499 	st = &rxq->vtnrx_stats;
2500 
2501 	accum->vrxs_ipackets += st->vrxs_ipackets;
2502 	accum->vrxs_ibytes += st->vrxs_ibytes;
2503 	accum->vrxs_iqdrops += st->vrxs_iqdrops;
2504 	accum->vrxs_csum += st->vrxs_csum;
2505 	accum->vrxs_csum_failed += st->vrxs_csum_failed;
2506 	accum->vrxs_rescheduled += st->vrxs_rescheduled;
2507 }
2508 
2509 static void
2510 vtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats *accum)
2511 {
2512 	struct vtnet_txq_stats *st;
2513 
2514 	st = &txq->vtntx_stats;
2515 
2516 	accum->vtxs_opackets += st->vtxs_opackets;
2517 	accum->vtxs_obytes += st->vtxs_obytes;
2518 	accum->vtxs_csum += st->vtxs_csum;
2519 	accum->vtxs_tso += st->vtxs_tso;
2520 	accum->vtxs_collapsed += st->vtxs_collapsed;
2521 	accum->vtxs_rescheduled += st->vtxs_rescheduled;
2522 }
2523 
2524 static void
2525 vtnet_accumulate_stats(struct vtnet_softc *sc)
2526 {
2527 	struct ifnet *ifp;
2528 	struct vtnet_statistics *st;
2529 	struct vtnet_rxq_stats rxaccum;
2530 	struct vtnet_txq_stats txaccum;
2531 	int i;
2532 
2533 	ifp = sc->vtnet_ifp;
2534 	st = &sc->vtnet_stats;
2535 	bzero(&rxaccum, sizeof(struct vtnet_rxq_stats));
2536 	bzero(&txaccum, sizeof(struct vtnet_txq_stats));
2537 
2538 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2539 		vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum);
2540 		vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum);
2541 	}
2542 
2543 	st->rx_csum_offloaded = rxaccum.vrxs_csum;
2544 	st->rx_csum_failed = rxaccum.vrxs_csum_failed;
2545 	st->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
2546 	st->tx_csum_offloaded = txaccum.vtxs_csum;
2547 	st->tx_tso_offloaded = txaccum.vtxs_tso;
2548 	st->tx_task_rescheduled = txaccum.vtxs_rescheduled;
2549 
2550 	/*
2551 	 * With the exception of if_ierrors, these ifnet statistics are
2552 	 * only updated in the driver, so just set them to our accumulated
2553 	 * values. if_ierrors is updated in ether_input() for malformed
2554 	 * frames that we should have already discarded.
2555 	 */
2556 	ifp->if_ipackets = rxaccum.vrxs_ipackets;
2557 	ifp->if_iqdrops = rxaccum.vrxs_iqdrops;
2558 	ifp->if_ierrors = rxaccum.vrxs_ierrors;
2559 	ifp->if_opackets = txaccum.vtxs_opackets;
2560 #ifndef VTNET_LEGACY_TX
2561 	ifp->if_obytes = txaccum.vtxs_obytes;
2562 	ifp->if_omcasts = txaccum.vtxs_omcasts;
2563 #endif
2564 }
2565 
2566 static void
2567 vtnet_tick(void *xsc)
2568 {
2569 	struct vtnet_softc *sc;
2570 	struct ifnet *ifp;
2571 	int i, timedout;
2572 
2573 	sc = xsc;
2574 	ifp = sc->vtnet_ifp;
2575 	timedout = 0;
2576 
2577 	VTNET_CORE_LOCK_ASSERT(sc);
2578 	vtnet_accumulate_stats(sc);
2579 
2580 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
2581 		timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
2582 
2583 	if (timedout != 0) {
2584 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2585 		vtnet_init_locked(sc);
2586 	} else
2587 		callout_schedule(&sc->vtnet_tick_ch, hz);
2588 }
2589 
2590 static void
2591 vtnet_start_taskqueues(struct vtnet_softc *sc)
2592 {
2593 	device_t dev;
2594 	struct vtnet_rxq *rxq;
2595 	struct vtnet_txq *txq;
2596 	int i, error;
2597 
2598 	dev = sc->vtnet_dev;
2599 
2600 	/*
2601 	 * Errors here are very difficult to recover from - we cannot
2602 	 * easily fail because, if this is during boot, we will hang
2603 	 * when freeing any successfully started taskqueues because
2604 	 * the scheduler isn't up yet.
2605 	 *
2606 	 * Most drivers just ignore the return value - it only fails
2607 	 * with ENOMEM so an error is not likely.
2608 	 */
2609 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2610 		rxq = &sc->vtnet_rxqs[i];
2611 		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
2612 		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
2613 		if (error) {
2614 			device_printf(dev, "failed to start rx taskq %d\n",
2615 			    rxq->vtnrx_id);
2616 		}
2617 
2618 		txq = &sc->vtnet_txqs[i];
2619 		error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
2620 		    "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
2621 		if (error) {
2622 			device_printf(dev, "failed to start tx taskq %d\n",
2623 			    txq->vtntx_id);
2624 		}
2625 	}
2626 }
2627 
2628 static void
2629 vtnet_free_taskqueues(struct vtnet_softc *sc)
2630 {
2631 	struct vtnet_rxq *rxq;
2632 	struct vtnet_txq *txq;
2633 	int i;
2634 
2635 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2636 		rxq = &sc->vtnet_rxqs[i];
2637 		if (rxq->vtnrx_tq != NULL) {
2638 			taskqueue_free(rxq->vtnrx_tq);
2639 			rxq->vtnrx_vq = NULL;
2640 		}
2641 
2642 		txq = &sc->vtnet_txqs[i];
2643 		if (txq->vtntx_tq != NULL) {
2644 			taskqueue_free(txq->vtntx_tq);
2645 			txq->vtntx_tq = NULL;
2646 		}
2647 	}
2648 }
2649 
2650 static void
2651 vtnet_drain_taskqueues(struct vtnet_softc *sc)
2652 {
2653 	struct vtnet_rxq *rxq;
2654 	struct vtnet_txq *txq;
2655 	int i;
2656 
2657 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2658 		rxq = &sc->vtnet_rxqs[i];
2659 		if (rxq->vtnrx_tq != NULL)
2660 			taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2661 
2662 		txq = &sc->vtnet_txqs[i];
2663 		if (txq->vtntx_tq != NULL) {
2664 			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
2665 #ifndef VTNET_LEGACY_TX
2666 			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
2667 #endif
2668 		}
2669 	}
2670 }
2671 
2672 static void
2673 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
2674 {
2675 	struct vtnet_rxq *rxq;
2676 	struct vtnet_txq *txq;
2677 	int i;
2678 
2679 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2680 		rxq = &sc->vtnet_rxqs[i];
2681 		vtnet_rxq_free_mbufs(rxq);
2682 
2683 		txq = &sc->vtnet_txqs[i];
2684 		vtnet_txq_free_mbufs(txq);
2685 	}
2686 }
2687 
2688 static void
2689 vtnet_stop_rendezvous(struct vtnet_softc *sc)
2690 {
2691 	struct vtnet_rxq *rxq;
2692 	struct vtnet_txq *txq;
2693 	int i;
2694 
2695 	/*
2696 	 * Lock and unlock the per-queue mutex so we known the stop
2697 	 * state is visible. Doing only the active queues should be
2698 	 * sufficient, but it does not cost much extra to do all the
2699 	 * queues. Note we hold the core mutex here too.
2700 	 */
2701 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2702 		rxq = &sc->vtnet_rxqs[i];
2703 		VTNET_RXQ_LOCK(rxq);
2704 		VTNET_RXQ_UNLOCK(rxq);
2705 
2706 		txq = &sc->vtnet_txqs[i];
2707 		VTNET_TXQ_LOCK(txq);
2708 		VTNET_TXQ_UNLOCK(txq);
2709 	}
2710 }
2711 
2712 static void
2713 vtnet_stop(struct vtnet_softc *sc)
2714 {
2715 	device_t dev;
2716 	struct ifnet *ifp;
2717 
2718 	dev = sc->vtnet_dev;
2719 	ifp = sc->vtnet_ifp;
2720 
2721 	VTNET_CORE_LOCK_ASSERT(sc);
2722 
2723 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2724 	sc->vtnet_link_active = 0;
2725 	callout_stop(&sc->vtnet_tick_ch);
2726 
2727 	/* Only advisory. */
2728 	vtnet_disable_interrupts(sc);
2729 
2730 	/*
2731 	 * Stop the host adapter. This resets it to the pre-initialized
2732 	 * state. It will not generate any interrupts until after it is
2733 	 * reinitialized.
2734 	 */
2735 	virtio_stop(dev);
2736 	vtnet_stop_rendezvous(sc);
2737 
2738 	/* Free any mbufs left in the virtqueues. */
2739 	vtnet_drain_rxtx_queues(sc);
2740 }
2741 
2742 static int
2743 vtnet_virtio_reinit(struct vtnet_softc *sc)
2744 {
2745 	device_t dev;
2746 	struct ifnet *ifp;
2747 	uint64_t features;
2748 	int mask, error;
2749 
2750 	dev = sc->vtnet_dev;
2751 	ifp = sc->vtnet_ifp;
2752 	features = sc->vtnet_features;
2753 
2754 	mask = 0;
2755 #if defined(INET)
2756 	mask |= IFCAP_RXCSUM;
2757 #endif
2758 #if defined (INET6)
2759 	mask |= IFCAP_RXCSUM_IPV6;
2760 #endif
2761 
2762 	/*
2763 	 * Re-negotiate with the host, removing any disabled receive
2764 	 * features. Transmit features are disabled only on our side
2765 	 * via if_capenable and if_hwassist.
2766 	 */
2767 
2768 	if (ifp->if_capabilities & mask) {
2769 		/*
2770 		 * We require both IPv4 and IPv6 offloading to be enabled
2771 		 * in order to negotiated it: VirtIO does not distinguish
2772 		 * between the two.
2773 		 */
2774 		if ((ifp->if_capenable & mask) != mask)
2775 			features &= ~VIRTIO_NET_F_GUEST_CSUM;
2776 	}
2777 
2778 	if (ifp->if_capabilities & IFCAP_LRO) {
2779 		if ((ifp->if_capenable & IFCAP_LRO) == 0)
2780 			features &= ~VTNET_LRO_FEATURES;
2781 	}
2782 
2783 	if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2784 		if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2785 			features &= ~VIRTIO_NET_F_CTRL_VLAN;
2786 	}
2787 
2788 	error = virtio_reinit(dev, features);
2789 	if (error)
2790 		device_printf(dev, "virtio reinit error %d\n", error);
2791 
2792 	return (error);
2793 }
2794 
2795 static void
2796 vtnet_init_rx_filters(struct vtnet_softc *sc)
2797 {
2798 	struct ifnet *ifp;
2799 
2800 	ifp = sc->vtnet_ifp;
2801 
2802 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2803 		/* Restore promiscuous and all-multicast modes. */
2804 		vtnet_rx_filter(sc);
2805 		/* Restore filtered MAC addresses. */
2806 		vtnet_rx_filter_mac(sc);
2807 	}
2808 
2809 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2810 		vtnet_rx_filter_vlan(sc);
2811 }
2812 
2813 static int
2814 vtnet_init_rx_queues(struct vtnet_softc *sc)
2815 {
2816 	device_t dev;
2817 	struct vtnet_rxq *rxq;
2818 	int i, clsize, error;
2819 
2820 	dev = sc->vtnet_dev;
2821 
2822 	/*
2823 	 * Use the new cluster size if one has been set (via a MTU
2824 	 * change). Otherwise, use the standard 2K clusters.
2825 	 *
2826 	 * BMV: It might make sense to use page sized clusters as
2827 	 * the default (depending on the features negotiated).
2828 	 */
2829 	if (sc->vtnet_rx_new_clsize != 0) {
2830 		clsize = sc->vtnet_rx_new_clsize;
2831 		sc->vtnet_rx_new_clsize = 0;
2832 	} else
2833 		clsize = MCLBYTES;
2834 
2835 	sc->vtnet_rx_clsize = clsize;
2836 	sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
2837 
2838 	/* The first segment is reserved for the header. */
2839 	KASSERT(sc->vtnet_rx_nmbufs < VTNET_MAX_RX_SEGS,
2840 	    ("%s: too many rx mbufs %d", __func__, sc->vtnet_rx_nmbufs));
2841 
2842 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2843 		rxq = &sc->vtnet_rxqs[i];
2844 
2845 		/* Hold the lock to satisfy asserts. */
2846 		VTNET_RXQ_LOCK(rxq);
2847 		error = vtnet_rxq_populate(rxq);
2848 		VTNET_RXQ_UNLOCK(rxq);
2849 
2850 		if (error) {
2851 			device_printf(dev,
2852 			    "cannot allocate mbufs for Rx queue %d\n", i);
2853 			return (error);
2854 		}
2855 	}
2856 
2857 	return (0);
2858 }
2859 
2860 static int
2861 vtnet_init_tx_queues(struct vtnet_softc *sc)
2862 {
2863 	struct vtnet_txq *txq;
2864 	int i;
2865 
2866 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2867 		txq = &sc->vtnet_txqs[i];
2868 		txq->vtntx_watchdog = 0;
2869 	}
2870 
2871 	return (0);
2872 }
2873 
/*
 * Initialize the Rx queues and then, if that succeeded, the Tx queues.
 * Returns 0 or the first error encountered.
 */
static int
vtnet_init_rxtx_queues(struct vtnet_softc *sc)
{
	int error;

	error = vtnet_init_rx_queues(sc);
	if (error == 0)
		error = vtnet_init_tx_queues(sc);

	return (error);
}
2889 
2890 static void
2891 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
2892 {
2893 	device_t dev;
2894 	int npairs;
2895 
2896 	dev = sc->vtnet_dev;
2897 
2898 	if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
2899 		MPASS(sc->vtnet_max_vq_pairs == 1);
2900 		sc->vtnet_act_vq_pairs = 1;
2901 		return;
2902 	}
2903 
2904 	/* BMV: Just use the maximum configured for now. */
2905 	npairs = sc->vtnet_max_vq_pairs;
2906 
2907 	if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
2908 		device_printf(dev,
2909 		    "cannot set active queue pairs to %d\n", npairs);
2910 		npairs = 1;
2911 	}
2912 
2913 	sc->vtnet_act_vq_pairs = npairs;
2914 }
2915 
2916 static int
2917 vtnet_reinit(struct vtnet_softc *sc)
2918 {
2919 	device_t dev;
2920 	struct ifnet *ifp;
2921 	int error;
2922 
2923 	dev = sc->vtnet_dev;
2924 	ifp = sc->vtnet_ifp;
2925 
2926 	/* Use the current MAC address. */
2927 	bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
2928 	vtnet_set_hwaddr(sc);
2929 
2930 	vtnet_set_active_vq_pairs(sc);
2931 
2932 	ifp->if_hwassist = 0;
2933 	if (ifp->if_capenable & IFCAP_TXCSUM)
2934 		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
2935 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2936 		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
2937 	if (ifp->if_capenable & IFCAP_TSO4)
2938 		ifp->if_hwassist |= CSUM_TSO;
2939 	if (ifp->if_capenable & IFCAP_TSO6)
2940 		ifp->if_hwassist |= CSUM_TSO; /* No CSUM_TSO_IPV6. */
2941 
2942 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
2943 		vtnet_init_rx_filters(sc);
2944 
2945 	error = vtnet_init_rxtx_queues(sc);
2946 	if (error)
2947 		return (error);
2948 
2949 	vtnet_enable_interrupts(sc);
2950 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2951 
2952 	return (0);
2953 }
2954 
2955 static void
2956 vtnet_init_locked(struct vtnet_softc *sc)
2957 {
2958 	device_t dev;
2959 	struct ifnet *ifp;
2960 
2961 	dev = sc->vtnet_dev;
2962 	ifp = sc->vtnet_ifp;
2963 
2964 	VTNET_CORE_LOCK_ASSERT(sc);
2965 
2966 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2967 		return;
2968 
2969 	vtnet_stop(sc);
2970 
2971 	/* Reinitialize with the host. */
2972 	if (vtnet_virtio_reinit(sc) != 0)
2973 		goto fail;
2974 
2975 	if (vtnet_reinit(sc) != 0)
2976 		goto fail;
2977 
2978 	virtio_reinit_complete(dev);
2979 
2980 	vtnet_update_link_status(sc);
2981 	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
2982 
2983 	return;
2984 
2985 fail:
2986 	vtnet_stop(sc);
2987 }
2988 
/*
 * Initialization entry point taking an opaque softc pointer: run
 * vtnet_init_locked() under the core lock.
 */
static void
vtnet_init(void *xsc)
{
	struct vtnet_softc *sc = xsc;

	VTNET_CORE_LOCK(sc);
	vtnet_init_locked(sc);
	VTNET_CORE_UNLOCK(sc);
}
3000 
3001 static void
3002 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3003 {
3004 	struct virtqueue *vq;
3005 
3006 	vq = sc->vtnet_ctrl_vq;
3007 
3008 	/*
3009 	 * The control virtqueue is only polled and therefore it should
3010 	 * already be empty.
3011 	 */
3012 	KASSERT(virtqueue_empty(vq),
3013 	    ("%s: ctrl vq %p not empty", __func__, vq));
3014 }
3015 
3016 static void
3017 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3018     struct sglist *sg, int readable, int writable)
3019 {
3020 	struct virtqueue *vq;
3021 
3022 	vq = sc->vtnet_ctrl_vq;
3023 
3024 	VTNET_CORE_LOCK_ASSERT(sc);
3025 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
3026 	    ("%s: CTRL_VQ feature not negotiated", __func__));
3027 
3028 	if (!virtqueue_empty(vq))
3029 		return;
3030 	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
3031 		return;
3032 
3033 	/*
3034 	 * Poll for the response, but the command is likely already
3035 	 * done when we return from the notify.
3036 	 */
3037 	virtqueue_notify(vq);
3038 	virtqueue_poll(vq, NULL);
3039 }
3040 
3041 static int
3042 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3043 {
3044 	struct virtio_net_ctrl_hdr hdr;
3045 	struct sglist_seg segs[3];
3046 	struct sglist sg;
3047 	uint8_t ack;
3048 	int error;
3049 
3050 	hdr.class = VIRTIO_NET_CTRL_MAC;
3051 	hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3052 	ack = VIRTIO_NET_ERR;
3053 
3054 	sglist_init(&sg, 3, segs);
3055 	error = 0;
3056 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3057 	error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
3058 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3059 	KASSERT(error == 0 && sg.sg_nseg == 3,
3060 	    ("%s: error %d adding set MAC msg to sglist", __func__, error));
3061 
3062 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3063 
3064 	return (ack == VIRTIO_NET_OK ? 0 : EIO);
3065 }
3066 
3067 static int
3068 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3069 {
3070 	struct sglist_seg segs[3];
3071 	struct sglist sg;
3072 	struct {
3073 		struct virtio_net_ctrl_hdr hdr;
3074 		uint8_t pad1;
3075 		struct virtio_net_ctrl_mq mq;
3076 		uint8_t pad2;
3077 		uint8_t ack;
3078 	} s;
3079 	int error;
3080 
3081 	s.hdr.class = VIRTIO_NET_CTRL_MQ;
3082 	s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3083 	s.mq.virtqueue_pairs = npairs;
3084 	s.ack = VIRTIO_NET_ERR;
3085 
3086 	sglist_init(&sg, 3, segs);
3087 	error = 0;
3088 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3089 	error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3090 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3091 	KASSERT(error == 0 && sg.sg_nseg == 3,
3092 	    ("%s: error %d adding MQ message to sglist", __func__, error));
3093 
3094 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3095 
3096 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3097 }
3098 
3099 static int
3100 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
3101 {
3102 	struct sglist_seg segs[3];
3103 	struct sglist sg;
3104 	struct {
3105 		struct virtio_net_ctrl_hdr hdr;
3106 		uint8_t pad1;
3107 		uint8_t onoff;
3108 		uint8_t pad2;
3109 		uint8_t ack;
3110 	} s;
3111 	int error;
3112 
3113 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3114 	    ("%s: CTRL_RX feature not negotiated", __func__));
3115 
3116 	s.hdr.class = VIRTIO_NET_CTRL_RX;
3117 	s.hdr.cmd = cmd;
3118 	s.onoff = !!on;
3119 	s.ack = VIRTIO_NET_ERR;
3120 
3121 	sglist_init(&sg, 3, segs);
3122 	error = 0;
3123 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3124 	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3125 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3126 	KASSERT(error == 0 && sg.sg_nseg == 3,
3127 	    ("%s: error %d adding Rx message to sglist", __func__, error));
3128 
3129 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3130 
3131 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3132 }
3133 
3134 static int
3135 vtnet_set_promisc(struct vtnet_softc *sc, int on)
3136 {
3137 
3138 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3139 }
3140 
3141 static int
3142 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
3143 {
3144 
3145 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3146 }
3147 
3148 /*
3149  * The device defaults to promiscuous mode for backwards compatibility.
3150  * Turn it off at attach time if possible.
3151  */
3152 static void
3153 vtnet_attach_disable_promisc(struct vtnet_softc *sc)
3154 {
3155 	struct ifnet *ifp;
3156 
3157 	ifp = sc->vtnet_ifp;
3158 
3159 	VTNET_CORE_LOCK(sc);
3160 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) {
3161 		ifp->if_flags |= IFF_PROMISC;
3162 	} else if (vtnet_set_promisc(sc, 0) != 0) {
3163 		ifp->if_flags |= IFF_PROMISC;
3164 		device_printf(sc->vtnet_dev,
3165 		    "cannot disable default promiscuous mode\n");
3166 	}
3167 	VTNET_CORE_UNLOCK(sc);
3168 }
3169 
3170 static void
3171 vtnet_rx_filter(struct vtnet_softc *sc)
3172 {
3173 	device_t dev;
3174 	struct ifnet *ifp;
3175 
3176 	dev = sc->vtnet_dev;
3177 	ifp = sc->vtnet_ifp;
3178 
3179 	VTNET_CORE_LOCK_ASSERT(sc);
3180 
3181 	if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
3182 		device_printf(dev, "cannot %s promiscuous mode\n",
3183 		    ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
3184 
3185 	if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
3186 		device_printf(dev, "cannot %s all-multicast mode\n",
3187 		    ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
3188 }
3189 
3190 static void
3191 vtnet_rx_filter_mac(struct vtnet_softc *sc)
3192 {
3193 	struct virtio_net_ctrl_hdr hdr;
3194 	struct vtnet_mac_filter *filter;
3195 	struct sglist_seg segs[4];
3196 	struct sglist sg;
3197 	struct ifnet *ifp;
3198 	struct ifaddr *ifa;
3199 	struct ifmultiaddr *ifma;
3200 	int ucnt, mcnt, promisc, allmulti, error;
3201 	uint8_t ack;
3202 
3203 	ifp = sc->vtnet_ifp;
3204 	filter = sc->vtnet_mac_filter;
3205 	ucnt = 0;
3206 	mcnt = 0;
3207 	promisc = 0;
3208 	allmulti = 0;
3209 
3210 	VTNET_CORE_LOCK_ASSERT(sc);
3211 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3212 	    ("%s: CTRL_RX feature not negotiated", __func__));
3213 
3214 	/* Unicast MAC addresses: */
3215 	if_addr_rlock(ifp);
3216 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3217 		if (ifa->ifa_addr->sa_family != AF_LINK)
3218 			continue;
3219 		else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3220 		    sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3221 			continue;
3222 		else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
3223 			promisc = 1;
3224 			break;
3225 		}
3226 
3227 		bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3228 		    &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
3229 		ucnt++;
3230 	}
3231 	if_addr_runlock(ifp);
3232 
3233 	if (promisc != 0) {
3234 		filter->vmf_unicast.nentries = 0;
3235 		if_printf(ifp, "more than %d MAC addresses assigned, "
3236 		    "falling back to promiscuous mode\n",
3237 		    VTNET_MAX_MAC_ENTRIES);
3238 	} else
3239 		filter->vmf_unicast.nentries = ucnt;
3240 
3241 	/* Multicast MAC addresses: */
3242 	if_maddr_rlock(ifp);
3243 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3244 		if (ifma->ifma_addr->sa_family != AF_LINK)
3245 			continue;
3246 		else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
3247 			allmulti = 1;
3248 			break;
3249 		}
3250 
3251 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3252 		    &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
3253 		mcnt++;
3254 	}
3255 	if_maddr_runlock(ifp);
3256 
3257 	if (allmulti != 0) {
3258 		filter->vmf_multicast.nentries = 0;
3259 		if_printf(ifp, "more than %d multicast MAC addresses "
3260 		    "assigned, falling back to all-multicast mode\n",
3261 		    VTNET_MAX_MAC_ENTRIES);
3262 	} else
3263 		filter->vmf_multicast.nentries = mcnt;
3264 
3265 	if (promisc != 0 && allmulti != 0)
3266 		goto out;
3267 
3268 	hdr.class = VIRTIO_NET_CTRL_MAC;
3269 	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3270 	ack = VIRTIO_NET_ERR;
3271 
3272 	sglist_init(&sg, 4, segs);
3273 	error = 0;
3274 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3275 	error |= sglist_append(&sg, &filter->vmf_unicast,
3276 	    sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
3277 	error |= sglist_append(&sg, &filter->vmf_multicast,
3278 	    sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
3279 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3280 	KASSERT(error == 0 && sg.sg_nseg == 4,
3281 	    ("%s: error %d adding MAC filter msg to sglist", __func__, error));
3282 
3283 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3284 
3285 	if (ack != VIRTIO_NET_OK)
3286 		if_printf(ifp, "error setting host MAC filter table\n");
3287 
3288 out:
3289 	if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
3290 		if_printf(ifp, "cannot enable promiscuous mode\n");
3291 	if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
3292 		if_printf(ifp, "cannot enable all-multicast mode\n");
3293 }
3294 
3295 static int
3296 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3297 {
3298 	struct sglist_seg segs[3];
3299 	struct sglist sg;
3300 	struct {
3301 		struct virtio_net_ctrl_hdr hdr;
3302 		uint8_t pad1;
3303 		uint16_t tag;
3304 		uint8_t pad2;
3305 		uint8_t ack;
3306 	} s;
3307 	int error;
3308 
3309 	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3310 	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3311 	s.tag = tag;
3312 	s.ack = VIRTIO_NET_ERR;
3313 
3314 	sglist_init(&sg, 3, segs);
3315 	error = 0;
3316 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3317 	error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3318 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3319 	KASSERT(error == 0 && sg.sg_nseg == 3,
3320 	    ("%s: error %d adding VLAN message to sglist", __func__, error));
3321 
3322 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3323 
3324 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3325 }
3326 
3327 static void
3328 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3329 {
3330 	uint32_t w;
3331 	uint16_t tag;
3332 	int i, bit;
3333 
3334 	VTNET_CORE_LOCK_ASSERT(sc);
3335 	KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
3336 	    ("%s: VLAN_FILTER feature not negotiated", __func__));
3337 
3338 	/* Enable the filter for each configured VLAN. */
3339 	for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3340 		w = sc->vtnet_vlan_filter[i];
3341 
3342 		while ((bit = ffs(w) - 1) != -1) {
3343 			w &= ~(1 << bit);
3344 			tag = sizeof(w) * CHAR_BIT * i + bit;
3345 
3346 			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3347 				device_printf(sc->vtnet_dev,
3348 				    "cannot enable VLAN %d filter\n", tag);
3349 			}
3350 		}
3351 	}
3352 }
3353 
3354 static void
3355 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3356 {
3357 	struct ifnet *ifp;
3358 	int idx, bit;
3359 
3360 	ifp = sc->vtnet_ifp;
3361 	idx = (tag >> 5) & 0x7F;
3362 	bit = tag & 0x1F;
3363 
3364 	if (tag == 0 || tag > 4095)
3365 		return;
3366 
3367 	VTNET_CORE_LOCK(sc);
3368 
3369 	if (add)
3370 		sc->vtnet_vlan_filter[idx] |= (1 << bit);
3371 	else
3372 		sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3373 
3374 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
3375 	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3376 		device_printf(sc->vtnet_dev,
3377 		    "cannot %s VLAN %d %s the host filter table\n",
3378 		    add ? "add" : "remove", tag, add ? "to" : "from");
3379 	}
3380 
3381 	VTNET_CORE_UNLOCK(sc);
3382 }
3383 
3384 static void
3385 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3386 {
3387 
3388 	if (ifp->if_softc != arg)
3389 		return;
3390 
3391 	vtnet_update_vlan_filter(arg, 1, tag);
3392 }
3393 
3394 static void
3395 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3396 {
3397 
3398 	if (ifp->if_softc != arg)
3399 		return;
3400 
3401 	vtnet_update_vlan_filter(arg, 0, tag);
3402 }
3403 
3404 static int
3405 vtnet_is_link_up(struct vtnet_softc *sc)
3406 {
3407 	device_t dev;
3408 	struct ifnet *ifp;
3409 	uint16_t status;
3410 
3411 	dev = sc->vtnet_dev;
3412 	ifp = sc->vtnet_ifp;
3413 
3414 	if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
3415 		status = VIRTIO_NET_S_LINK_UP;
3416 	else
3417 		status = virtio_read_dev_config_2(dev,
3418 		    offsetof(struct virtio_net_config, status));
3419 
3420 	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3421 }
3422 
3423 static void
3424 vtnet_update_link_status(struct vtnet_softc *sc)
3425 {
3426 	struct ifnet *ifp;
3427 	int link;
3428 
3429 	ifp = sc->vtnet_ifp;
3430 
3431 	VTNET_CORE_LOCK_ASSERT(sc);
3432 	link = vtnet_is_link_up(sc);
3433 
3434 	/* Notify if the link status has changed. */
3435 	if (link != 0 && sc->vtnet_link_active == 0) {
3436 		sc->vtnet_link_active = 1;
3437 		if_link_state_change(ifp, LINK_STATE_UP);
3438 	} else if (link == 0 && sc->vtnet_link_active != 0) {
3439 		sc->vtnet_link_active = 0;
3440 		if_link_state_change(ifp, LINK_STATE_DOWN);
3441 	}
3442 }
3443 
3444 static int
3445 vtnet_ifmedia_upd(struct ifnet *ifp)
3446 {
3447 	struct vtnet_softc *sc;
3448 	struct ifmedia *ifm;
3449 
3450 	sc = ifp->if_softc;
3451 	ifm = &sc->vtnet_media;
3452 
3453 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3454 		return (EINVAL);
3455 
3456 	return (0);
3457 }
3458 
3459 static void
3460 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3461 {
3462 	struct vtnet_softc *sc;
3463 
3464 	sc = ifp->if_softc;
3465 
3466 	ifmr->ifm_status = IFM_AVALID;
3467 	ifmr->ifm_active = IFM_ETHER;
3468 
3469 	VTNET_CORE_LOCK(sc);
3470 	if (vtnet_is_link_up(sc) != 0) {
3471 		ifmr->ifm_status |= IFM_ACTIVE;
3472 		ifmr->ifm_active |= VTNET_MEDIATYPE;
3473 	} else
3474 		ifmr->ifm_active |= IFM_NONE;
3475 	VTNET_CORE_UNLOCK(sc);
3476 }
3477 
3478 static void
3479 vtnet_set_hwaddr(struct vtnet_softc *sc)
3480 {
3481 	device_t dev;
3482 
3483 	dev = sc->vtnet_dev;
3484 
3485 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
3486 		if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
3487 			device_printf(dev, "unable to set MAC address\n");
3488 	} else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
3489 		virtio_write_device_config(dev,
3490 		    offsetof(struct virtio_net_config, mac),
3491 		    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3492 	}
3493 }
3494 
3495 static void
3496 vtnet_get_hwaddr(struct vtnet_softc *sc)
3497 {
3498 	device_t dev;
3499 
3500 	dev = sc->vtnet_dev;
3501 
3502 	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
3503 		/*
3504 		 * Generate a random locally administered unicast address.
3505 		 *
3506 		 * It would be nice to generate the same MAC address across
3507 		 * reboots, but it seems all the hosts currently available
3508 		 * support the MAC feature, so this isn't too important.
3509 		 */
3510 		sc->vtnet_hwaddr[0] = 0xB2;
3511 		arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
3512 		vtnet_set_hwaddr(sc);
3513 		return;
3514 	}
3515 
3516 	virtio_read_device_config(dev, offsetof(struct virtio_net_config, mac),
3517 	    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3518 }
3519 
3520 static void
3521 vtnet_vlan_tag_remove(struct mbuf *m)
3522 {
3523 	struct ether_vlan_header *evh;
3524 
3525 	evh = mtod(m, struct ether_vlan_header *);
3526 	m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
3527 	m->m_flags |= M_VLANTAG;
3528 
3529 	/* Strip the 802.1Q header. */
3530 	bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
3531 	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
3532 	m_adj(m, ETHER_VLAN_ENCAP_LEN);
3533 }
3534 
3535 static void
3536 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
3537     struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
3538 {
3539 	struct sysctl_oid *node;
3540 	struct sysctl_oid_list *list;
3541 	struct vtnet_rxq_stats *stats;
3542 	char namebuf[16];
3543 
3544 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
3545 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3546 	    CTLFLAG_RD, NULL, "Receive Queue");
3547 	list = SYSCTL_CHILDREN(node);
3548 
3549 	stats = &rxq->vtnrx_stats;
3550 
3551 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3552 	    &stats->vrxs_ipackets, "Receive packets");
3553 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3554 	    &stats->vrxs_ibytes, "Receive bytes");
3555 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3556 	    &stats->vrxs_iqdrops, "Receive drops");
3557 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3558 	    &stats->vrxs_ierrors, "Receive errors");
3559 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3560 	    &stats->vrxs_csum, "Receive checksum offloaded");
3561 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
3562 	    &stats->vrxs_csum_failed, "Receive checksum offload failed");
3563 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3564 	    &stats->vrxs_rescheduled,
3565 	    "Receive interrupt handler rescheduled");
3566 }
3567 
3568 static void
3569 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
3570     struct sysctl_oid_list *child, struct vtnet_txq *txq)
3571 {
3572 	struct sysctl_oid *node;
3573 	struct sysctl_oid_list *list;
3574 	struct vtnet_txq_stats *stats;
3575 	char namebuf[16];
3576 
3577 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
3578 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3579 	    CTLFLAG_RD, NULL, "Transmit Queue");
3580 	list = SYSCTL_CHILDREN(node);
3581 
3582 	stats = &txq->vtntx_stats;
3583 
3584 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3585 	    &stats->vtxs_opackets, "Transmit packets");
3586 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3587 	    &stats->vtxs_obytes, "Transmit bytes");
3588 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3589 	    &stats->vtxs_omcasts, "Transmit multicasts");
3590 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3591 	    &stats->vtxs_csum, "Transmit checksum offloaded");
3592 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3593 	    &stats->vtxs_tso, "Transmit segmentation offloaded");
3594 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "collapsed", CTLFLAG_RD,
3595 	    &stats->vtxs_collapsed, "Transmit mbufs collapsed");
3596 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3597 	    &stats->vtxs_rescheduled,
3598 	    "Transmit interrupt handler rescheduled");
3599 }
3600 
3601 static void
3602 vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
3603 {
3604 	device_t dev;
3605 	struct sysctl_ctx_list *ctx;
3606 	struct sysctl_oid *tree;
3607 	struct sysctl_oid_list *child;
3608 	int i;
3609 
3610 	dev = sc->vtnet_dev;
3611 	ctx = device_get_sysctl_ctx(dev);
3612 	tree = device_get_sysctl_tree(dev);
3613 	child = SYSCTL_CHILDREN(tree);
3614 
3615 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3616 		vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
3617 		vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
3618 	}
3619 }
3620 
3621 static void
3622 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
3623     struct sysctl_oid_list *child, struct vtnet_softc *sc)
3624 {
3625 	struct vtnet_statistics *stats;
3626 
3627 	stats = &sc->vtnet_stats;
3628 
3629 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
3630 	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
3631 	    "Mbuf cluster allocation failures");
3632 
3633 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
3634 	    CTLFLAG_RD, &stats->rx_frame_too_large,
3635 	    "Received frame larger than the mbuf chain");
3636 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
3637 	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
3638 	    "Enqueuing the replacement receive mbuf failed");
3639 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
3640 	    CTLFLAG_RD, &stats->rx_mergeable_failed,
3641 	    "Mergeable buffers receive failures");
3642 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
3643 	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
3644 	    "Received checksum offloaded buffer with unsupported "
3645 	    "Ethernet type");
3646 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
3647 	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
3648 	    "Received checksum offloaded buffer with incorrect IP protocol");
3649 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
3650 	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
3651 	    "Received checksum offloaded buffer with incorrect offset");
3652 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
3653 	    CTLFLAG_RD, &stats->rx_csum_bad_proto,
3654 	    "Received checksum offloaded buffer with incorrect protocol");
3655 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
3656 	    CTLFLAG_RD, &stats->rx_csum_failed,
3657 	    "Received buffer checksum offload failed");
3658 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
3659 	    CTLFLAG_RD, &stats->rx_csum_offloaded,
3660 	    "Received buffer checksum offload succeeded");
3661 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
3662 	    CTLFLAG_RD, &stats->rx_task_rescheduled,
3663 	    "Times the receive interrupt task rescheduled itself");
3664 
3665 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
3666 	    CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
3667 	    "Aborted transmit of checksum offloaded buffer with unknown "
3668 	    "Ethernet type");
3669 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
3670 	    CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
3671 	    "Aborted transmit of TSO buffer with unknown Ethernet type");
3672 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
3673 	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
3674 	    "Aborted transmit of TSO buffer with non TCP protocol");
3675 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
3676 	    CTLFLAG_RD, &stats->tx_csum_offloaded,
3677 	    "Offloaded checksum of transmitted buffer");
3678 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
3679 	    CTLFLAG_RD, &stats->tx_tso_offloaded,
3680 	    "Segmentation offload of transmitted buffer");
3681 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
3682 	    CTLFLAG_RD, &stats->tx_task_rescheduled,
3683 	    "Times the transmit interrupt task rescheduled itself");
3684 }
3685 
3686 static void
3687 vtnet_setup_sysctl(struct vtnet_softc *sc)
3688 {
3689 	device_t dev;
3690 	struct sysctl_ctx_list *ctx;
3691 	struct sysctl_oid *tree;
3692 	struct sysctl_oid_list *child;
3693 
3694 	dev = sc->vtnet_dev;
3695 	ctx = device_get_sysctl_ctx(dev);
3696 	tree = device_get_sysctl_tree(dev);
3697 	child = SYSCTL_CHILDREN(tree);
3698 
3699 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
3700 	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
3701 	    "Maximum number of supported virtqueue pairs");
3702 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
3703 	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
3704 	    "Number of active virtqueue pairs");
3705 
3706 	vtnet_setup_stat_sysctl(ctx, child, sc);
3707 }
3708 
3709 static int
3710 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
3711 {
3712 
3713 	return (virtqueue_enable_intr(rxq->vtnrx_vq));
3714 }
3715 
3716 static void
3717 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
3718 {
3719 
3720 	virtqueue_disable_intr(rxq->vtnrx_vq);
3721 }
3722 
3723 static int
3724 vtnet_txq_enable_intr(struct vtnet_txq *txq)
3725 {
3726 
3727 	return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG));
3728 }
3729 
3730 static void
3731 vtnet_txq_disable_intr(struct vtnet_txq *txq)
3732 {
3733 
3734 	virtqueue_disable_intr(txq->vtntx_vq);
3735 }
3736 
3737 static void
3738 vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
3739 {
3740 	int i;
3741 
3742 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3743 		vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
3744 }
3745 
3746 static void
3747 vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
3748 {
3749 	int i;
3750 
3751 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3752 		vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
3753 }
3754 
/*
 * Enable interrupts on all active queues: receive queues first, then
 * transmit queues.
 */
static void
vtnet_enable_interrupts(struct vtnet_softc *sc)
{

	/* Receive side. */
	vtnet_enable_rx_interrupts(sc);

	/* Transmit side. */
	vtnet_enable_tx_interrupts(sc);
}
3762 
3763 static void
3764 vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
3765 {
3766 	int i;
3767 
3768 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3769 		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
3770 }
3771 
3772 static void
3773 vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
3774 {
3775 	int i;
3776 
3777 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3778 		vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
3779 }
3780 
/*
 * Disable interrupts on all active queues: receive queues first, then
 * transmit queues.
 */
static void
vtnet_disable_interrupts(struct vtnet_softc *sc)
{

	/* Receive side. */
	vtnet_disable_rx_interrupts(sc);

	/* Transmit side. */
	vtnet_disable_tx_interrupts(sc);
}
3788 
3789 static int
3790 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
3791 {
3792 	char path[64];
3793 
3794 	snprintf(path, sizeof(path),
3795 	    "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
3796 	TUNABLE_INT_FETCH(path, &def);
3797 
3798 	return (def);
3799 }
3800