xref: /freebsd/sys/dev/virtio/network/if_vtnet.c (revision 84dfba8d183d31e3412639ecb4b8ad4433cf7e80)
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* Driver for VirtIO network devices. */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/eventhandler.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/sockio.h>
37 #include <sys/mbuf.h>
38 #include <sys/malloc.h>
39 #include <sys/module.h>
40 #include <sys/socket.h>
41 #include <sys/sysctl.h>
42 #include <sys/random.h>
43 #include <sys/sglist.h>
44 #include <sys/lock.h>
45 #include <sys/mutex.h>
46 #include <sys/taskqueue.h>
47 #include <sys/smp.h>
48 #include <machine/smp.h>
49 
50 #include <vm/uma.h>
51 
52 #include <net/ethernet.h>
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_arp.h>
56 #include <net/if_dl.h>
57 #include <net/if_types.h>
58 #include <net/if_media.h>
59 #include <net/if_vlan_var.h>
60 
61 #include <net/bpf.h>
62 
63 #include <netinet/in_systm.h>
64 #include <netinet/in.h>
65 #include <netinet/ip.h>
66 #include <netinet/ip6.h>
67 #include <netinet6/ip6_var.h>
68 #include <netinet/udp.h>
69 #include <netinet/tcp.h>
70 #include <netinet/sctp.h>
71 
72 #include <machine/bus.h>
73 #include <machine/resource.h>
74 #include <sys/bus.h>
75 #include <sys/rman.h>
76 
77 #include <dev/virtio/virtio.h>
78 #include <dev/virtio/virtqueue.h>
79 #include <dev/virtio/network/virtio_net.h>
80 #include <dev/virtio/network/if_vtnetvar.h>
81 
82 #include "virtio_if.h"
83 
84 #include "opt_inet.h"
85 #include "opt_inet6.h"
86 
87 static int	vtnet_modevent(module_t, int, void *);
88 
89 static int	vtnet_probe(device_t);
90 static int	vtnet_attach(device_t);
91 static int	vtnet_detach(device_t);
92 static int	vtnet_suspend(device_t);
93 static int	vtnet_resume(device_t);
94 static int	vtnet_shutdown(device_t);
95 static int	vtnet_attach_completed(device_t);
96 static int	vtnet_config_change(device_t);
97 
98 static void	vtnet_negotiate_features(struct vtnet_softc *);
99 static void	vtnet_setup_features(struct vtnet_softc *);
100 static int	vtnet_init_rxq(struct vtnet_softc *, int);
101 static int	vtnet_init_txq(struct vtnet_softc *, int);
102 static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
103 static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
104 static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
105 static void	vtnet_free_rx_filters(struct vtnet_softc *);
106 static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
107 static int	vtnet_setup_interface(struct vtnet_softc *);
108 static int	vtnet_change_mtu(struct vtnet_softc *, int);
109 static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
110 
111 static int	vtnet_rxq_populate(struct vtnet_rxq *);
112 static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
113 static struct mbuf *
114 		vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
115 static int	vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
116 		    struct mbuf *, int);
117 static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
118 static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
119 static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
120 static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
121 		     struct virtio_net_hdr *);
122 static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
123 static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
124 static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
125 static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
126 		    struct virtio_net_hdr *);
127 static int	vtnet_rxq_eof(struct vtnet_rxq *);
128 static void	vtnet_rx_vq_intr(void *);
129 static void	vtnet_rxq_tq_intr(void *, int);
130 
131 static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
132 static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
133 		    int *, int *, int *);
134 static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
135 		    int, struct virtio_net_hdr *);
136 static struct mbuf *
137 		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
138 		    struct virtio_net_hdr *);
139 static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
140 		    struct vtnet_tx_header *);
141 static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
142 #ifdef VTNET_LEGACY_TX
143 static void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
144 static void	vtnet_start(struct ifnet *);
145 #else
146 static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
147 static int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
148 static void	vtnet_txq_tq_deferred(void *, int);
149 #endif
150 static void	vtnet_txq_tq_intr(void *, int);
151 static void	vtnet_txq_eof(struct vtnet_txq *);
152 static void	vtnet_tx_vq_intr(void *);
153 static void	vtnet_tx_start_all(struct vtnet_softc *);
154 
155 #ifndef VTNET_LEGACY_TX
156 static void	vtnet_qflush(struct ifnet *);
157 #endif
158 
159 static int	vtnet_watchdog(struct vtnet_txq *);
160 static void	vtnet_rxq_accum_stats(struct vtnet_rxq *,
161 		    struct vtnet_rxq_stats *);
162 static void	vtnet_txq_accum_stats(struct vtnet_txq *,
163 		    struct vtnet_txq_stats *);
164 static void	vtnet_accumulate_stats(struct vtnet_softc *);
165 static void	vtnet_tick(void *);
166 
167 static void	vtnet_start_taskqueues(struct vtnet_softc *);
168 static void	vtnet_free_taskqueues(struct vtnet_softc *);
169 static void	vtnet_drain_taskqueues(struct vtnet_softc *);
170 
171 static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
172 static void	vtnet_stop_rendezvous(struct vtnet_softc *);
173 static void	vtnet_stop(struct vtnet_softc *);
174 static int	vtnet_virtio_reinit(struct vtnet_softc *);
175 static void	vtnet_init_rx_filters(struct vtnet_softc *);
176 static int	vtnet_init_rx_queues(struct vtnet_softc *);
177 static int	vtnet_init_tx_queues(struct vtnet_softc *);
178 static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
179 static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
180 static int	vtnet_reinit(struct vtnet_softc *);
181 static void	vtnet_init_locked(struct vtnet_softc *);
182 static void	vtnet_init(void *);
183 
184 static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
185 static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
186 		    struct sglist *, int, int);
187 static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
188 static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
189 static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
190 static int	vtnet_set_promisc(struct vtnet_softc *, int);
191 static int	vtnet_set_allmulti(struct vtnet_softc *, int);
192 static void	vtnet_attach_disable_promisc(struct vtnet_softc *);
193 static void	vtnet_rx_filter(struct vtnet_softc *);
194 static void	vtnet_rx_filter_mac(struct vtnet_softc *);
195 static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
196 static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
197 static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
198 static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
199 static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
200 
201 static int	vtnet_is_link_up(struct vtnet_softc *);
202 static void	vtnet_update_link_status(struct vtnet_softc *);
203 static int	vtnet_ifmedia_upd(struct ifnet *);
204 static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
205 static void	vtnet_get_hwaddr(struct vtnet_softc *);
206 static void	vtnet_set_hwaddr(struct vtnet_softc *);
207 static void	vtnet_vlan_tag_remove(struct mbuf *);
208 
209 static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
210 		    struct sysctl_oid_list *, struct vtnet_rxq *);
211 static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
212 		    struct sysctl_oid_list *, struct vtnet_txq *);
213 static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
214 static void	vtnet_setup_sysctl(struct vtnet_softc *);
215 
216 static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
217 static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
218 static int	vtnet_txq_enable_intr(struct vtnet_txq *);
219 static void	vtnet_txq_disable_intr(struct vtnet_txq *);
220 static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
221 static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
222 static void	vtnet_enable_interrupts(struct vtnet_softc *);
223 static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
224 static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
225 static void	vtnet_disable_interrupts(struct vtnet_softc *);
226 
227 static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
228 
229 /* Tunables. */
230 static int vtnet_csum_disable = 0;
231 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
232 static int vtnet_tso_disable = 0;
233 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
234 static int vtnet_lro_disable = 0;
235 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
236 static int vtnet_mq_disable = 0;
237 TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
238 static int vtnet_mq_max_pairs = 0;
239 TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
240 static int vtnet_rx_process_limit = 512;
241 TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
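
/*
 * Each of these tunables may be set from loader.conf(5), for example:
 *
 *   hw.vtnet.csum_disable="1"
 *   hw.vtnet.mq_max_pairs="2"
 */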
242 
243 /*
244  * Reducing the number of transmit completed interrupts can improve
245  * performance. To do so, the define below keeps the Tx vq interrupt
246  * disabled and adds calls to vtnet_txq_eof() in the start and watchdog
247  * paths. The price to pay for this is that the m_free'ing of transmitted
248  * mbufs may be delayed until the watchdog fires.
249  *
250  * BMV: Reintroduce this later as a run-time option, if it makes
251  * sense after the EVENT_IDX feature is supported.
252  *
253  * #define VTNET_TX_INTR_MODERATION
254  */
255 
256 static uma_zone_t vtnet_tx_header_zone;
257 
258 static struct virtio_feature_desc vtnet_feature_desc[] = {
259 	{ VIRTIO_NET_F_CSUM,		"TxChecksum"	},
260 	{ VIRTIO_NET_F_GUEST_CSUM,	"RxChecksum"	},
261 	{ VIRTIO_NET_F_MAC,		"MacAddress"	},
262 	{ VIRTIO_NET_F_GSO,		"TxAllGSO"	},
263 	{ VIRTIO_NET_F_GUEST_TSO4,	"RxTSOv4"	},
264 	{ VIRTIO_NET_F_GUEST_TSO6,	"RxTSOv6"	},
265 	{ VIRTIO_NET_F_GUEST_ECN,	"RxECN"		},
266 	{ VIRTIO_NET_F_GUEST_UFO,	"RxUFO"		},
267 	{ VIRTIO_NET_F_HOST_TSO4,	"TxTSOv4"	},
268 	{ VIRTIO_NET_F_HOST_TSO6,	"TxTSOv6"	},
269 	{ VIRTIO_NET_F_HOST_ECN,	"TxTSOECN"	},
270 	{ VIRTIO_NET_F_HOST_UFO,	"TxUFO"		},
271 	{ VIRTIO_NET_F_MRG_RXBUF,	"MrgRxBuf"	},
272 	{ VIRTIO_NET_F_STATUS,		"Status"	},
273 	{ VIRTIO_NET_F_CTRL_VQ,		"ControlVq"	},
274 	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
275 	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
276 	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
277 	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
278 	{ VIRTIO_NET_F_MQ,		"Multiqueue"	},
279 	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},
280 
281 	{ 0, NULL }
282 };
283 
284 static device_method_t vtnet_methods[] = {
285 	/* Device methods. */
286 	DEVMETHOD(device_probe,			vtnet_probe),
287 	DEVMETHOD(device_attach,		vtnet_attach),
288 	DEVMETHOD(device_detach,		vtnet_detach),
289 	DEVMETHOD(device_suspend,		vtnet_suspend),
290 	DEVMETHOD(device_resume,		vtnet_resume),
291 	DEVMETHOD(device_shutdown,		vtnet_shutdown),
292 
293 	/* VirtIO methods. */
294 	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
295 	DEVMETHOD(virtio_config_change,		vtnet_config_change),
296 
297 	DEVMETHOD_END
298 };
299 
300 static driver_t vtnet_driver = {
301 	"vtnet",
302 	vtnet_methods,
303 	sizeof(struct vtnet_softc)
304 };
305 static devclass_t vtnet_devclass;
306 
307 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
308     vtnet_modevent, 0);
309 MODULE_VERSION(vtnet, 1);
310 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
311 
312 static int
313 vtnet_modevent(module_t mod, int type, void *unused)
314 {
315 	int error;
316 
317 	error = 0;
318 
319 	switch (type) {
320 	case MOD_LOAD:
321 		vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
322 		    sizeof(struct vtnet_tx_header),
323 		    NULL, NULL, NULL, NULL, 0, 0);
324 		break;
325 	case MOD_QUIESCE:
326 	case MOD_UNLOAD:
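		/*
		 * Refuse to quiesce or unload while any transmit headers
		 * are still allocated from the zone.
		 */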
327 		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
328 			error = EBUSY;
329 		else if (type == MOD_UNLOAD) {
330 			uma_zdestroy(vtnet_tx_header_zone);
331 			vtnet_tx_header_zone = NULL;
332 		}
333 		break;
334 	case MOD_SHUTDOWN:
335 		break;
336 	default:
337 		error = EOPNOTSUPP;
338 		break;
339 	}
340 
341 	return (error);
342 }
343 
344 static int
345 vtnet_probe(device_t dev)
346 {
347 
348 	if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
349 		return (ENXIO);
350 
351 	device_set_desc(dev, "VirtIO Networking Adapter");
352 
353 	return (BUS_PROBE_DEFAULT);
354 }
355 
356 static int
357 vtnet_attach(device_t dev)
358 {
359 	struct vtnet_softc *sc;
360 	int error;
361 
362 	sc = device_get_softc(dev);
363 	sc->vtnet_dev = dev;
364 
365 	/* Register our feature descriptions. */
366 	virtio_set_feature_desc(dev, vtnet_feature_desc);
367 
368 	VTNET_CORE_LOCK_INIT(sc);
369 	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
370 
371 	vtnet_setup_sysctl(sc);
372 	vtnet_setup_features(sc);
373 
374 	error = vtnet_alloc_rx_filters(sc);
375 	if (error) {
376 		device_printf(dev, "cannot allocate Rx filters\n");
377 		goto fail;
378 	}
379 
380 	error = vtnet_alloc_rxtx_queues(sc);
381 	if (error) {
382 		device_printf(dev, "cannot allocate queues\n");
383 		goto fail;
384 	}
385 
386 	error = vtnet_alloc_virtqueues(sc);
387 	if (error) {
388 		device_printf(dev, "cannot allocate virtqueues\n");
389 		goto fail;
390 	}
391 
392 	error = vtnet_setup_interface(sc);
393 	if (error) {
394 		device_printf(dev, "cannot setup interface\n");
395 		goto fail;
396 	}
397 
398 	error = virtio_setup_intr(dev, INTR_TYPE_NET);
399 	if (error) {
400 		device_printf(dev, "cannot setup virtqueue interrupts\n");
401 		/* BMV: This will crash if during boot! */
402 		/* BMV: This will crash if we get here during boot! */
403 		goto fail;
404 	}
405 
406 	vtnet_start_taskqueues(sc);
407 
408 fail:
409 	if (error)
410 		vtnet_detach(dev);
411 
412 	return (error);
413 }
414 
415 static int
416 vtnet_detach(device_t dev)
417 {
418 	struct vtnet_softc *sc;
419 	struct ifnet *ifp;
420 
421 	sc = device_get_softc(dev);
422 	ifp = sc->vtnet_ifp;
423 
424 	if (device_is_attached(dev)) {
425 		VTNET_CORE_LOCK(sc);
426 		vtnet_stop(sc);
427 		VTNET_CORE_UNLOCK(sc);
428 
429 		callout_drain(&sc->vtnet_tick_ch);
430 		vtnet_drain_taskqueues(sc);
431 
432 		ether_ifdetach(ifp);
433 	}
434 
435 	vtnet_free_taskqueues(sc);
436 
437 	if (sc->vtnet_vlan_attach != NULL) {
438 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
439 		sc->vtnet_vlan_attach = NULL;
440 	}
441 	if (sc->vtnet_vlan_detach != NULL) {
442 		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
443 		sc->vtnet_vlan_detach = NULL;
444 	}
445 
446 	ifmedia_removeall(&sc->vtnet_media);
447 
448 	if (ifp != NULL) {
449 		if_free(ifp);
450 		sc->vtnet_ifp = NULL;
451 	}
452 
453 	vtnet_free_rxtx_queues(sc);
454 	vtnet_free_rx_filters(sc);
455 
456 	if (sc->vtnet_ctrl_vq != NULL)
457 		vtnet_free_ctrl_vq(sc);
458 
459 	VTNET_CORE_LOCK_DESTROY(sc);
460 
461 	return (0);
462 }
463 
464 static int
465 vtnet_suspend(device_t dev)
466 {
467 	struct vtnet_softc *sc;
468 
469 	sc = device_get_softc(dev);
470 
471 	VTNET_CORE_LOCK(sc);
472 	vtnet_stop(sc);
473 	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
474 	VTNET_CORE_UNLOCK(sc);
475 
476 	return (0);
477 }
478 
479 static int
480 vtnet_resume(device_t dev)
481 {
482 	struct vtnet_softc *sc;
483 	struct ifnet *ifp;
484 
485 	sc = device_get_softc(dev);
486 	ifp = sc->vtnet_ifp;
487 
488 	VTNET_CORE_LOCK(sc);
489 	if (ifp->if_flags & IFF_UP)
490 		vtnet_init_locked(sc);
491 	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
492 	VTNET_CORE_UNLOCK(sc);
493 
494 	return (0);
495 }
496 
497 static int
498 vtnet_shutdown(device_t dev)
499 {
500 
501 	/*
502 	 * Suspend already does all of what we need to
503 	 * do here; we just never expect to be resumed.
504 	 */
505 	return (vtnet_suspend(dev));
506 }
507 
508 static int
509 vtnet_attach_completed(device_t dev)
510 {
511 
512 	vtnet_attach_disable_promisc(device_get_softc(dev));
513 
514 	return (0);
515 }
516 
517 static int
518 vtnet_config_change(device_t dev)
519 {
520 	struct vtnet_softc *sc;
521 
522 	sc = device_get_softc(dev);
523 
524 	VTNET_CORE_LOCK(sc);
525 	vtnet_update_link_status(sc);
526 	if (sc->vtnet_link_active != 0)
527 		vtnet_tx_start_all(sc);
528 	VTNET_CORE_UNLOCK(sc);
529 
530 	return (0);
531 }
532 
533 static void
534 vtnet_negotiate_features(struct vtnet_softc *sc)
535 {
536 	device_t dev;
537 	uint64_t mask, features;
538 
539 	dev = sc->vtnet_dev;
540 	mask = 0;
541 
542 	/*
543 	 * TSO and LRO are only available when their corresponding checksum
544 	 * offload feature is also negotiated.
545 	 */
546 	if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
547 		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
548 		mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
549 	}
550 	if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
551 		mask |= VTNET_TSO_FEATURES;
552 	if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
553 		mask |= VTNET_LRO_FEATURES;
554 	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
555 		mask |= VIRTIO_NET_F_MQ;
556 #ifdef VTNET_LEGACY_TX
557 	mask |= VIRTIO_NET_F_MQ;
558 #endif
559 
560 	features = VTNET_FEATURES & ~mask;
561 	sc->vtnet_features = virtio_negotiate_features(dev, features);
562 
563 	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) == 0)
564 		return;
565 	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF))
566 		return;
567 
568 	/*
569 	 * LRO without mergeable buffers requires special care. This is not
570  * ideal because every receive buffer must be large enough to hold
571  * the maximum TCP packet, the Ethernet header, and the virtio net
572  * header. This requires up to 34 descriptors with MCLBYTES clusters.
573  * If we do not have indirect descriptors, LRO is disabled since the
574  * virtqueue will not contain very many receive buffers.
575 	 */
576 	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
577 		device_printf(dev,
578 		    "LRO disabled since neither mergeable buffers nor "
579 		    "indirect descriptors were negotiated\n");
580 
581 		features &= ~VTNET_LRO_FEATURES;
582 		sc->vtnet_features = virtio_negotiate_features(dev, features);
583 	} else
584 		sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
585 }
586 
587 static void
588 vtnet_setup_features(struct vtnet_softc *sc)
589 {
590 	device_t dev;
591 	int max_pairs, max;
592 
593 	dev = sc->vtnet_dev;
594 
595 	vtnet_negotiate_features(sc);
596 
597 	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
598 		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
599 
600 	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
601 		/* This feature should always be negotiated. */
602 		sc->vtnet_flags |= VTNET_FLAG_MAC;
603 	}
604 
605 	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
606 		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
607 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
608 	} else
609 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
610 
611 	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
612 		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
613 
614 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
615 			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
616 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
617 			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
618 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
619 			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
620 	}
621 
622 	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
623 	    sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
624 		max_pairs = virtio_read_dev_config_2(dev,
625 		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
626 		if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
627 		    max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
628 			max_pairs = 1;
629 	} else
630 		max_pairs = 1;
631 
632 	if (max_pairs > 1) {
633 		/*
634 		 * Limit the maximum number of queue pairs to the number of
635 		 * CPUs or the configured maximum. The actual number of
636 		 * queues that get used may be less.
637 		 */
638 		max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
639 		if (max > 0 && max_pairs > max)
640 			max_pairs = max;
641 		if (max_pairs > mp_ncpus)
642 			max_pairs = mp_ncpus;
643 		if (max_pairs > VTNET_MAX_QUEUE_PAIRS)
644 			max_pairs = VTNET_MAX_QUEUE_PAIRS;
645 		if (max_pairs > 1)
646 			sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
647 	}
648 
649 	sc->vtnet_max_vq_pairs = max_pairs;
650 }
651 
652 static int
653 vtnet_init_rxq(struct vtnet_softc *sc, int id)
654 {
655 	struct vtnet_rxq *rxq;
656 
657 	rxq = &sc->vtnet_rxqs[id];
658 
659 	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
660 	    device_get_nameunit(sc->vtnet_dev), id);
661 	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
662 
663 	rxq->vtnrx_sc = sc;
664 	rxq->vtnrx_id = id;
665 
666 	TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
667 	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
668 	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
669 
670 	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
671 }
672 
673 static int
674 vtnet_init_txq(struct vtnet_softc *sc, int id)
675 {
676 	struct vtnet_txq *txq;
677 
678 	txq = &sc->vtnet_txqs[id];
679 
680 	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
681 	    device_get_nameunit(sc->vtnet_dev), id);
682 	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
683 
684 	txq->vtntx_sc = sc;
685 	txq->vtntx_id = id;
686 
687 #ifndef VTNET_LEGACY_TX
688 	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
689 	    M_NOWAIT, &txq->vtntx_mtx);
690 	if (txq->vtntx_br == NULL)
691 		return (ENOMEM);
692 
693 	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
694 #endif
695 	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
696 	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
697 	    taskqueue_thread_enqueue, &txq->vtntx_tq);
698 	if (txq->vtntx_tq == NULL)
699 		return (ENOMEM);
700 
701 	return (0);
702 }
703 
704 static int
705 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
706 {
707 	int i, npairs, error;
708 
709 	npairs = sc->vtnet_max_vq_pairs;
710 
711 	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
712 	    M_NOWAIT | M_ZERO);
713 	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
714 	    M_NOWAIT | M_ZERO);
715 	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
716 		return (ENOMEM);
717 
718 	for (i = 0; i < npairs; i++) {
719 		error = vtnet_init_rxq(sc, i);
720 		if (error)
721 			return (error);
722 		error = vtnet_init_txq(sc, i);
723 		if (error)
724 			return (error);
725 	}
726 
727 	vtnet_setup_queue_sysctl(sc);
728 
729 	return (0);
730 }
731 
732 static void
733 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
734 {
735 
736 	rxq->vtnrx_sc = NULL;
737 	rxq->vtnrx_id = -1;
738 
739 	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
740 		mtx_destroy(&rxq->vtnrx_mtx);
741 }
742 
743 static void
744 vtnet_destroy_txq(struct vtnet_txq *txq)
745 {
746 
747 	txq->vtntx_sc = NULL;
748 	txq->vtntx_id = -1;
749 
750 #ifndef VTNET_LEGACY_TX
751 	if (txq->vtntx_br != NULL) {
752 		buf_ring_free(txq->vtntx_br, M_DEVBUF);
753 		txq->vtntx_br = NULL;
754 	}
755 #endif
756 
757 	if (mtx_initialized(&txq->vtntx_mtx) != 0)
758 		mtx_destroy(&txq->vtntx_mtx);
759 }
760 
761 static void
762 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
763 {
764 	int i;
765 
766 	if (sc->vtnet_rxqs != NULL) {
767 		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
768 			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
769 		free(sc->vtnet_rxqs, M_DEVBUF);
770 		sc->vtnet_rxqs = NULL;
771 	}
772 
773 	if (sc->vtnet_txqs != NULL) {
774 		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
775 			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
776 		free(sc->vtnet_txqs, M_DEVBUF);
777 		sc->vtnet_txqs = NULL;
778 	}
779 }
780 
781 static int
782 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
783 {
784 
785 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
786 		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
787 		    M_DEVBUF, M_NOWAIT | M_ZERO);
788 		if (sc->vtnet_mac_filter == NULL)
789 			return (ENOMEM);
790 	}
791 
792 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
793 		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
794 		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
795 		if (sc->vtnet_vlan_filter == NULL)
796 			return (ENOMEM);
797 	}
798 
799 	return (0);
800 }
801 
802 static void
803 vtnet_free_rx_filters(struct vtnet_softc *sc)
804 {
805 
806 	if (sc->vtnet_mac_filter != NULL) {
807 		free(sc->vtnet_mac_filter, M_DEVBUF);
808 		sc->vtnet_mac_filter = NULL;
809 	}
810 
811 	if (sc->vtnet_vlan_filter != NULL) {
812 		free(sc->vtnet_vlan_filter, M_DEVBUF);
813 		sc->vtnet_vlan_filter = NULL;
814 	}
815 }
816 
817 static int
818 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
819 {
820 	device_t dev;
821 	struct vq_alloc_info *info;
822 	struct vtnet_rxq *rxq;
823 	struct vtnet_txq *txq;
824 	int i, idx, flags, nvqs, rxsegs, error;
825 
826 	dev = sc->vtnet_dev;
827 	flags = 0;
828 
829 	/*
830 	 * Indirect descriptors are not needed for the Rx virtqueue when
831 	 * mergeable buffers are negotiated. The header is placed inline
832 	 * with the data, not in a separate descriptor, and mbuf clusters
833 	 * are always physically contiguous.
834 	 */
835 	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
836 		rxsegs = 0;
837 	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
838 		rxsegs = VTNET_MAX_RX_SEGS;
839 	else
840 		rxsegs = VTNET_MIN_RX_SEGS;
841 
842 	nvqs = sc->vtnet_max_vq_pairs * 2;
843 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
844 		nvqs++;
845 
846 	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
847 	if (info == NULL)
848 		return (ENOMEM);
849 
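	/*
	 * Allocate the virtqueues in the order the device exposes them:
	 * receive0/transmit0, receive1/transmit1, ..., followed by the
	 * control virtqueue when one was negotiated.
	 */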
850 	for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
851 		rxq = &sc->vtnet_rxqs[i];
852 		VQ_ALLOC_INFO_INIT(&info[idx], rxsegs,
853 		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
854 		    "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
855 
856 		txq = &sc->vtnet_txqs[i];
857 		VQ_ALLOC_INFO_INIT(&info[idx+1], VTNET_MAX_TX_SEGS,
858 		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
859 		    "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
860 	}
861 
862 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
863 		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
864 		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
865 	}
866 
867 	/*
868 	 * Enable interrupt binding if this is multiqueue. This only matters
869 	 * when per-vq MSIX is available.
870 	 */
871 	if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
872 		flags |= 0;
873 
874 	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
875 	free(info, M_TEMP);
876 
877 	return (error);
878 }
879 
880 static int
881 vtnet_setup_interface(struct vtnet_softc *sc)
882 {
883 	device_t dev;
884 	struct ifnet *ifp;
885 	int limit;
886 
887 	dev = sc->vtnet_dev;
888 
889 	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
890 	if (ifp == NULL) {
891 		device_printf(dev, "cannot allocate ifnet structure\n");
892 		return (ENOSPC);
893 	}
894 
895 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
896 	if_initbaudrate(ifp, IF_Gbps(10));	/* Approx. */
897 	ifp->if_softc = sc;
898 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
899 	ifp->if_init = vtnet_init;
900 	ifp->if_ioctl = vtnet_ioctl;
901 
902 #ifndef VTNET_LEGACY_TX
903 	ifp->if_transmit = vtnet_txq_mq_start;
904 	ifp->if_qflush = vtnet_qflush;
905 #else
906 	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
907 	ifp->if_start = vtnet_start;
908 	IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
909 	ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
910 	IFQ_SET_READY(&ifp->if_snd);
911 #endif
912 
913 	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
914 	    vtnet_ifmedia_sts);
915 	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
916 	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
917 
918 	/* Read (or generate) the MAC address for the adapter. */
919 	vtnet_get_hwaddr(sc);
920 
921 	ether_ifattach(ifp, sc->vtnet_hwaddr);
922 
923 	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
924 		ifp->if_capabilities |= IFCAP_LINKSTATE;
925 
926 	/* Tell the upper layer(s) we support long frames. */
927 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
928 	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
929 
930 	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
931 		ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
932 
933 		if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
934 			ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
935 			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
936 		} else {
937 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
938 				ifp->if_capabilities |= IFCAP_TSO4;
939 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
940 				ifp->if_capabilities |= IFCAP_TSO6;
941 			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
942 				sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
943 		}
944 
945 		if (ifp->if_capabilities & IFCAP_TSO)
946 			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
947 	}
948 
949 	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM))
950 		ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
951 
952 	if (ifp->if_capabilities & IFCAP_HWCSUM) {
953 		/*
954 		 * VirtIO does not support VLAN tagging, but we can fake
955 		 * it by inserting and removing the 802.1Q header during
956 		 * transmit and receive. We are then able to do checksum
957 		 * offloading of VLAN frames.
958 		 */
959 		ifp->if_capabilities |=
960 		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
961 	}
962 
963 	ifp->if_capenable = ifp->if_capabilities;
964 
965 	/*
966 	 * Capabilities after here are not enabled by default.
967 	 */
968 
969 	if (ifp->if_capabilities & IFCAP_RXCSUM) {
970 		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
971 		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
972 			ifp->if_capabilities |= IFCAP_LRO;
973 	}
974 
975 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
976 		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
977 
978 		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
979 		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
980 		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
981 		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
982 	}
983 
984 	limit = vtnet_tunable_int(sc, "rx_process_limit",
985 	    vtnet_rx_process_limit);
986 	if (limit < 0)
987 		limit = INT_MAX;
988 	sc->vtnet_rx_process_limit = limit;
989 
990 	return (0);
991 }
992 
993 static int
994 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
995 {
996 	struct ifnet *ifp;
997 	int frame_size, clsize;
998 
999 	ifp = sc->vtnet_ifp;
1000 
1001 	if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
1002 		return (EINVAL);
1003 
1004 	frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
1005 	    new_mtu;
1006 
1007 	/*
1008 	 * Based on the new MTU (and hence frame size) determine which
1009 	 * cluster size is most appropriate for the receive queues.
1010 	 */
1011 	if (frame_size <= MCLBYTES) {
1012 		clsize = MCLBYTES;
1013 	} else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1014 		/* Avoid going past 9K jumbos. */
1015 		if (frame_size > MJUM9BYTES)
1016 			return (EINVAL);
1017 		clsize = MJUM9BYTES;
1018 	} else
1019 		clsize = MJUMPAGESIZE;
1020 
1021 	ifp->if_mtu = new_mtu;
1022 	sc->vtnet_rx_new_clsize = clsize;
1023 
1024 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1025 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1026 		vtnet_init_locked(sc);
1027 	}
1028 
1029 	return (0);
1030 }
1031 
1032 static int
1033 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1034 {
1035 	struct vtnet_softc *sc;
1036 	struct ifreq *ifr;
1037 	int reinit, mask, error;
1038 
1039 	sc = ifp->if_softc;
1040 	ifr = (struct ifreq *) data;
1041 	error = 0;
1042 
1043 	switch (cmd) {
1044 	case SIOCSIFMTU:
1045 		if (ifp->if_mtu != ifr->ifr_mtu) {
1046 			VTNET_CORE_LOCK(sc);
1047 			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
1048 			VTNET_CORE_UNLOCK(sc);
1049 		}
1050 		break;
1051 
1052 	case SIOCSIFFLAGS:
1053 		VTNET_CORE_LOCK(sc);
1054 		if ((ifp->if_flags & IFF_UP) == 0) {
1055 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1056 				vtnet_stop(sc);
1057 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1058 			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
1059 			    (IFF_PROMISC | IFF_ALLMULTI)) {
1060 				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1061 					vtnet_rx_filter(sc);
1062 				else
1063 					error = ENOTSUP;
1064 			}
1065 		} else
1066 			vtnet_init_locked(sc);
1067 
1068 		if (error == 0)
1069 			sc->vtnet_if_flags = ifp->if_flags;
1070 		VTNET_CORE_UNLOCK(sc);
1071 		break;
1072 
1073 	case SIOCADDMULTI:
1074 	case SIOCDELMULTI:
1075 		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
1076 			break;
1077 		VTNET_CORE_LOCK(sc);
1078 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1079 			vtnet_rx_filter_mac(sc);
1080 		VTNET_CORE_UNLOCK(sc);
1081 		break;
1082 
1083 	case SIOCSIFMEDIA:
1084 	case SIOCGIFMEDIA:
1085 		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1086 		break;
1087 
1088 	case SIOCSIFCAP:
1089 		VTNET_CORE_LOCK(sc);
1090 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1091 
1092 		if (mask & IFCAP_TXCSUM)
1093 			ifp->if_capenable ^= IFCAP_TXCSUM;
1094 		if (mask & IFCAP_TXCSUM_IPV6)
1095 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1096 		if (mask & IFCAP_TSO4)
1097 			ifp->if_capenable ^= IFCAP_TSO4;
1098 		if (mask & IFCAP_TSO6)
1099 			ifp->if_capenable ^= IFCAP_TSO6;
1100 
1101 		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
1102 		    IFCAP_VLAN_HWFILTER)) {
1103 			/* These Rx features require us to renegotiate. */
1104 			reinit = 1;
1105 
1106 			if (mask & IFCAP_RXCSUM)
1107 				ifp->if_capenable ^= IFCAP_RXCSUM;
1108 			if (mask & IFCAP_RXCSUM_IPV6)
1109 				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1110 			if (mask & IFCAP_LRO)
1111 				ifp->if_capenable ^= IFCAP_LRO;
1112 			if (mask & IFCAP_VLAN_HWFILTER)
1113 				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1114 		} else
1115 			reinit = 0;
1116 
1117 		if (mask & IFCAP_VLAN_HWTSO)
1118 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1119 		if (mask & IFCAP_VLAN_HWTAGGING)
1120 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1121 
1122 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1123 			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1124 			vtnet_init_locked(sc);
1125 		}
1126 
1127 		VTNET_CORE_UNLOCK(sc);
1128 		VLAN_CAPABILITIES(ifp);
1129 
1130 		break;
1131 
1132 	default:
1133 		error = ether_ioctl(ifp, cmd, data);
1134 		break;
1135 	}
1136 
1137 	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1138 
1139 	return (error);
1140 }
1141 
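/*
 * Fill the receive virtqueue with empty mbuf buffers until it is full.
 */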
1142 static int
1143 vtnet_rxq_populate(struct vtnet_rxq *rxq)
1144 {
1145 	struct virtqueue *vq;
1146 	int nbufs, error;
1147 
1148 	vq = rxq->vtnrx_vq;
1149 	error = ENOSPC;
1150 
1151 	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1152 		error = vtnet_rxq_new_buf(rxq);
1153 		if (error)
1154 			break;
1155 	}
1156 
1157 	if (nbufs > 0) {
1158 		virtqueue_notify(vq);
1159 		/*
1160 		 * EMSGSIZE signifies the virtqueue did not have enough
1161 		 * entries available to hold the last mbuf. This is not
1162 		 * an error.
1163 		 */
1164 		if (error == EMSGSIZE)
1165 			error = 0;
1166 	}
1167 
1168 	return (error);
1169 }
1170 
1171 static void
1172 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1173 {
1174 	struct virtqueue *vq;
1175 	struct mbuf *m;
1176 	int last;
1177 
1178 	vq = rxq->vtnrx_vq;
1179 	last = 0;
1180 
1181 	while ((m = virtqueue_drain(vq, &last)) != NULL)
1182 		m_freem(m);
1183 
1184 	KASSERT(virtqueue_empty(vq),
1185 	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1186 }
1187 
1188 static struct mbuf *
1189 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1190 {
1191 	struct mbuf *m_head, *m_tail, *m;
1192 	int i, clsize;
1193 
1194 	clsize = sc->vtnet_rx_clsize;
1195 
1196 	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1197 	    ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
1198 
1199 	m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
1200 	if (m_head == NULL)
1201 		goto fail;
1202 
1203 	m_head->m_len = clsize;
1204 	m_tail = m_head;
1205 
1206 	/* Allocate the rest of the chain. */
1207 	for (i = 1; i < nbufs; i++) {
1208 		m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
1209 		if (m == NULL)
1210 			goto fail;
1211 
1212 		m->m_len = clsize;
1213 		m_tail->m_next = m;
1214 		m_tail = m;
1215 	}
1216 
1217 	if (m_tailp != NULL)
1218 		*m_tailp = m_tail;
1219 
1220 	return (m_head);
1221 
1222 fail:
1223 	sc->vtnet_stats.mbuf_alloc_failed++;
1224 	m_freem(m_head);
1225 
1226 	return (NULL);
1227 }
1228 
1229 /*
1230  * Slow path for when LRO without mergeable buffers is negotiated.
1231  */
1232 static int
1233 vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1234     int len0)
1235 {
1236 	struct vtnet_softc *sc;
1237 	struct mbuf *m, *m_prev;
1238 	struct mbuf *m_new, *m_tail;
1239 	int len, clsize, nreplace, error;
1240 
1241 	sc = rxq->vtnrx_sc;
1242 	clsize = sc->vtnet_rx_clsize;
1243 
1244 	m_prev = NULL;
1245 	m_tail = NULL;
1246 	nreplace = 0;
1247 
1248 	m = m0;
1249 	len = len0;
1250 
1251 	/*
1252 	 * Since these mbuf chains are so large, we avoid allocating an
1253 	 * entire replacement chain if possible. When the received frame
1254 	 * did not consume the entire chain, the unused mbufs are moved
1255 	 * to the replacement chain.
1256 	 */
1257 	while (len > 0) {
1258 		/*
1259 		 * Something is seriously wrong if we received a frame
1260 		 * larger than the chain. Drop it.
1261 		 */
1262 		if (m == NULL) {
1263 			sc->vtnet_stats.rx_frame_too_large++;
1264 			return (EMSGSIZE);
1265 		}
1266 
1267 		/* We always allocate the same cluster size. */
1268 		KASSERT(m->m_len == clsize,
1269 		    ("%s: mbuf size %d is not the cluster size %d",
1270 		    __func__, m->m_len, clsize));
1271 
1272 		m->m_len = MIN(m->m_len, len);
1273 		len -= m->m_len;
1274 
1275 		m_prev = m;
1276 		m = m->m_next;
1277 		nreplace++;
1278 	}
1279 
1280 	KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
1281 	    ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
1282 	    sc->vtnet_rx_nmbufs));
1283 
1284 	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1285 	if (m_new == NULL) {
1286 		m_prev->m_len = clsize;
1287 		return (ENOBUFS);
1288 	}
1289 
1290 	/*
1291 	 * Move any unused mbufs from the received chain onto the end
1292 	 * of the new chain.
1293 	 */
1294 	if (m_prev->m_next != NULL) {
1295 		m_tail->m_next = m_prev->m_next;
1296 		m_prev->m_next = NULL;
1297 	}
1298 
1299 	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1300 	if (error) {
1301 		/*
1302 		 * BAD! We could not enqueue the replacement mbuf chain. We
1303 		 * must restore the m0 chain to the original state if it was
1304 		 * modified so we can subsequently discard it.
1305 		 *
1306 		 * NOTE: The replacement is supposed to be an identical copy
1307 		 * of the one just dequeued, so this is an unexpected error.
1308 		 */
1309 		sc->vtnet_stats.rx_enq_replacement_failed++;
1310 
1311 		if (m_tail->m_next != NULL) {
1312 			m_prev->m_next = m_tail->m_next;
1313 			m_tail->m_next = NULL;
1314 		}
1315 
1316 		m_prev->m_len = clsize;
1317 		m_freem(m_new);
1318 	}
1319 
1320 	return (error);
1321 }
1322 
1323 static int
1324 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1325 {
1326 	struct vtnet_softc *sc;
1327 	struct mbuf *m_new;
1328 	int error;
1329 
1330 	sc = rxq->vtnrx_sc;
1331 
1332 	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1333 	    ("%s: chained mbuf without LRO_NOMRG", __func__));
1334 
1335 	if (m->m_next == NULL) {
1336 		/* Fast-path for the common case of just one mbuf. */
1337 		if (m->m_len < len)
1338 			return (EINVAL);
1339 
1340 		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1341 		if (m_new == NULL)
1342 			return (ENOBUFS);
1343 
1344 		error = vtnet_rxq_enqueue_buf(rxq, m_new);
1345 		if (error) {
1346 			/*
1347 			 * The new mbuf is supposed to be an identical
1348 			 * copy of the one just dequeued, so this is an
1349 			 * unexpected error.
1350 			 */
1351 			m_freem(m_new);
1352 			sc->vtnet_stats.rx_enq_replacement_failed++;
1353 		} else
1354 			m->m_len = len;
1355 	} else
1356 		error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
1357 
1358 	return (error);
1359 }
1360 
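/*
 * Enqueue a receive buffer (possibly an mbuf chain when LRO_NOMRG).
 * Without mergeable buffers, the virtio net header lives in the
 * vtnet_rx_header at the front of the cluster and is added as its own
 * scatter-gather segment; with mergeable buffers, the header is inline
 * with the frame data.
 */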
1361 static int
1362 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1363 {
1364 	struct sglist sg;
1365 	struct sglist_seg segs[VTNET_MAX_RX_SEGS];
1366 	struct vtnet_softc *sc;
1367 	struct vtnet_rx_header *rxhdr;
1368 	uint8_t *mdata;
1369 	int offset, error;
1370 
1371 	sc = rxq->vtnrx_sc;
1372 	mdata = mtod(m, uint8_t *);
1373 
1374 	VTNET_RXQ_LOCK_ASSERT(rxq);
1375 	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1376 	    ("%s: chained mbuf without LRO_NOMRG", __func__));
1377 	KASSERT(m->m_len == sc->vtnet_rx_clsize,
1378 	    ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
1379 	     sc->vtnet_rx_clsize));
1380 
1381 	sglist_init(&sg, VTNET_MAX_RX_SEGS, segs);
1382 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1383 		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1384 		rxhdr = (struct vtnet_rx_header *) mdata;
1385 		sglist_append(&sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1386 		offset = sizeof(struct vtnet_rx_header);
1387 	} else
1388 		offset = 0;
1389 
1390 	sglist_append(&sg, mdata + offset, m->m_len - offset);
1391 	if (m->m_next != NULL) {
1392 		error = sglist_append_mbuf(&sg, m->m_next);
1393 		MPASS(error == 0);
1394 	}
1395 
1396 	error = virtqueue_enqueue(rxq->vtnrx_vq, m, &sg, 0, sg.sg_nseg);
1397 
1398 	return (error);
1399 }
1400 
1401 static int
1402 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1403 {
1404 	struct vtnet_softc *sc;
1405 	struct mbuf *m;
1406 	int error;
1407 
1408 	sc = rxq->vtnrx_sc;
1409 
1410 	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1411 	if (m == NULL)
1412 		return (ENOBUFS);
1413 
1414 	error = vtnet_rxq_enqueue_buf(rxq, m);
1415 	if (error)
1416 		m_freem(m);
1417 
1418 	return (error);
1419 }
1420 
1421 /*
1422  * Use the checksum offset in the VirtIO header to set the
1423  * correct CSUM_* flags.
1424  */
1425 static int
1426 vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
1427     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1428 {
1429 	struct vtnet_softc *sc;
1430 #if defined(INET) || defined(INET6)
1431 	int offset = hdr->csum_start + hdr->csum_offset;
1432 #endif
1433 
1434 	sc = rxq->vtnrx_sc;
1435 
1436 	/* Only do a basic sanity check on the offset. */
1437 	switch (eth_type) {
1438 #if defined(INET)
1439 	case ETHERTYPE_IP:
1440 		if (__predict_false(offset < ip_start + sizeof(struct ip)))
1441 			return (1);
1442 		break;
1443 #endif
1444 #if defined(INET6)
1445 	case ETHERTYPE_IPV6:
1446 		if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
1447 			return (1);
1448 		break;
1449 #endif
1450 	default:
1451 		sc->vtnet_stats.rx_csum_bad_ethtype++;
1452 		return (1);
1453 	}
1454 
1455 	/*
1456 	 * Use the offset to determine the appropriate CSUM_* flags. This is
1457 	 * a bit dirty, but we can get by with it since the checksum offsets
1458 	 * happen to be different. We assume the host does not do IPv4
1459 	 * header checksum offloading.
1460 	 */
1461 	switch (hdr->csum_offset) {
1462 	case offsetof(struct udphdr, uh_sum):
1463 	case offsetof(struct tcphdr, th_sum):
1464 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1465 		m->m_pkthdr.csum_data = 0xFFFF;
1466 		break;
1467 	case offsetof(struct sctphdr, checksum):
1468 		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1469 		break;
1470 	default:
1471 		sc->vtnet_stats.rx_csum_bad_offset++;
1472 		return (1);
1473 	}
1474 
1475 	return (0);
1476 }
1477 
1478 static int
1479 vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
1480     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1481 {
1482 	struct vtnet_softc *sc;
1483 	int offset, proto;
1484 
1485 	sc = rxq->vtnrx_sc;
1486 
1487 	switch (eth_type) {
1488 #if defined(INET)
1489 	case ETHERTYPE_IP: {
1490 		struct ip *ip;
1491 		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
1492 			return (1);
1493 		ip = (struct ip *)(m->m_data + ip_start);
1494 		proto = ip->ip_p;
1495 		offset = ip_start + (ip->ip_hl << 2);
1496 		break;
1497 	}
1498 #endif
1499 #if defined(INET6)
1500 	case ETHERTYPE_IPV6:
1501 		if (__predict_false(m->m_len < ip_start +
1502 		    sizeof(struct ip6_hdr)))
1503 			return (1);
1504 		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
1505 		if (__predict_false(offset < 0))
1506 			return (1);
1507 		break;
1508 #endif
1509 	default:
1510 		sc->vtnet_stats.rx_csum_bad_ethtype++;
1511 		return (1);
1512 	}
1513 
1514 	switch (proto) {
1515 	case IPPROTO_TCP:
1516 		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
1517 			return (1);
1518 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1519 		m->m_pkthdr.csum_data = 0xFFFF;
1520 		break;
1521 	case IPPROTO_UDP:
1522 		if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
1523 			return (1);
1524 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1525 		m->m_pkthdr.csum_data = 0xFFFF;
1526 		break;
1527 	case IPPROTO_SCTP:
1528 		if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
1529 			return (1);
1530 		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1531 		break;
1532 	default:
1533 		/*
1534 		 * For the remaining protocols, FreeBSD does not support
1535 		 * checksum offloading, so the checksum will be recomputed.
1536 		 */
1537 #if 0
1538 		if_printf(sc->vtnet_ifp, "%s: cksum offload of unsupported "
1539 		    "protocol eth_type=%#x proto=%d csum_start=%d "
1540 		    "csum_offset=%d\n", __func__, eth_type, proto,
1541 		    hdr->csum_start, hdr->csum_offset);
1542 #endif
1543 		break;
1544 	}
1545 
1546 	return (0);
1547 }
1548 
1549 /*
1550  * Set the appropriate CSUM_* flags. Unfortunately, the information
1551  * provided is not directly useful to us. The VirtIO header gives the
1552  * offset of the checksum, which is all Linux needs, but this is not
1553  * how FreeBSD does things. We are forced to peek inside the packet
1554  * a bit.
1555  *
1556  * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
1557  * could accept the offsets and let the stack figure it out.
1558  */
1559 static int
1560 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1561     struct virtio_net_hdr *hdr)
1562 {
1563 	struct ether_header *eh;
1564 	struct ether_vlan_header *evh;
1565 	uint16_t eth_type;
1566 	int offset, error;
1567 
1568 	eh = mtod(m, struct ether_header *);
1569 	eth_type = ntohs(eh->ether_type);
1570 	if (eth_type == ETHERTYPE_VLAN) {
1571 		/* BMV: We should handle nested VLAN tags too. */
1572 		evh = mtod(m, struct ether_vlan_header *);
1573 		eth_type = ntohs(evh->evl_proto);
1574 		offset = sizeof(struct ether_vlan_header);
1575 	} else
1576 		offset = sizeof(struct ether_header);
1577 
1578 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1579 		error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
1580 	else
1581 		error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
1582 
1583 	return (error);
1584 }
1585 
1586 static void
1587 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1588 {
1589 	struct mbuf *m;
1590 
1591 	while (--nbufs > 0) {
1592 		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1593 		if (m == NULL)
1594 			break;
1595 		vtnet_rxq_discard_buf(rxq, m);
1596 	}
1597 }
1598 
1599 static void
1600 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1601 {
1602 	int error;
1603 
1604 	/*
1605 	 * Requeue the discarded mbuf. This should always be successful
1606 	 * since it was just dequeued.
1607 	 */
1608 	error = vtnet_rxq_enqueue_buf(rxq, m);
1609 	KASSERT(error == 0,
1610 	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
1611 }
1612 
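/*
 * Dequeue the remaining buffers of a mergeable receive chain and append
 * them to m_head, replacing each dequeued buffer with a new one. On
 * failure, the partial chain is freed and a nonzero value is returned.
 */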
1613 static int
1614 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1615 {
1616 	struct vtnet_softc *sc;
1617 	struct ifnet *ifp;
1618 	struct virtqueue *vq;
1619 	struct mbuf *m, *m_tail;
1620 	int len;
1621 
1622 	sc = rxq->vtnrx_sc;
1623 	vq = rxq->vtnrx_vq;
1624 	ifp = sc->vtnet_ifp;
1625 	m_tail = m_head;
1626 
1627 	while (--nbufs > 0) {
1628 		m = virtqueue_dequeue(vq, &len);
1629 		if (m == NULL) {
1630 			rxq->vtnrx_stats.vrxs_ierrors++;
1631 			goto fail;
1632 		}
1633 
1634 		if (vtnet_rxq_new_buf(rxq) != 0) {
1635 			rxq->vtnrx_stats.vrxs_iqdrops++;
1636 			vtnet_rxq_discard_buf(rxq, m);
1637 			if (nbufs > 1)
1638 				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1639 			goto fail;
1640 		}
1641 
1642 		if (m->m_len < len)
1643 			len = m->m_len;
1644 
1645 		m->m_len = len;
1646 		m->m_flags &= ~M_PKTHDR;
1647 
1648 		m_head->m_pkthdr.len += len;
1649 		m_tail->m_next = m;
1650 		m_tail = m;
1651 	}
1652 
1653 	return (0);
1654 
1655 fail:
1656 	sc->vtnet_stats.rx_mergeable_failed++;
1657 	m_freem(m_head);
1658 
1659 	return (1);
1660 }
1661 
1662 static void
1663 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
1664     struct virtio_net_hdr *hdr)
1665 {
1666 	struct vtnet_softc *sc;
1667 	struct ifnet *ifp;
1668 	struct ether_header *eh;
1669 
1670 	sc = rxq->vtnrx_sc;
1671 	ifp = sc->vtnet_ifp;
1672 
1673 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1674 		eh = mtod(m, struct ether_header *);
1675 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1676 			vtnet_vlan_tag_remove(m);
1677 			/*
1678 			 * With the 802.1Q header removed, update the
1679 			 * checksum starting location accordingly.
1680 			 */
1681 			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1682 				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
1683 		}
1684 	}
1685 
1686 	m->m_pkthdr.flowid = rxq->vtnrx_id;
1687 	m->m_flags |= M_FLOWID;
1688 
1689 	/*
1690 	 * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
1691 	 * distinction that Linux does. Need to reevaluate if performing
1692 	 * offloading for the NEEDS_CSUM case is really appropriate.
1693 	 */
1694 	if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
1695 	    VIRTIO_NET_HDR_F_DATA_VALID)) {
1696 		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
1697 			rxq->vtnrx_stats.vrxs_csum++;
1698 		else
1699 			rxq->vtnrx_stats.vrxs_csum_failed++;
1700 	}
1701 
1702 	rxq->vtnrx_stats.vrxs_ipackets++;
1703 	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
1704 
1705 	VTNET_RXQ_UNLOCK(rxq);
1706 	(*ifp->if_input)(ifp, m);
1707 	VTNET_RXQ_LOCK(rxq);
1708 }
1709 
1710 static int
1711 vtnet_rxq_eof(struct vtnet_rxq *rxq)
1712 {
1713 	struct virtio_net_hdr lhdr, *hdr;
1714 	struct vtnet_softc *sc;
1715 	struct ifnet *ifp;
1716 	struct virtqueue *vq;
1717 	struct mbuf *m;
1718 	struct virtio_net_hdr_mrg_rxbuf *mhdr;
1719 	int len, deq, nbufs, adjsz, count;
1720 
1721 	sc = rxq->vtnrx_sc;
1722 	vq = rxq->vtnrx_vq;
1723 	ifp = sc->vtnet_ifp;
1724 	hdr = &lhdr;
1725 	deq = 0;
1726 	count = sc->vtnet_rx_process_limit;
1727 
1728 	VTNET_RXQ_LOCK_ASSERT(rxq);
1729 
1730 	while (count-- > 0) {
1731 		m = virtqueue_dequeue(vq, &len);
1732 		if (m == NULL)
1733 			break;
1734 		deq++;
1735 
1736 		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1737 			rxq->vtnrx_stats.vrxs_ierrors++;
1738 			vtnet_rxq_discard_buf(rxq, m);
1739 			continue;
1740 		}
1741 
1742 		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1743 			nbufs = 1;
1744 			adjsz = sizeof(struct vtnet_rx_header);
1745 			/*
1746 			 * Account for our pad inserted between the header
1747 			 * and the actual start of the frame.
1748 			 */
1749 			len += VTNET_RX_HEADER_PAD;
1750 		} else {
1751 			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1752 			nbufs = mhdr->num_buffers;
1753 			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1754 		}
1755 
1756 		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
1757 			rxq->vtnrx_stats.vrxs_iqdrops++;
1758 			vtnet_rxq_discard_buf(rxq, m);
1759 			if (nbufs > 1)
1760 				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1761 			continue;
1762 		}
1763 
1764 		m->m_pkthdr.len = len;
1765 		m->m_pkthdr.rcvif = ifp;
1766 		m->m_pkthdr.csum_flags = 0;
1767 
1768 		if (nbufs > 1) {
1769 			/* Dequeue the rest of chain. */
1770 			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
1771 				continue;
1772 		}
1773 
1774 		/*
1775 		 * Save copy of header before we strip it. For both mergeable
1776 		 * and non-mergeable, the header is at the beginning of the
1777 		 * mbuf data. We no longer need num_buffers, so always use a
1778 		 * regular header.
1779 		 *
1780 		 * BMV: Is this memcpy() expensive? We know the mbuf data is
1781 		 * still valid even after the m_adj().
1782 		 */
1783 		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
1784 		m_adj(m, adjsz);
1785 
1786 		vtnet_rxq_input(rxq, m, hdr);
1787 
1788 		/* Must recheck after dropping the Rx lock. */
1789 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1790 			break;
1791 	}
1792 
1793 	if (deq > 0)
1794 		virtqueue_notify(vq);
1795 
1796 	return (count > 0 ? 0 : EAGAIN);
1797 }
1798 
1799 static void
1800 vtnet_rx_vq_intr(void *xrxq)
1801 {
1802 	struct vtnet_softc *sc;
1803 	struct vtnet_rxq *rxq;
1804 	struct ifnet *ifp;
1805 	int tries, more;
1806 
1807 	rxq = xrxq;
1808 	sc = rxq->vtnrx_sc;
1809 	ifp = sc->vtnet_ifp;
1810 	tries = 0;
1811 
1812 	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
1813 		/*
1814 		 * Ignore this interrupt. Either this is a spurious interrupt
1815 		 * or this is multiqueue without per-VQ MSIX, so every queue needs
1816 		 * to be polled (a brain-dead configuration we could try harder
1817 		 * to avoid).
1818 		 */
1819 		vtnet_rxq_disable_intr(rxq);
1820 		return;
1821 	}
1822 
1823 again:
1824 	VTNET_RXQ_LOCK(rxq);
1825 
1826 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1827 		VTNET_RXQ_UNLOCK(rxq);
1828 		return;
1829 	}
1830 
1831 	more = vtnet_rxq_eof(rxq);
1832 	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1833 		if (!more)
1834 			vtnet_rxq_disable_intr(rxq);
1835 		/*
1836 		 * This is an occasional condition or race (when !more),
1837 		 * so retry a few times before scheduling the taskqueue.
1838 		 */
1839 		rxq->vtnrx_stats.vrxs_rescheduled++;
1840 		VTNET_RXQ_UNLOCK(rxq);
1841 		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
1842 			goto again;
1843 		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1844 	} else
1845 		VTNET_RXQ_UNLOCK(rxq);
1846 }
1847 
1848 static void
1849 vtnet_rxq_tq_intr(void *xrxq, int pending)
1850 {
1851 	struct vtnet_softc *sc;
1852 	struct vtnet_rxq *rxq;
1853 	struct ifnet *ifp;
1854 	int more;
1855 
1856 	rxq = xrxq;
1857 	sc = rxq->vtnrx_sc;
1858 	ifp = sc->vtnet_ifp;
1859 
1860 	VTNET_RXQ_LOCK(rxq);
1861 
1862 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1863 		VTNET_RXQ_UNLOCK(rxq);
1864 		return;
1865 	}
1866 
1867 	more = vtnet_rxq_eof(rxq);
1868 	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1869 		if (!more)
1870 			vtnet_rxq_disable_intr(rxq);
1871 		rxq->vtnrx_stats.vrxs_rescheduled++;
1872 		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1873 	}
1874 
1875 	VTNET_RXQ_UNLOCK(rxq);
1876 }
1877 
1878 static void
1879 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
1880 {
1881 	struct virtqueue *vq;
1882 	struct vtnet_tx_header *txhdr;
1883 	int last;
1884 
1885 	vq = txq->vtntx_vq;
1886 	last = 0;
1887 
1888 	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
1889 		m_freem(txhdr->vth_mbuf);
1890 		uma_zfree(vtnet_tx_header_zone, txhdr);
1891 	}
1892 
1893 	KASSERT(virtqueue_empty(vq),
1894 	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
1895 }
1896 
1897 /*
1898  * BMV: Much of this can go away once we finally have offsets in
1899  * the mbuf packet header. Bug andre@.
1900  */
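/*
 * Determine the Ethernet type, IP protocol, and offset to the start of
 * the L4 header for a frame being transmitted with checksum or TSO
 * offload. For example, an untagged IPv4/TCP frame with no IP options
 * yields *etype ETHERTYPE_IP, *proto IPPROTO_TCP, and *start
 * ETHER_HDR_LEN + 20.
 */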
1901 static int
1902 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
1903     int *etype, int *proto, int *start)
1904 {
1905 	struct vtnet_softc *sc;
1906 	struct ether_vlan_header *evh;
1907 	int offset;
1908 
1909 	sc = txq->vtntx_sc;
1910 
1911 	evh = mtod(m, struct ether_vlan_header *);
1912 	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1913 		/* BMV: We should handle nested VLAN tags too. */
1914 		*etype = ntohs(evh->evl_proto);
1915 		offset = sizeof(struct ether_vlan_header);
1916 	} else {
1917 		*etype = ntohs(evh->evl_encap_proto);
1918 		offset = sizeof(struct ether_header);
1919 	}
1920 
1921 	switch (*etype) {
1922 #if defined(INET)
1923 	case ETHERTYPE_IP: {
1924 		struct ip *ip, iphdr;
1925 		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
1926 			m_copydata(m, offset, sizeof(struct ip),
1927 			    (caddr_t) &iphdr);
1928 			ip = &iphdr;
1929 		} else
1930 			ip = (struct ip *)(m->m_data + offset);
1931 		*proto = ip->ip_p;
1932 		*start = offset + (ip->ip_hl << 2);
1933 		break;
1934 	}
1935 #endif
1936 #if defined(INET6)
1937 	case ETHERTYPE_IPV6:
1938 		*proto = -1;
1939 		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
1940 		/* Assert the network stack sent us a valid packet. */
1941 		KASSERT(*start > offset,
1942 		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
1943 		    *start, offset, *proto));
1944 		break;
1945 #endif
1946 	default:
1947 		sc->vtnet_stats.tx_csum_bad_ethtype++;
1948 		return (EINVAL);
1949 	}
1950 
1951 	return (0);
1952 }
1953 
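/*
 * Fill in the virtio_net_hdr GSO fields for a TSO mbuf. hdr_len covers
 * the Ethernet, IP, and TCP headers (e.g. 14 + 20 + 20 = 54 bytes for an
 * IPv4/TCP frame without options) and gso_size is the MSS the stack
 * provided in tso_segsz; the host is then expected to split the payload
 * into segments of at most gso_size bytes, each carrying a copy of those
 * headers.
 */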
1954 static int
1955 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
1956     int offset, struct virtio_net_hdr *hdr)
1957 {
1958 	static struct timeval lastecn;
1959 	static int curecn;
1960 	struct vtnet_softc *sc;
1961 	struct tcphdr *tcp, tcphdr;
1962 
1963 	sc = txq->vtntx_sc;
1964 
1965 	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
1966 		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
1967 		tcp = &tcphdr;
1968 	} else
1969 		tcp = (struct tcphdr *)(m->m_data + offset);
1970 
1971 	hdr->hdr_len = offset + (tcp->th_off << 2);
1972 	hdr->gso_size = m->m_pkthdr.tso_segsz;
1973 	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
1974 	    VIRTIO_NET_HDR_GSO_TCPV6;
1975 
1976 	if (tcp->th_flags & TH_CWR) {
1977 		/*
1978 		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
1979 		 * ECN support is not on a per-interface basis, but globally via
1980 		 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
1981 		 */
1982 		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
1983 			if (ppsratecheck(&lastecn, &curecn, 1))
1984 				if_printf(sc->vtnet_ifp,
1985 				    "TSO with ECN not negotiated with host\n");
1986 			return (ENOTSUP);
1987 		}
1988 		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1989 	}
1990 
1991 	txq->vtntx_stats.vtxs_tso++;
1992 
1993 	return (0);
1994 }
1995 
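/*
 * Populate the virtio_net_hdr for an outbound mbuf requesting checksum
 * and/or TSO offload. Per the VirtIO spec, VIRTIO_NET_HDR_F_NEEDS_CSUM
 * tells the host to checksum the data beginning at csum_start and store
 * the result at csum_start + csum_offset; the stack supplies the latter
 * in csum_data.
 */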
1996 static struct mbuf *
1997 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
1998     struct virtio_net_hdr *hdr)
1999 {
2000 	struct vtnet_softc *sc;
2001 	int flags, etype, csum_start, proto, error;
2002 
2003 	sc = txq->vtntx_sc;
2004 	flags = m->m_pkthdr.csum_flags;
2005 
2006 	error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2007 	if (error)
2008 		goto drop;
2009 
2010 	if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
2011 	    (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
2012 		/*
2013 		 * We could compare the IP protocol vs the CSUM_ flag too,
2014 		 * but that really should not be necessary.
2015 		 */
2016 		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2017 		hdr->csum_start = csum_start;
2018 		hdr->csum_offset = m->m_pkthdr.csum_data;
2019 		txq->vtntx_stats.vtxs_csum++;
2020 	}
2021 
2022 	if (flags & CSUM_TSO) {
2023 		if (__predict_false(proto != IPPROTO_TCP)) {
2024 			/* Likely failed to correctly parse the mbuf. */
2025 			sc->vtnet_stats.tx_tso_not_tcp++;
2026 			goto drop;
2027 		}
2028 
2029 		KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
2030 		    ("%s: mbuf %p TSO without checksum offload", __func__, m));
2031 
2032 		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2033 		if (error)
2034 			goto drop;
2035 	}
2036 
2037 	return (m);
2038 
2039 drop:
2040 	m_freem(m);
2041 	return (NULL);
2042 }
2043 
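/*
 * Enqueue a frame on the transmit virtqueue. The sglist places the
 * header in the first segment and the mbuf chain in the remaining ones,
 * so the chain may use at most VTNET_MAX_TX_SEGS - 1 segments. An overly
 * fragmented chain is collapsed once with m_collapse() before giving up
 * with ENOBUFS.
 */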
2044 static int
2045 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2046     struct vtnet_tx_header *txhdr)
2047 {
2048 	struct sglist sg;
2049 	struct sglist_seg segs[VTNET_MAX_TX_SEGS];
2050 	struct vtnet_softc *sc;
2051 	struct virtqueue *vq;
2052 	struct mbuf *m;
2053 	int collapsed, error;
2054 
2055 	vq = txq->vtntx_vq;
2056 	sc = txq->vtntx_sc;
2057 	m = *m_head;
2058 	collapsed = 0;
2059 
2060 	sglist_init(&sg, VTNET_MAX_TX_SEGS, segs);
2061 	error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2062 	KASSERT(error == 0 && sg.sg_nseg == 1,
2063 	    ("%s: error %d adding header to sglist", __func__, error));
2064 
2065 again:
2066 	error = sglist_append_mbuf(&sg, m);
2067 	if (error) {
2068 		if (collapsed)
2069 			goto fail;
2070 
2071 		m = m_collapse(m, M_NOWAIT, VTNET_MAX_TX_SEGS - 1);
2072 		if (m == NULL)
2073 			goto fail;
2074 
2075 		*m_head = m;
2076 		collapsed = 1;
2077 		txq->vtntx_stats.vtxs_collapsed++;
2078 		goto again;
2079 	}
2080 
2081 	txhdr->vth_mbuf = m;
2082 	error = virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0);
2083 
2084 	return (error);
2085 
2086 fail:
2087 	m_freem(*m_head);
2088 	*m_head = NULL;
2089 
2090 	return (ENOBUFS);
2091 }
2092 
2093 static int
2094 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head)
2095 {
2096 	struct vtnet_softc *sc;
2097 	struct vtnet_tx_header *txhdr;
2098 	struct virtio_net_hdr *hdr;
2099 	struct mbuf *m;
2100 	int error;
2101 
2102 	sc = txq->vtntx_sc;
2103 	m = *m_head;
2104 	M_ASSERTPKTHDR(m);
2105 
2106 	txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO);
2107 	if (txhdr == NULL) {
2108 		m_freem(m);
2109 		*m_head = NULL;
2110 		return (ENOMEM);
2111 	}
2112 
2113 	/*
2114 	 * Always use the non-mergeable header, regardless of whether the
2115 	 * feature was negotiated. For transmit, num_buffers is always zero.
2116 	 * The vtnet_hdr_size is used to enqueue the correct header size.
2117 	 */
2118 	hdr = &txhdr->vth_uhdr.hdr;
2119 
2120 	if (m->m_flags & M_VLANTAG) {
2121 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2122 		if ((*m_head = m) == NULL) {
2123 			error = ENOBUFS;
2124 			goto fail;
2125 		}
2126 		m->m_flags &= ~M_VLANTAG;
2127 	}
2128 
2129 	if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2130 		m = vtnet_txq_offload(txq, m, hdr);
2131 		if ((*m_head = m) == NULL) {
2132 			error = ENOBUFS;
2133 			goto fail;
2134 		}
2135 	}
2136 
2137 	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2138 	if (error == 0)
2139 		return (0);
2140 
2141 fail:
2142 	uma_zfree(vtnet_tx_header_zone, txhdr);
2143 
2144 	return (error);
2145 }
2146 
2147 #ifdef VTNET_LEGACY_TX
2148 
2149 static void
2150 vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
2151 {
2152 	struct vtnet_softc *sc;
2153 	struct virtqueue *vq;
2154 	struct mbuf *m0;
2155 	int enq;
2156 
2157 	sc = txq->vtntx_sc;
2158 	vq = txq->vtntx_vq;
2159 	enq = 0;
2160 
2161 	VTNET_TXQ_LOCK_ASSERT(txq);
2162 
2163 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2164 	    sc->vtnet_link_active == 0)
2165 		return;
2166 
2167 	vtnet_txq_eof(txq);
2168 
2169 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2170 		if (virtqueue_full(vq))
2171 			break;
2172 
2173 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
2174 		if (m0 == NULL)
2175 			break;
2176 
2177 		if (vtnet_txq_encap(txq, &m0) != 0) {
2178 			if (m0 != NULL)
2179 				IFQ_DRV_PREPEND(&ifp->if_snd, m0);
2180 			break;
2181 		}
2182 
2183 		enq++;
2184 		ETHER_BPF_MTAP(ifp, m0);
2185 	}
2186 
2187 	if (enq > 0) {
2188 		virtqueue_notify(vq);
2189 		txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2190 	}
2191 }
2192 
2193 static void
2194 vtnet_start(struct ifnet *ifp)
2195 {
2196 	struct vtnet_softc *sc;
2197 	struct vtnet_txq *txq;
2198 
2199 	sc = ifp->if_softc;
2200 	txq = &sc->vtnet_txqs[0];
2201 
2202 	VTNET_TXQ_LOCK(txq);
2203 	vtnet_start_locked(txq, ifp);
2204 	VTNET_TXQ_UNLOCK(txq);
2205 }
2206 
2207 #else /* !VTNET_LEGACY_TX */
2208 
2209 static int
2210 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2211 {
2212 	struct vtnet_softc *sc;
2213 	struct virtqueue *vq;
2214 	struct buf_ring *br;
2215 	struct ifnet *ifp;
2216 	int enq, error;
2217 
2218 	sc = txq->vtntx_sc;
2219 	vq = txq->vtntx_vq;
2220 	br = txq->vtntx_br;
2221 	ifp = sc->vtnet_ifp;
2222 	enq = 0;
2223 	error = 0;
2224 
2225 	VTNET_TXQ_LOCK_ASSERT(txq);
2226 
2227 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2228 	    sc->vtnet_link_active == 0) {
2229 		if (m != NULL)
2230 			error = drbr_enqueue(ifp, br, m);
2231 		return (error);
2232 	}
2233 
2234 	if (m != NULL) {
2235 		error = drbr_enqueue(ifp, br, m);
2236 		if (error)
2237 			return (error);
2238 	}
2239 
2240 	vtnet_txq_eof(txq);
2241 
2242 	while ((m = drbr_peek(ifp, br)) != NULL) {
2243 		error = vtnet_txq_encap(txq, &m);
2244 		if (error) {
2245 			if (m != NULL)
2246 				drbr_putback(ifp, br, m);
2247 			else
2248 				drbr_advance(ifp, br);
2249 			break;
2250 		}
2251 		drbr_advance(ifp, br);
2252 
2253 		enq++;
2254 		ETHER_BPF_MTAP(ifp, m);
2255 	}
2256 
2257 	if (enq > 0) {
2258 		virtqueue_notify(vq);
2259 		txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2260 	}
2261 
2262 	return (error);
2263 }
2264 
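/*
 * Multiqueue transmit entry point. The queue is selected by the mbuf's
 * flow ID modulo the active queue pairs when M_FLOWID is set, otherwise
 * by the current CPU. If the queue lock is contended, the frame is left
 * on the buf_ring and the deferred task drains it later.
 */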
2265 static int
2266 vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2267 {
2268 	struct vtnet_softc *sc;
2269 	struct vtnet_txq *txq;
2270 	int i, npairs, error;
2271 
2272 	sc = ifp->if_softc;
2273 	npairs = sc->vtnet_act_vq_pairs;
2274 
2275 	if (m->m_flags & M_FLOWID)
2276 		i = m->m_pkthdr.flowid % npairs;
2277 	else
2278 		i = curcpu % npairs;
2279 
2280 	txq = &sc->vtnet_txqs[i];
2281 
2282 	if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2283 		error = vtnet_txq_mq_start_locked(txq, m);
2284 		VTNET_TXQ_UNLOCK(txq);
2285 	} else {
2286 		error = drbr_enqueue(ifp, txq->vtntx_br, m);
2287 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2288 	}
2289 
2290 	return (error);
2291 }
2292 
2293 static void
2294 vtnet_txq_tq_deferred(void *xtxq, int pending)
2295 {
2296 	struct vtnet_softc *sc;
2297 	struct vtnet_txq *txq;
2298 
2299 	txq = xtxq;
2300 	sc = txq->vtntx_sc;
2301 
2302 	VTNET_TXQ_LOCK(txq);
2303 	if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2304 		vtnet_txq_mq_start_locked(txq, NULL);
2305 	VTNET_TXQ_UNLOCK(txq);
2306 }
2307 
2308 #endif /* VTNET_LEGACY_TX */
2309 
2310 static void
2311 vtnet_txq_tq_intr(void *xtxq, int pending)
2312 {
2313 	struct vtnet_softc *sc;
2314 	struct vtnet_txq *txq;
2315 	struct ifnet *ifp;
2316 
2317 	txq = xtxq;
2318 	sc = txq->vtntx_sc;
2319 	ifp = sc->vtnet_ifp;
2320 
2321 	VTNET_TXQ_LOCK(txq);
2322 
2323 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2324 		VTNET_TXQ_UNLOCK(txq);
2325 		return;
2326 	}
2327 
2328 	vtnet_txq_eof(txq);
2329 
2330 #ifdef VTNET_LEGACY_TX
2331 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2332 		vtnet_start_locked(txq, ifp);
2333 #else
2334 	if (!drbr_empty(ifp, txq->vtntx_br))
2335 		vtnet_txq_mq_start_locked(txq, NULL);
2336 #endif
2337 
2338 	if (vtnet_txq_enable_intr(txq) != 0) {
2339 		vtnet_txq_disable_intr(txq);
2340 		txq->vtntx_stats.vtxs_rescheduled++;
2341 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2342 	}
2343 
2344 	VTNET_TXQ_UNLOCK(txq);
2345 }
2346 
2347 static void
2348 vtnet_txq_eof(struct vtnet_txq *txq)
2349 {
2350 	struct virtqueue *vq;
2351 	struct vtnet_tx_header *txhdr;
2352 	struct mbuf *m;
2353 
2354 	vq = txq->vtntx_vq;
2355 	VTNET_TXQ_LOCK_ASSERT(txq);
2356 
2357 	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2358 		m = txhdr->vth_mbuf;
2359 
2360 		txq->vtntx_stats.vtxs_opackets++;
2361 		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2362 		if (m->m_flags & M_MCAST)
2363 			txq->vtntx_stats.vtxs_omcasts++;
2364 
2365 		m_freem(m);
2366 		uma_zfree(vtnet_tx_header_zone, txhdr);
2367 	}
2368 
2369 	if (virtqueue_empty(vq))
2370 		txq->vtntx_watchdog = 0;
2371 }
2372 
2373 static void
2374 vtnet_tx_vq_intr(void *xtxq)
2375 {
2376 	struct vtnet_softc *sc;
2377 	struct vtnet_txq *txq;
2378 	struct ifnet *ifp;
2379 	int tries;
2380 
2381 	txq = xtxq;
2382 	sc = txq->vtntx_sc;
2383 	ifp = sc->vtnet_ifp;
2384 	tries = 0;
2385 
2386 	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2387 		/*
2388 		 * Ignore this interrupt. Either it is spurious, or this is a
2389 		 * multiqueue configuration without per-VQ MSIX, so every queue
2390 		 * needs to be polled (a brain dead configuration we could try
2391 		 * harder to avoid).
2392 		 */
2393 		vtnet_txq_disable_intr(txq);
2394 		return;
2395 	}
2396 
2397 again:
2398 	VTNET_TXQ_LOCK(txq);
2399 
2400 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2401 		VTNET_TXQ_UNLOCK(txq);
2402 		return;
2403 	}
2404 
2405 	vtnet_txq_eof(txq);
2406 
2407 #ifdef VTNET_LEGACY_TX
2408 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2409 		vtnet_start_locked(txq, ifp);
2410 #else
2411 	if (!drbr_empty(ifp, txq->vtntx_br))
2412 		vtnet_txq_mq_start_locked(txq, NULL);
2413 #endif
2414 
2415 	if (vtnet_txq_enable_intr(txq) != 0) {
2416 		vtnet_txq_disable_intr(txq);
2417 		/*
2418 		 * This is an occasional race, so retry a few times
2419 		 * before scheduling the taskqueue.
2420 		 */
2421 		VTNET_TXQ_UNLOCK(txq);
2422 		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
2423 			goto again;
2424 		txq->vtntx_stats.vtxs_rescheduled++;
2425 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2426 	} else
2427 		VTNET_TXQ_UNLOCK(txq);
2428 }
2429 
2430 static void
2431 vtnet_tx_start_all(struct vtnet_softc *sc)
2432 {
2433 	struct ifnet *ifp;
2434 	struct vtnet_txq *txq;
2435 	int i;
2436 
2437 	ifp = sc->vtnet_ifp;
2438 	VTNET_CORE_LOCK_ASSERT(sc);
2439 
2440 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2441 		txq = &sc->vtnet_txqs[i];
2442 
2443 		VTNET_TXQ_LOCK(txq);
2444 #ifdef VTNET_LEGACY_TX
2445 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2446 			vtnet_start_locked(txq, ifp);
2447 #else
2448 		if (!drbr_empty(ifp, txq->vtntx_br))
2449 			vtnet_txq_mq_start_locked(txq, NULL);
2450 #endif
2451 		VTNET_TXQ_UNLOCK(txq);
2452 	}
2453 }
2454 
2455 #ifndef VTNET_LEGACY_TX
2456 static void
2457 vtnet_qflush(struct ifnet *ifp)
2458 {
2459 	struct vtnet_softc *sc;
2460 	struct vtnet_txq *txq;
2461 	struct mbuf *m;
2462 	int i;
2463 
2464 	sc = ifp->if_softc;
2465 
2466 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2467 		txq = &sc->vtnet_txqs[i];
2468 
2469 		VTNET_TXQ_LOCK(txq);
2470 		while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2471 			m_freem(m);
2472 		VTNET_TXQ_UNLOCK(txq);
2473 	}
2474 
2475 	if_qflush(ifp);
2476 }
2477 #endif
2478 
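/*
 * Transmit watchdog. vtntx_watchdog is armed to VTNET_TX_TIMEOUT
 * whenever frames are enqueued and decremented once per second from
 * vtnet_tick(). A nonzero return causes the caller to reinitialize the
 * interface.
 */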
2479 static int
2480 vtnet_watchdog(struct vtnet_txq *txq)
2481 {
2482 	struct vtnet_softc *sc;
2483 
2484 	sc = txq->vtntx_sc;
2485 
2486 	VTNET_TXQ_LOCK(txq);
2487 	if (sc->vtnet_flags & VTNET_FLAG_EVENT_IDX)
2488 		vtnet_txq_eof(txq);
2489 	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2490 		VTNET_TXQ_UNLOCK(txq);
2491 		return (0);
2492 	}
2493 	VTNET_TXQ_UNLOCK(txq);
2494 
2495 	if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n",
2496 	    txq->vtntx_id);
2497 	return (1);
2498 }
2499 
2500 static void
2501 vtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum)
2502 {
2503 	struct vtnet_rxq_stats *st;
2504 
2505 	st = &rxq->vtnrx_stats;
2506 
2507 	accum->vrxs_ipackets += st->vrxs_ipackets;
2508 	accum->vrxs_ibytes += st->vrxs_ibytes;
2509 	accum->vrxs_iqdrops += st->vrxs_iqdrops;
	accum->vrxs_ierrors += st->vrxs_ierrors;
2510 	accum->vrxs_csum += st->vrxs_csum;
2511 	accum->vrxs_csum_failed += st->vrxs_csum_failed;
2512 	accum->vrxs_rescheduled += st->vrxs_rescheduled;
2513 }
2514 
2515 static void
2516 vtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats *accum)
2517 {
2518 	struct vtnet_txq_stats *st;
2519 
2520 	st = &txq->vtntx_stats;
2521 
2522 	accum->vtxs_opackets += st->vtxs_opackets;
2523 	accum->vtxs_obytes += st->vtxs_obytes;
2524 	accum->vtxs_csum += st->vtxs_csum;
2525 	accum->vtxs_tso += st->vtxs_tso;
2526 	accum->vtxs_collapsed += st->vtxs_collapsed;
2527 	accum->vtxs_rescheduled += st->vtxs_rescheduled;
2528 }
2529 
2530 static void
2531 vtnet_accumulate_stats(struct vtnet_softc *sc)
2532 {
2533 	struct ifnet *ifp;
2534 	struct vtnet_statistics *st;
2535 	struct vtnet_rxq_stats rxaccum;
2536 	struct vtnet_txq_stats txaccum;
2537 	int i;
2538 
2539 	ifp = sc->vtnet_ifp;
2540 	st = &sc->vtnet_stats;
2541 	bzero(&rxaccum, sizeof(struct vtnet_rxq_stats));
2542 	bzero(&txaccum, sizeof(struct vtnet_txq_stats));
2543 
2544 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2545 		vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum);
2546 		vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum);
2547 	}
2548 
2549 	st->rx_csum_offloaded = rxaccum.vrxs_csum;
2550 	st->rx_csum_failed = rxaccum.vrxs_csum_failed;
2551 	st->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
2552 	st->tx_csum_offloaded = txaccum.vtxs_csum;
2553 	st->tx_tso_offloaded = txaccum.vtxs_tso;
2554 	st->tx_task_rescheduled = txaccum.vtxs_rescheduled;
2555 
2556 	/*
2557 	 * With the exception of if_ierrors, these ifnet statistics are
2558 	 * only updated in the driver, so just set them to our accumulated
2559 	 * values. if_ierrors is updated in ether_input() for malformed
2560 	 * frames that we should have already discarded.
2561 	 */
2562 	ifp->if_ipackets = rxaccum.vrxs_ipackets;
2563 	ifp->if_iqdrops = rxaccum.vrxs_iqdrops;
2564 	ifp->if_ierrors = rxaccum.vrxs_ierrors;
2565 	ifp->if_opackets = txaccum.vtxs_opackets;
2566 #ifndef VTNET_LEGACY_TX
2567 	ifp->if_obytes = txaccum.vtxs_obytes;
2568 	ifp->if_omcasts = txaccum.vtxs_omcasts;
2569 #endif
2570 }
2571 
2572 static void
2573 vtnet_tick(void *xsc)
2574 {
2575 	struct vtnet_softc *sc;
2576 	struct ifnet *ifp;
2577 	int i, timedout;
2578 
2579 	sc = xsc;
2580 	ifp = sc->vtnet_ifp;
2581 	timedout = 0;
2582 
2583 	VTNET_CORE_LOCK_ASSERT(sc);
2584 	vtnet_accumulate_stats(sc);
2585 
2586 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
2587 		timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
2588 
2589 	if (timedout != 0) {
2590 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2591 		vtnet_init_locked(sc);
2592 	} else
2593 		callout_schedule(&sc->vtnet_tick_ch, hz);
2594 }
2595 
2596 static void
2597 vtnet_start_taskqueues(struct vtnet_softc *sc)
2598 {
2599 	device_t dev;
2600 	struct vtnet_rxq *rxq;
2601 	struct vtnet_txq *txq;
2602 	int i, error;
2603 
2604 	dev = sc->vtnet_dev;
2605 
2606 	/*
2607 	 * Errors here are very difficult to recover from - we cannot
2608 	 * easily fail because, if this is during boot, we will hang
2609 	 * when freeing any successfully started taskqueues because
2610 	 * the scheduler isn't up yet.
2611 	 *
2612 	 * Most drivers just ignore the return value - it only fails
2613 	 * with ENOMEM so an error is not likely.
2614 	 */
2615 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2616 		rxq = &sc->vtnet_rxqs[i];
2617 		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
2618 		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
2619 		if (error) {
2620 			device_printf(dev, "failed to start rx taskq %d\n",
2621 			    rxq->vtnrx_id);
2622 		}
2623 
2624 		txq = &sc->vtnet_txqs[i];
2625 		error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
2626 		    "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
2627 		if (error) {
2628 			device_printf(dev, "failed to start tx taskq %d\n",
2629 			    txq->vtntx_id);
2630 		}
2631 	}
2632 }
2633 
2634 static void
2635 vtnet_free_taskqueues(struct vtnet_softc *sc)
2636 {
2637 	struct vtnet_rxq *rxq;
2638 	struct vtnet_txq *txq;
2639 	int i;
2640 
2641 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2642 		rxq = &sc->vtnet_rxqs[i];
2643 		if (rxq->vtnrx_tq != NULL) {
2644 			taskqueue_free(rxq->vtnrx_tq);
2645 			rxq->vtnrx_tq = NULL;
2646 		}
2647 
2648 		txq = &sc->vtnet_txqs[i];
2649 		if (txq->vtntx_tq != NULL) {
2650 			taskqueue_free(txq->vtntx_tq);
2651 			txq->vtntx_tq = NULL;
2652 		}
2653 	}
2654 }
2655 
2656 static void
2657 vtnet_drain_taskqueues(struct vtnet_softc *sc)
2658 {
2659 	struct vtnet_rxq *rxq;
2660 	struct vtnet_txq *txq;
2661 	int i;
2662 
2663 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2664 		rxq = &sc->vtnet_rxqs[i];
2665 		if (rxq->vtnrx_tq != NULL)
2666 			taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2667 
2668 		txq = &sc->vtnet_txqs[i];
2669 		if (txq->vtntx_tq != NULL) {
2670 			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
2671 #ifndef VTNET_LEGACY_TX
2672 			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
2673 #endif
2674 		}
2675 	}
2676 }
2677 
2678 static void
2679 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
2680 {
2681 	struct vtnet_rxq *rxq;
2682 	struct vtnet_txq *txq;
2683 	int i;
2684 
2685 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2686 		rxq = &sc->vtnet_rxqs[i];
2687 		vtnet_rxq_free_mbufs(rxq);
2688 
2689 		txq = &sc->vtnet_txqs[i];
2690 		vtnet_txq_free_mbufs(txq);
2691 	}
2692 }
2693 
2694 static void
2695 vtnet_stop_rendezvous(struct vtnet_softc *sc)
2696 {
2697 	struct vtnet_rxq *rxq;
2698 	struct vtnet_txq *txq;
2699 	int i;
2700 
2701 	/*
2702 	 * Lock and unlock the per-queue mutex so we know the stop
2703 	 * state is visible. Doing only the active queues should be
2704 	 * sufficient, but it does not cost much extra to do all the
2705 	 * queues. Note we hold the core mutex here too.
2706 	 */
2707 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2708 		rxq = &sc->vtnet_rxqs[i];
2709 		VTNET_RXQ_LOCK(rxq);
2710 		VTNET_RXQ_UNLOCK(rxq);
2711 
2712 		txq = &sc->vtnet_txqs[i];
2713 		VTNET_TXQ_LOCK(txq);
2714 		VTNET_TXQ_UNLOCK(txq);
2715 	}
2716 }
2717 
2718 static void
2719 vtnet_stop(struct vtnet_softc *sc)
2720 {
2721 	device_t dev;
2722 	struct ifnet *ifp;
2723 
2724 	dev = sc->vtnet_dev;
2725 	ifp = sc->vtnet_ifp;
2726 
2727 	VTNET_CORE_LOCK_ASSERT(sc);
2728 
2729 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2730 	sc->vtnet_link_active = 0;
2731 	callout_stop(&sc->vtnet_tick_ch);
2732 
2733 	/* Only advisory. */
2734 	vtnet_disable_interrupts(sc);
2735 
2736 	/*
2737 	 * Stop the host adapter. This resets it to the pre-initialized
2738 	 * state. It will not generate any interrupts until after it is
2739 	 * reinitialized.
2740 	 */
2741 	virtio_stop(dev);
2742 	vtnet_stop_rendezvous(sc);
2743 
2744 	/* Free any mbufs left in the virtqueues. */
2745 	vtnet_drain_rxtx_queues(sc);
2746 }
2747 
2748 static int
2749 vtnet_virtio_reinit(struct vtnet_softc *sc)
2750 {
2751 	device_t dev;
2752 	struct ifnet *ifp;
2753 	uint64_t features;
2754 	int mask, error;
2755 
2756 	dev = sc->vtnet_dev;
2757 	ifp = sc->vtnet_ifp;
2758 	features = sc->vtnet_features;
2759 
2760 	mask = 0;
2761 #if defined(INET)
2762 	mask |= IFCAP_RXCSUM;
2763 #endif
2764 #if defined (INET6)
2765 	mask |= IFCAP_RXCSUM_IPV6;
2766 #endif
2767 
2768 	/*
2769 	 * Re-negotiate with the host, removing any disabled receive
2770 	 * features. Transmit features are disabled only on our side
2771 	 * via if_capenable and if_hwassist.
2772 	 */
2773 
2774 	if (ifp->if_capabilities & mask) {
2775 		/*
2776 		 * We require both IPv4 and IPv6 offloading to be enabled
2777 		 * in order to negotiate it: VirtIO does not distinguish
2778 		 * between the two.
2779 		 */
2780 		if ((ifp->if_capenable & mask) != mask)
2781 			features &= ~VIRTIO_NET_F_GUEST_CSUM;
2782 	}
2783 
2784 	if (ifp->if_capabilities & IFCAP_LRO) {
2785 		if ((ifp->if_capenable & IFCAP_LRO) == 0)
2786 			features &= ~VTNET_LRO_FEATURES;
2787 	}
2788 
2789 	if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2790 		if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2791 			features &= ~VIRTIO_NET_F_CTRL_VLAN;
2792 	}
2793 
2794 	error = virtio_reinit(dev, features);
2795 	if (error)
2796 		device_printf(dev, "virtio reinit error %d\n", error);
2797 
2798 	return (error);
2799 }
2800 
2801 static void
2802 vtnet_init_rx_filters(struct vtnet_softc *sc)
2803 {
2804 	struct ifnet *ifp;
2805 
2806 	ifp = sc->vtnet_ifp;
2807 
2808 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2809 		/* Restore promiscuous and all-multicast modes. */
2810 		vtnet_rx_filter(sc);
2811 		/* Restore filtered MAC addresses. */
2812 		vtnet_rx_filter_mac(sc);
2813 	}
2814 
2815 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2816 		vtnet_rx_filter_vlan(sc);
2817 }
2818 
2819 static int
2820 vtnet_init_rx_queues(struct vtnet_softc *sc)
2821 {
2822 	device_t dev;
2823 	struct vtnet_rxq *rxq;
2824 	int i, clsize, error;
2825 
2826 	dev = sc->vtnet_dev;
2827 
2828 	/*
2829 	 * Use the new cluster size if one has been set (via a MTU
2830 	 * change). Otherwise, use the standard 2K clusters.
2831 	 *
2832 	 * BMV: It might make sense to use page sized clusters as
2833 	 * the default (depending on the features negotiated).
2834 	 */
2835 	 * Use the new cluster size if one has been set (via an MTU
2836 		clsize = sc->vtnet_rx_new_clsize;
2837 		sc->vtnet_rx_new_clsize = 0;
2838 	} else
2839 		clsize = MCLBYTES;
2840 
2841 	sc->vtnet_rx_clsize = clsize;
2842 	sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
2843 
2844 	/* The first segment is reserved for the header. */
2845 	KASSERT(sc->vtnet_rx_nmbufs < VTNET_MAX_RX_SEGS,
2846 	    ("%s: too many rx mbufs %d", __func__, sc->vtnet_rx_nmbufs));
2847 
2848 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2849 		rxq = &sc->vtnet_rxqs[i];
2850 
2851 		/* Hold the lock to satisfy asserts. */
2852 		VTNET_RXQ_LOCK(rxq);
2853 		error = vtnet_rxq_populate(rxq);
2854 		VTNET_RXQ_UNLOCK(rxq);
2855 
2856 		if (error) {
2857 			device_printf(dev,
2858 			    "cannot allocate mbufs for Rx queue %d\n", i);
2859 			return (error);
2860 		}
2861 	}
2862 
2863 	return (0);
2864 }
2865 
2866 static int
2867 vtnet_init_tx_queues(struct vtnet_softc *sc)
2868 {
2869 	struct vtnet_txq *txq;
2870 	int i;
2871 
2872 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2873 		txq = &sc->vtnet_txqs[i];
2874 		txq->vtntx_watchdog = 0;
2875 	}
2876 
2877 	return (0);
2878 }
2879 
2880 static int
2881 vtnet_init_rxtx_queues(struct vtnet_softc *sc)
2882 {
2883 	int error;
2884 
2885 	error = vtnet_init_rx_queues(sc);
2886 	if (error)
2887 		return (error);
2888 
2889 	error = vtnet_init_tx_queues(sc);
2890 	if (error)
2891 		return (error);
2892 
2893 	return (0);
2894 }
2895 
2896 static void
2897 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
2898 {
2899 	device_t dev;
2900 	int npairs;
2901 
2902 	dev = sc->vtnet_dev;
2903 
2904 	if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
2905 		MPASS(sc->vtnet_max_vq_pairs == 1);
2906 		sc->vtnet_act_vq_pairs = 1;
2907 		return;
2908 	}
2909 
2910 	/* BMV: Just use the maximum configured for now. */
2911 	npairs = sc->vtnet_max_vq_pairs;
2912 
2913 	if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
2914 		device_printf(dev,
2915 		    "cannot set active queue pairs to %d\n", npairs);
2916 		npairs = 1;
2917 	}
2918 
2919 	sc->vtnet_act_vq_pairs = npairs;
2920 }
2921 
2922 static int
2923 vtnet_reinit(struct vtnet_softc *sc)
2924 {
2925 	device_t dev;
2926 	struct ifnet *ifp;
2927 	int error;
2928 
2929 	dev = sc->vtnet_dev;
2930 	ifp = sc->vtnet_ifp;
2931 
2932 	/* Use the current MAC address. */
2933 	bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
2934 	vtnet_set_hwaddr(sc);
2935 
2936 	vtnet_set_active_vq_pairs(sc);
2937 
2938 	ifp->if_hwassist = 0;
2939 	if (ifp->if_capenable & IFCAP_TXCSUM)
2940 		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
2941 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2942 		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
2943 	if (ifp->if_capenable & IFCAP_TSO4)
2944 		ifp->if_hwassist |= CSUM_TSO;
2945 	if (ifp->if_capenable & IFCAP_TSO6)
2946 		ifp->if_hwassist |= CSUM_TSO; /* No CSUM_TSO_IPV6. */
2947 
2948 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
2949 		vtnet_init_rx_filters(sc);
2950 
2951 	error = vtnet_init_rxtx_queues(sc);
2952 	if (error)
2953 		return (error);
2954 
2955 	vtnet_enable_interrupts(sc);
2956 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2957 
2958 	return (0);
2959 }
2960 
2961 static void
2962 vtnet_init_locked(struct vtnet_softc *sc)
2963 {
2964 	device_t dev;
2965 	struct ifnet *ifp;
2966 
2967 	dev = sc->vtnet_dev;
2968 	ifp = sc->vtnet_ifp;
2969 
2970 	VTNET_CORE_LOCK_ASSERT(sc);
2971 
2972 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2973 		return;
2974 
2975 	vtnet_stop(sc);
2976 
2977 	/* Reinitialize with the host. */
2978 	if (vtnet_virtio_reinit(sc) != 0)
2979 		goto fail;
2980 
2981 	if (vtnet_reinit(sc) != 0)
2982 		goto fail;
2983 
2984 	virtio_reinit_complete(dev);
2985 
2986 	vtnet_update_link_status(sc);
2987 	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
2988 
2989 	return;
2990 
2991 fail:
2992 	vtnet_stop(sc);
2993 }
2994 
2995 static void
2996 vtnet_init(void *xsc)
2997 {
2998 	struct vtnet_softc *sc;
2999 
3000 	sc = xsc;
3001 
3002 	VTNET_CORE_LOCK(sc);
3003 	vtnet_init_locked(sc);
3004 	VTNET_CORE_UNLOCK(sc);
3005 }
3006 
3007 static void
3008 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3009 {
3010 	struct virtqueue *vq;
3011 
3012 	vq = sc->vtnet_ctrl_vq;
3013 
3014 	/*
3015 	 * The control virtqueue is only polled and therefore it should
3016 	 * already be empty.
3017 	 */
3018 	KASSERT(virtqueue_empty(vq),
3019 	    ("%s: ctrl vq %p not empty", __func__, vq));
3020 }
3021 
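/*
 * Execute a command on the control virtqueue. The sglist is expected to
 * contain the driver-readable segments (the control header and command
 * payload) followed by a single device-writable ack byte; the callers
 * pass sg_nseg - 1 readable segments and 1 writable. The queue is
 * polled, so the ack can be examined as soon as this returns.
 */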
3022 static void
3023 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3024     struct sglist *sg, int readable, int writable)
3025 {
3026 	struct virtqueue *vq;
3027 
3028 	vq = sc->vtnet_ctrl_vq;
3029 
3030 	VTNET_CORE_LOCK_ASSERT(sc);
3031 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
3032 	    ("%s: CTRL_VQ feature not negotiated", __func__));
3033 
3034 	if (!virtqueue_empty(vq))
3035 		return;
3036 	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
3037 		return;
3038 
3039 	/*
3040 	 * Poll for the response, but the command is likely already
3041 	 * done when we return from the notify.
3042 	 */
3043 	virtqueue_notify(vq);
3044 	virtqueue_poll(vq, NULL);
3045 }
3046 
3047 static int
3048 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3049 {
3050 	struct virtio_net_ctrl_hdr hdr;
3051 	struct sglist_seg segs[3];
3052 	struct sglist sg;
3053 	uint8_t ack;
3054 	int error;
3055 
3056 	hdr.class = VIRTIO_NET_CTRL_MAC;
3057 	hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3058 	ack = VIRTIO_NET_ERR;
3059 
3060 	sglist_init(&sg, 3, segs);
3061 	error = 0;
3062 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3063 	error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
3064 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3065 	KASSERT(error == 0 && sg.sg_nseg == 3,
3066 	    ("%s: error %d adding set MAC msg to sglist", __func__, error));
3067 
3068 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3069 
3070 	return (ack == VIRTIO_NET_OK ? 0 : EIO);
3071 }
3072 
3073 static int
3074 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3075 {
3076 	struct sglist_seg segs[3];
3077 	struct sglist sg;
3078 	struct {
3079 		struct virtio_net_ctrl_hdr hdr;
3080 		uint8_t pad1;
3081 		struct virtio_net_ctrl_mq mq;
3082 		uint8_t pad2;
3083 		uint8_t ack;
3084 	} s;
3085 	int error;
3086 
3087 	s.hdr.class = VIRTIO_NET_CTRL_MQ;
3088 	s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3089 	s.mq.virtqueue_pairs = npairs;
3090 	s.ack = VIRTIO_NET_ERR;
3091 
3092 	sglist_init(&sg, 3, segs);
3093 	error = 0;
3094 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3095 	error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3096 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3097 	KASSERT(error == 0 && sg.sg_nseg == 3,
3098 	    ("%s: error %d adding MQ message to sglist", __func__, error));
3099 
3100 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3101 
3102 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3103 }
3104 
3105 static int
3106 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
3107 {
3108 	struct sglist_seg segs[3];
3109 	struct sglist sg;
3110 	struct {
3111 		struct virtio_net_ctrl_hdr hdr;
3112 		uint8_t pad1;
3113 		uint8_t onoff;
3114 		uint8_t pad2;
3115 		uint8_t ack;
3116 	} s;
3117 	int error;
3118 
3119 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3120 	    ("%s: CTRL_RX feature not negotiated", __func__));
3121 
3122 	s.hdr.class = VIRTIO_NET_CTRL_RX;
3123 	s.hdr.cmd = cmd;
3124 	s.onoff = !!on;
3125 	s.ack = VIRTIO_NET_ERR;
3126 
3127 	sglist_init(&sg, 3, segs);
3128 	error = 0;
3129 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3130 	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3131 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3132 	KASSERT(error == 0 && sg.sg_nseg == 3,
3133 	    ("%s: error %d adding Rx message to sglist", __func__, error));
3134 
3135 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3136 
3137 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3138 }
3139 
3140 static int
3141 vtnet_set_promisc(struct vtnet_softc *sc, int on)
3142 {
3143 
3144 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3145 }
3146 
3147 static int
3148 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
3149 {
3150 
3151 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3152 }
3153 
3154 /*
3155  * The device defaults to promiscuous mode for backwards compatibility.
3156  * Turn it off at attach time if possible.
3157  */
3158 static void
3159 vtnet_attach_disable_promisc(struct vtnet_softc *sc)
3160 {
3161 	struct ifnet *ifp;
3162 
3163 	ifp = sc->vtnet_ifp;
3164 
3165 	VTNET_CORE_LOCK(sc);
3166 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) {
3167 		ifp->if_flags |= IFF_PROMISC;
3168 	} else if (vtnet_set_promisc(sc, 0) != 0) {
3169 		ifp->if_flags |= IFF_PROMISC;
3170 		device_printf(sc->vtnet_dev,
3171 		    "cannot disable default promiscuous mode\n");
3172 	}
3173 	VTNET_CORE_UNLOCK(sc);
3174 }
3175 
3176 static void
3177 vtnet_rx_filter(struct vtnet_softc *sc)
3178 {
3179 	device_t dev;
3180 	struct ifnet *ifp;
3181 
3182 	dev = sc->vtnet_dev;
3183 	ifp = sc->vtnet_ifp;
3184 
3185 	VTNET_CORE_LOCK_ASSERT(sc);
3186 
3187 	if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
3188 		device_printf(dev, "cannot %s promiscuous mode\n",
3189 		    ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
3190 
3191 	if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
3192 		device_printf(dev, "cannot %s all-multicast mode\n",
3193 		    ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
3194 }
3195 
3196 static void
3197 vtnet_rx_filter_mac(struct vtnet_softc *sc)
3198 {
3199 	struct virtio_net_ctrl_hdr hdr;
3200 	struct vtnet_mac_filter *filter;
3201 	struct sglist_seg segs[4];
3202 	struct sglist sg;
3203 	struct ifnet *ifp;
3204 	struct ifaddr *ifa;
3205 	struct ifmultiaddr *ifma;
3206 	int ucnt, mcnt, promisc, allmulti, error;
3207 	uint8_t ack;
3208 
3209 	ifp = sc->vtnet_ifp;
3210 	filter = sc->vtnet_mac_filter;
3211 	ucnt = 0;
3212 	mcnt = 0;
3213 	promisc = 0;
3214 	allmulti = 0;
3215 
3216 	VTNET_CORE_LOCK_ASSERT(sc);
3217 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3218 	    ("%s: CTRL_RX feature not negotiated", __func__));
3219 
3220 	/* Unicast MAC addresses: */
3221 	if_addr_rlock(ifp);
3222 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3223 		if (ifa->ifa_addr->sa_family != AF_LINK)
3224 			continue;
3225 		else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3226 		    sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3227 			continue;
3228 		else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
3229 			promisc = 1;
3230 			break;
3231 		}
3232 
3233 		bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3234 		    &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
3235 		ucnt++;
3236 	}
3237 	if_addr_runlock(ifp);
3238 
3239 	if (promisc != 0) {
3240 		filter->vmf_unicast.nentries = 0;
3241 		if_printf(ifp, "more than %d MAC addresses assigned, "
3242 		    "falling back to promiscuous mode\n",
3243 		    VTNET_MAX_MAC_ENTRIES);
3244 	} else
3245 		filter->vmf_unicast.nentries = ucnt;
3246 
3247 	/* Multicast MAC addresses: */
3248 	if_maddr_rlock(ifp);
3249 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3250 		if (ifma->ifma_addr->sa_family != AF_LINK)
3251 			continue;
3252 		else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
3253 			allmulti = 1;
3254 			break;
3255 		}
3256 
3257 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3258 		    &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
3259 		mcnt++;
3260 	}
3261 	if_maddr_runlock(ifp);
3262 
3263 	if (allmulti != 0) {
3264 		filter->vmf_multicast.nentries = 0;
3265 		if_printf(ifp, "more than %d multicast MAC addresses "
3266 		    "assigned, falling back to all-multicast mode\n",
3267 		    VTNET_MAX_MAC_ENTRIES);
3268 	} else
3269 		filter->vmf_multicast.nentries = mcnt;
3270 
3271 	if (promisc != 0 && allmulti != 0)
3272 		goto out;
3273 
3274 	hdr.class = VIRTIO_NET_CTRL_MAC;
3275 	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3276 	ack = VIRTIO_NET_ERR;
3277 
3278 	sglist_init(&sg, 4, segs);
3279 	error = 0;
3280 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3281 	error |= sglist_append(&sg, &filter->vmf_unicast,
3282 	    sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
3283 	error |= sglist_append(&sg, &filter->vmf_multicast,
3284 	    sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
3285 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3286 	KASSERT(error == 0 && sg.sg_nseg == 4,
3287 	    ("%s: error %d adding MAC filter msg to sglist", __func__, error));
3288 
3289 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3290 
3291 	if (ack != VIRTIO_NET_OK)
3292 		if_printf(ifp, "error setting host MAC filter table\n");
3293 
3294 out:
3295 	if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
3296 		if_printf(ifp, "cannot enable promiscuous mode\n");
3297 	if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
3298 		if_printf(ifp, "cannot enable all-multicast mode\n");
3299 }
3300 
3301 static int
3302 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3303 {
3304 	struct sglist_seg segs[3];
3305 	struct sglist sg;
3306 	struct {
3307 		struct virtio_net_ctrl_hdr hdr;
3308 		uint8_t pad1;
3309 		uint16_t tag;
3310 		uint8_t pad2;
3311 		uint8_t ack;
3312 	} s;
3313 	int error;
3314 
3315 	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3316 	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3317 	s.tag = tag;
3318 	s.ack = VIRTIO_NET_ERR;
3319 
3320 	sglist_init(&sg, 3, segs);
3321 	error = 0;
3322 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3323 	error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3324 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3325 	KASSERT(error == 0 && sg.sg_nseg == 3,
3326 	    ("%s: error %d adding VLAN message to sglist", __func__, error));
3327 
3328 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3329 
3330 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3331 }
3332 
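/*
 * Program the host VLAN filter from the driver's shadow bitmap. The
 * bitmap covers the 4096 possible VLAN IDs in 32-bit words, so the tag
 * for a set bit is 32 * i + bit, which is what the
 * sizeof(w) * CHAR_BIT * i + bit expression below computes.
 */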
3333 static void
3334 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3335 {
3336 	uint32_t w;
3337 	uint16_t tag;
3338 	int i, bit;
3339 
3340 	VTNET_CORE_LOCK_ASSERT(sc);
3341 	KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
3342 	    ("%s: VLAN_FILTER feature not negotiated", __func__));
3343 
3344 	/* Enable the filter for each configured VLAN. */
3345 	for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3346 		w = sc->vtnet_vlan_filter[i];
3347 
3348 		while ((bit = ffs(w) - 1) != -1) {
3349 			w &= ~(1 << bit);
3350 			tag = sizeof(w) * CHAR_BIT * i + bit;
3351 
3352 			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3353 				device_printf(sc->vtnet_dev,
3354 				    "cannot enable VLAN %d filter\n", tag);
3355 			}
3356 		}
3357 	}
3358 }
3359 
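/*
 * Update one entry in the VLAN filter bitmap. idx selects the 32-bit
 * word and bit the position within it; for example, VLAN 100 maps to
 * word 3, bit 4 (100 >> 5 == 3, 100 & 0x1F == 4).
 */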
3360 static void
3361 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3362 {
3363 	struct ifnet *ifp;
3364 	int idx, bit;
3365 
3366 	ifp = sc->vtnet_ifp;
3367 	idx = (tag >> 5) & 0x7F;
3368 	bit = tag & 0x1F;
3369 
3370 	if (tag == 0 || tag > 4095)
3371 		return;
3372 
3373 	VTNET_CORE_LOCK(sc);
3374 
3375 	if (add)
3376 		sc->vtnet_vlan_filter[idx] |= (1 << bit);
3377 	else
3378 		sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3379 
3380 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
3381 	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3382 		device_printf(sc->vtnet_dev,
3383 		    "cannot %s VLAN %d %s the host filter table\n",
3384 		    add ? "add" : "remove", tag, add ? "to" : "from");
3385 	}
3386 
3387 	VTNET_CORE_UNLOCK(sc);
3388 }
3389 
3390 static void
3391 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3392 {
3393 
3394 	if (ifp->if_softc != arg)
3395 		return;
3396 
3397 	vtnet_update_vlan_filter(arg, 1, tag);
3398 }
3399 
3400 static void
3401 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3402 {
3403 
3404 	if (ifp->if_softc != arg)
3405 		return;
3406 
3407 	vtnet_update_vlan_filter(arg, 0, tag);
3408 }
3409 
3410 static int
3411 vtnet_is_link_up(struct vtnet_softc *sc)
3412 {
3413 	device_t dev;
3414 	struct ifnet *ifp;
3415 	uint16_t status;
3416 
3417 	dev = sc->vtnet_dev;
3418 	ifp = sc->vtnet_ifp;
3419 
3420 	if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
3421 		status = VIRTIO_NET_S_LINK_UP;
3422 	else
3423 		status = virtio_read_dev_config_2(dev,
3424 		    offsetof(struct virtio_net_config, status));
3425 
3426 	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3427 }
3428 
3429 static void
3430 vtnet_update_link_status(struct vtnet_softc *sc)
3431 {
3432 	struct ifnet *ifp;
3433 	int link;
3434 
3435 	ifp = sc->vtnet_ifp;
3436 
3437 	VTNET_CORE_LOCK_ASSERT(sc);
3438 	link = vtnet_is_link_up(sc);
3439 
3440 	/* Notify if the link status has changed. */
3441 	if (link != 0 && sc->vtnet_link_active == 0) {
3442 		sc->vtnet_link_active = 1;
3443 		if_link_state_change(ifp, LINK_STATE_UP);
3444 	} else if (link == 0 && sc->vtnet_link_active != 0) {
3445 		sc->vtnet_link_active = 0;
3446 		if_link_state_change(ifp, LINK_STATE_DOWN);
3447 	}
3448 }
3449 
3450 static int
3451 vtnet_ifmedia_upd(struct ifnet *ifp)
3452 {
3453 	struct vtnet_softc *sc;
3454 	struct ifmedia *ifm;
3455 
3456 	sc = ifp->if_softc;
3457 	ifm = &sc->vtnet_media;
3458 
3459 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3460 		return (EINVAL);
3461 
3462 	return (0);
3463 }
3464 
3465 static void
3466 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3467 {
3468 	struct vtnet_softc *sc;
3469 
3470 	sc = ifp->if_softc;
3471 
3472 	ifmr->ifm_status = IFM_AVALID;
3473 	ifmr->ifm_active = IFM_ETHER;
3474 
3475 	VTNET_CORE_LOCK(sc);
3476 	if (vtnet_is_link_up(sc) != 0) {
3477 		ifmr->ifm_status |= IFM_ACTIVE;
3478 		ifmr->ifm_active |= VTNET_MEDIATYPE;
3479 	} else
3480 		ifmr->ifm_active |= IFM_NONE;
3481 	VTNET_CORE_UNLOCK(sc);
3482 }
3483 
3484 static void
3485 vtnet_set_hwaddr(struct vtnet_softc *sc)
3486 {
3487 	device_t dev;
3488 
3489 	dev = sc->vtnet_dev;
3490 
3491 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
3492 		if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
3493 			device_printf(dev, "unable to set MAC address\n");
3494 	} else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
3495 		virtio_write_device_config(dev,
3496 		    offsetof(struct virtio_net_config, mac),
3497 		    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3498 	}
3499 }
3500 
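/*
 * Obtain the MAC address, either from the device config space when the
 * MAC feature was negotiated or by generating a random one. The 0xB2
 * first octet has the locally administered bit (0x02) set and the
 * multicast bit (0x01) clear, so the generated address is a valid
 * unicast address.
 */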
3501 static void
3502 vtnet_get_hwaddr(struct vtnet_softc *sc)
3503 {
3504 	device_t dev;
3505 
3506 	dev = sc->vtnet_dev;
3507 
3508 	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
3509 		/*
3510 		 * Generate a random locally administered unicast address.
3511 		 *
3512 		 * It would be nice to generate the same MAC address across
3513 		 * reboots, but it seems all the hosts currently available
3514 		 * support the MAC feature, so this isn't too important.
3515 		 */
3516 		sc->vtnet_hwaddr[0] = 0xB2;
3517 		arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
3518 		vtnet_set_hwaddr(sc);
3519 		return;
3520 	}
3521 
3522 	virtio_read_device_config(dev, offsetof(struct virtio_net_config, mac),
3523 	    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3524 }
3525 
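/*
 * Move a received 802.1Q tag out of the frame and into the mbuf packet
 * header. The bcopy() slides the 12 bytes of destination and source MAC
 * addresses forward over the 4-byte VLAN encapsulation, and m_adj() then
 * trims the now-duplicate bytes from the front of the mbuf.
 */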
3526 static void
3527 vtnet_vlan_tag_remove(struct mbuf *m)
3528 {
3529 	struct ether_vlan_header *evh;
3530 
3531 	evh = mtod(m, struct ether_vlan_header *);
3532 	m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
3533 	m->m_flags |= M_VLANTAG;
3534 
3535 	/* Strip the 802.1Q header. */
3536 	bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
3537 	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
3538 	m_adj(m, ETHER_VLAN_ENCAP_LEN);
3539 }
3540 
3541 static void
3542 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
3543     struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
3544 {
3545 	struct sysctl_oid *node;
3546 	struct sysctl_oid_list *list;
3547 	struct vtnet_rxq_stats *stats;
3548 	char namebuf[16];
3549 
3550 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
3551 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3552 	    CTLFLAG_RD, NULL, "Receive Queue");
3553 	list = SYSCTL_CHILDREN(node);
3554 
3555 	stats = &rxq->vtnrx_stats;
3556 
3557 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3558 	    &stats->vrxs_ipackets, "Receive packets");
3559 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3560 	    &stats->vrxs_ibytes, "Receive bytes");
3561 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3562 	    &stats->vrxs_iqdrops, "Receive drops");
3563 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3564 	    &stats->vrxs_ierrors, "Receive errors");
3565 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3566 	    &stats->vrxs_csum, "Receive checksum offloaded");
3567 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
3568 	    &stats->vrxs_csum_failed, "Receive checksum offload failed");
3569 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3570 	    &stats->vrxs_rescheduled,
3571 	    "Receive interrupt handler rescheduled");
3572 }
3573 
3574 static void
3575 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
3576     struct sysctl_oid_list *child, struct vtnet_txq *txq)
3577 {
3578 	struct sysctl_oid *node;
3579 	struct sysctl_oid_list *list;
3580 	struct vtnet_txq_stats *stats;
3581 	char namebuf[16];
3582 
3583 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
3584 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3585 	    CTLFLAG_RD, NULL, "Transmit Queue");
3586 	list = SYSCTL_CHILDREN(node);
3587 
3588 	stats = &txq->vtntx_stats;
3589 
3590 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3591 	    &stats->vtxs_opackets, "Transmit packets");
3592 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3593 	    &stats->vtxs_obytes, "Transmit bytes");
3594 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3595 	    &stats->vtxs_omcasts, "Transmit multicasts");
3596 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3597 	    &stats->vtxs_csum, "Transmit checksum offloaded");
3598 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3599 	    &stats->vtxs_tso, "Transmit segmentation offloaded");
3600 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "collapsed", CTLFLAG_RD,
3601 	    &stats->vtxs_collapsed, "Transmit mbufs collapsed");
3602 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3603 	    &stats->vtxs_rescheduled,
3604 	    "Transmit interrupt handler rescheduled");
3605 }
3606 
3607 static void
3608 vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
3609 {
3610 	device_t dev;
3611 	struct sysctl_ctx_list *ctx;
3612 	struct sysctl_oid *tree;
3613 	struct sysctl_oid_list *child;
3614 	int i;
3615 
3616 	dev = sc->vtnet_dev;
3617 	ctx = device_get_sysctl_ctx(dev);
3618 	tree = device_get_sysctl_tree(dev);
3619 	child = SYSCTL_CHILDREN(tree);
3620 
3621 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3622 		vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
3623 		vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
3624 	}
3625 }
3626 
3627 static void
3628 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
3629     struct sysctl_oid_list *child, struct vtnet_softc *sc)
3630 {
3631 	struct vtnet_statistics *stats;
3632 
3633 	stats = &sc->vtnet_stats;
3634 
3635 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
3636 	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
3637 	    "Mbuf cluster allocation failures");
3638 
3639 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
3640 	    CTLFLAG_RD, &stats->rx_frame_too_large,
3641 	    "Received frame larger than the mbuf chain");
3642 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
3643 	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
3644 	    "Enqueuing the replacement receive mbuf failed");
3645 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
3646 	    CTLFLAG_RD, &stats->rx_mergeable_failed,
3647 	    "Mergeable buffers receive failures");
3648 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
3649 	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
3650 	    "Received checksum offloaded buffer with unsupported "
3651 	    "Ethernet type");
3652 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
3653 	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
3654 	    "Received checksum offloaded buffer with incorrect IP protocol");
3655 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
3656 	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
3657 	    "Received checksum offloaded buffer with incorrect offset");
3658 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
3659 	    CTLFLAG_RD, &stats->rx_csum_bad_proto,
3660 	    "Received checksum offloaded buffer with incorrect protocol");
3661 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
3662 	    CTLFLAG_RD, &stats->rx_csum_failed,
3663 	    "Received buffer checksum offload failed");
3664 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
3665 	    CTLFLAG_RD, &stats->rx_csum_offloaded,
3666 	    "Received buffer checksum offload succeeded");
3667 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
3668 	    CTLFLAG_RD, &stats->rx_task_rescheduled,
3669 	    "Times the receive interrupt task rescheduled itself");
3670 
3671 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
3672 	    CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
3673 	    "Aborted transmit of checksum offloaded buffer with unknown "
3674 	    "Ethernet type");
3675 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
3676 	    CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
3677 	    "Aborted transmit of TSO buffer with unknown Ethernet type");
3678 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
3679 	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
3680 	    "Aborted transmit of TSO buffer with non TCP protocol");
3681 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
3682 	    CTLFLAG_RD, &stats->tx_csum_offloaded,
3683 	    "Offloaded checksum of transmitted buffer");
3684 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
3685 	    CTLFLAG_RD, &stats->tx_tso_offloaded,
3686 	    "Segmentation offload of transmitted buffer");
3687 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
3688 	    CTLFLAG_RD, &stats->tx_task_rescheduled,
3689 	    "Times the transmit interrupt task rescheduled itself");
3690 }
3691 
3692 static void
3693 vtnet_setup_sysctl(struct vtnet_softc *sc)
3694 {
3695 	device_t dev;
3696 	struct sysctl_ctx_list *ctx;
3697 	struct sysctl_oid *tree;
3698 	struct sysctl_oid_list *child;
3699 
3700 	dev = sc->vtnet_dev;
3701 	ctx = device_get_sysctl_ctx(dev);
3702 	tree = device_get_sysctl_tree(dev);
3703 	child = SYSCTL_CHILDREN(tree);
3704 
3705 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
3706 	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
3707 	    "Maximum number of supported virtqueue pairs");
3708 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
3709 	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
3710 	    "Number of active virtqueue pairs");
3711 
3712 	vtnet_setup_stat_sysctl(ctx, child, sc);
3713 }
3714 
3715 static int
3716 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
3717 {
3718 
3719 	return (virtqueue_enable_intr(rxq->vtnrx_vq));
3720 }
3721 
3722 static void
3723 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
3724 {
3725 
3726 	virtqueue_disable_intr(rxq->vtnrx_vq);
3727 }
3728 
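/*
 * Tx completion interrupts are not needed promptly, so rather than
 * re-enabling them immediately (as is done for Rx), the next interrupt
 * is postponed until a large share of the ring has been consumed, which
 * batches completions when the event index feature is available.
 */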
3729 static int
3730 vtnet_txq_enable_intr(struct vtnet_txq *txq)
3731 {
3732 
3733 	return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG));
3734 }
3735 
3736 static void
3737 vtnet_txq_disable_intr(struct vtnet_txq *txq)
3738 {
3739 
3740 	virtqueue_disable_intr(txq->vtntx_vq);
3741 }
3742 
3743 static void
3744 vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
3745 {
3746 	int i;
3747 
3748 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3749 		vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
3750 }
3751 
3752 static void
3753 vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
3754 {
3755 	int i;
3756 
3757 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3758 		vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
3759 }
3760 
3761 static void
3762 vtnet_enable_interrupts(struct vtnet_softc *sc)
3763 {
3764 
3765 	vtnet_enable_rx_interrupts(sc);
3766 	vtnet_enable_tx_interrupts(sc);
3767 }
3768 
3769 static void
3770 vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
3771 {
3772 	int i;
3773 
3774 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3775 		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
3776 }
3777 
3778 static void
3779 vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
3780 {
3781 	int i;
3782 
3783 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3784 		vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
3785 }
3786 
3787 static void
3788 vtnet_disable_interrupts(struct vtnet_softc *sc)
3789 {
3790 
3791 	vtnet_disable_rx_interrupts(sc);
3792 	vtnet_disable_tx_interrupts(sc);
3793 }
3794 
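/*
 * Fetch a per-device tunable from the kernel environment, e.g.
 * hw.vtnet.0.<knob> for unit 0, falling back to the supplied default
 * when the tunable is not set.
 */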
3795 static int
3796 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
3797 {
3798 	char path[64];
3799 
3800 	snprintf(path, sizeof(path),
3801 	    "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
3802 	TUNABLE_INT_FETCH(path, &def);
3803 
3804 	return (def);
3805 }
3806