xref: /freebsd/sys/dev/virtio/network/if_vtnet.c (revision ea130fa32099ea7e0c03702efdec28e745ea6ef1)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /* Driver for VirtIO network devices. */
30 
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/param.h>
35 #include <sys/eventhandler.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/sockio.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/msan.h>
43 #include <sys/sbuf.h>
44 #include <sys/socket.h>
45 #include <sys/sysctl.h>
46 #include <sys/random.h>
47 #include <sys/sglist.h>
48 #include <sys/lock.h>
49 #include <sys/mutex.h>
50 #include <sys/taskqueue.h>
51 #include <sys/smp.h>
52 #include <machine/smp.h>
53 
54 #include <vm/uma.h>
55 
56 #include <net/debugnet.h>
57 #include <net/ethernet.h>
58 #include <net/pfil.h>
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_types.h>
64 #include <net/if_media.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <net/bpf.h>
68 
69 #include <netinet/in_systm.h>
70 #include <netinet/in.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip6.h>
73 #include <netinet6/ip6_var.h>
74 #include <netinet/udp.h>
75 #include <netinet/tcp.h>
76 #include <netinet/tcp_lro.h>
77 
78 #include <machine/bus.h>
79 #include <machine/resource.h>
80 #include <sys/bus.h>
81 #include <sys/rman.h>
82 
83 #include <dev/virtio/virtio.h>
84 #include <dev/virtio/virtqueue.h>
85 #include <dev/virtio/network/virtio_net.h>
86 #include <dev/virtio/network/if_vtnetvar.h>
87 #include "virtio_if.h"
88 
89 #if defined(INET) || defined(INET6)
90 #include <machine/in_cksum.h>
91 #endif
92 
93 #ifdef __NO_STRICT_ALIGNMENT
94 #define VTNET_ETHER_ALIGN 0
95 #else /* Strict alignment */
96 #define VTNET_ETHER_ALIGN ETHER_ALIGN
97 #endif
98 
99 /*
100  * Worst case offset to ensure header doesn't share any cache lines with
101  * payload.
102  */
103 #define VTNET_RX_BUFFER_HEADER_OFFSET 128
104 
105 struct vtnet_rx_buffer_header {
106 	bus_addr_t   addr;
107 	bus_dmamap_t dmap;
108 };
109 
110 static int	vtnet_modevent(module_t, int, void *);
111 
112 static int	vtnet_probe(device_t);
113 static int	vtnet_attach(device_t);
114 static int	vtnet_detach(device_t);
115 static int	vtnet_suspend(device_t);
116 static int	vtnet_resume(device_t);
117 static int	vtnet_shutdown(device_t);
118 static int	vtnet_attach_completed(device_t);
119 static int	vtnet_config_change(device_t);
120 
121 static int	vtnet_negotiate_features(struct vtnet_softc *);
122 static int	vtnet_setup_features(struct vtnet_softc *);
123 static int	vtnet_init_rxq(struct vtnet_softc *, int);
124 static int	vtnet_init_txq(struct vtnet_softc *, int);
125 static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
126 static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
127 static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
128 static void	vtnet_free_rx_filters(struct vtnet_softc *);
129 static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
130 static void	vtnet_alloc_interface(struct vtnet_softc *);
131 static int	vtnet_setup_interface(struct vtnet_softc *);
132 static int	vtnet_ioctl_mtu(struct vtnet_softc *, u_int);
133 static int	vtnet_ioctl_ifflags(struct vtnet_softc *);
134 static int	vtnet_ioctl_multi(struct vtnet_softc *);
135 static int	vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *);
136 static int	vtnet_ioctl(if_t, u_long, caddr_t);
137 static uint64_t	vtnet_get_counter(if_t, ift_counter);
138 
139 static int	vtnet_rxq_populate(struct vtnet_rxq *);
140 static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
141 static struct mbuf *
142 		vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
143 static int	vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
144 		    struct mbuf *, int);
145 static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
146 static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
147 static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
148 #if defined(INET) || defined(INET6)
149 static void	vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
150 		     bool, int, struct virtio_net_hdr *);
151 static void	vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
152 		    int);
153 static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
154 		     struct virtio_net_hdr *);
155 #endif
156 static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
157 static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
158 static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
159 static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
160 		    struct virtio_net_hdr *);
161 static int	vtnet_rxq_eof(struct vtnet_rxq *);
162 static void	vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
163 static void	vtnet_rx_vq_intr(void *);
164 static void	vtnet_rxq_tq_intr(void *, int);
165 
166 static int	vtnet_txq_intr_threshold(struct vtnet_txq *);
167 static int	vtnet_txq_below_threshold(struct vtnet_txq *);
168 static int	vtnet_txq_notify(struct vtnet_txq *);
169 static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
170 static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
171 		    int *, int *, int *);
172 static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
173 		    int, struct virtio_net_hdr *);
174 static struct mbuf *
175 		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
176 		    struct virtio_net_hdr *);
177 static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
178 		    struct vtnet_tx_header *);
179 static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
180 
181 /* Required for ALTQ */
182 static void	vtnet_start_locked(struct vtnet_txq *, if_t);
183 static void	vtnet_start(if_t);
184 
185 /* Required for MQ */
186 static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
187 static int	vtnet_txq_mq_start(if_t, struct mbuf *);
188 static void	vtnet_txq_tq_deferred(void *, int);
189 static void	vtnet_qflush(if_t);
190 
191 
192 static void	vtnet_txq_start(struct vtnet_txq *);
193 static void	vtnet_txq_tq_intr(void *, int);
194 static int	vtnet_txq_eof(struct vtnet_txq *);
195 static void	vtnet_tx_vq_intr(void *);
196 static void	vtnet_tx_start_all(struct vtnet_softc *);
197 
198 static int	vtnet_watchdog(struct vtnet_txq *);
199 static void	vtnet_accum_stats(struct vtnet_softc *,
200 		    struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
201 static void	vtnet_tick(void *);
202 
203 static void	vtnet_start_taskqueues(struct vtnet_softc *);
204 static void	vtnet_free_taskqueues(struct vtnet_softc *);
205 static void	vtnet_drain_taskqueues(struct vtnet_softc *);
206 
207 static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
208 static void	vtnet_stop_rendezvous(struct vtnet_softc *);
209 static void	vtnet_stop(struct vtnet_softc *);
210 static int	vtnet_virtio_reinit(struct vtnet_softc *);
211 static void	vtnet_init_rx_filters(struct vtnet_softc *);
212 static int	vtnet_init_rx_queues(struct vtnet_softc *);
213 static int	vtnet_init_tx_queues(struct vtnet_softc *);
214 static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
215 static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
216 static void	vtnet_update_rx_offloads(struct vtnet_softc *);
217 static int	vtnet_reinit(struct vtnet_softc *);
218 static void	vtnet_init_locked(struct vtnet_softc *, int);
219 static void	vtnet_init(void *);
220 
221 static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
222 static int	vtnet_exec_ctrl_cmd(struct vtnet_softc *, uint8_t *,
223 		    struct sglist *, int, int);
224 static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
225 static int	vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t);
226 static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
227 static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, bool);
228 static int	vtnet_set_promisc(struct vtnet_softc *, bool);
229 static int	vtnet_set_allmulti(struct vtnet_softc *, bool);
230 static void	vtnet_rx_filter(struct vtnet_softc *);
231 static void	vtnet_rx_filter_mac(struct vtnet_softc *);
232 static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
233 static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
234 static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
235 static void	vtnet_register_vlan(void *, if_t, uint16_t);
236 static void	vtnet_unregister_vlan(void *, if_t, uint16_t);
237 
238 static void	vtnet_update_speed_duplex(struct vtnet_softc *);
239 static int	vtnet_is_link_up(struct vtnet_softc *);
240 static void	vtnet_update_link_status(struct vtnet_softc *);
241 static int	vtnet_ifmedia_upd(if_t);
242 static void	vtnet_ifmedia_sts(if_t, struct ifmediareq *);
243 static void	vtnet_get_macaddr(struct vtnet_softc *);
244 static void	vtnet_set_macaddr(struct vtnet_softc *);
245 static void	vtnet_attached_set_macaddr(struct vtnet_softc *);
246 static void	vtnet_vlan_tag_remove(struct mbuf *);
247 static void	vtnet_set_rx_process_limit(struct vtnet_softc *);
248 
249 static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
250 		    struct sysctl_oid_list *, struct vtnet_rxq *);
251 static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
252 		    struct sysctl_oid_list *, struct vtnet_txq *);
253 static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
254 static void	vtnet_load_tunables(struct vtnet_softc *);
255 static void	vtnet_setup_sysctl(struct vtnet_softc *);
256 
257 static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
258 static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
259 static int	vtnet_txq_enable_intr(struct vtnet_txq *);
260 static void	vtnet_txq_disable_intr(struct vtnet_txq *);
261 static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
262 static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
263 static void	vtnet_enable_interrupts(struct vtnet_softc *);
264 static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
265 static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
266 static void	vtnet_disable_interrupts(struct vtnet_softc *);
267 
268 static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
269 
270 DEBUGNET_DEFINE(vtnet);
271 
/*
 * Byte-order conversion wrappers: each forwards to the matching virtio_*
 * helper, passing vtnet_modern(_sc) as the first argument.
 */
#define vtnet_htog16(_sc, _val)	virtio_htog16(vtnet_modern(_sc), (_val))
#define vtnet_htog32(_sc, _val)	virtio_htog32(vtnet_modern(_sc), (_val))
#define vtnet_htog64(_sc, _val)	virtio_htog64(vtnet_modern(_sc), (_val))
#define vtnet_gtoh16(_sc, _val)	virtio_gtoh16(vtnet_modern(_sc), (_val))
#define vtnet_gtoh32(_sc, _val)	virtio_gtoh32(vtnet_modern(_sc), (_val))
#define vtnet_gtoh64(_sc, _val)	virtio_gtoh64(vtnet_modern(_sc), (_val))
278 
279 /* Tunables. */
280 static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
281     "VirtIO Net driver parameters");
282 
283 static int vtnet_csum_disable = 0;
284 SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
285     &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
286 
287 static int vtnet_tso_disable = 0;
288 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
289     &vtnet_tso_disable, 0, "Disables TSO");
290 
291 static int vtnet_lro_disable = 1;
292 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
293     &vtnet_lro_disable, 0, "Disables hardware LRO");
294 
295 static int vtnet_mq_disable = 0;
296 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN,
297     &vtnet_mq_disable, 0, "Disables multiqueue support");
298 
299 static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
300 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
301     &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs");
302 
303 static int vtnet_tso_maxlen = IP_MAXPACKET;
304 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
305     &vtnet_tso_maxlen, 0, "TSO burst limit");
306 
307 static int vtnet_rx_process_limit = 1024;
308 SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
309     &vtnet_rx_process_limit, 0,
310     "Number of RX segments processed in one pass");
311 
312 static int vtnet_lro_entry_count = 128;
313 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
314     &vtnet_lro_entry_count, 0, "Software LRO entry count");
315 
316 /* Enable sorted LRO, and the depth of the mbuf queue. */
317 static int vtnet_lro_mbufq_depth = 0;
318 SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
319     &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue");
320 
321 /* Deactivate ALTQ Support */
322 static int vtnet_altq_disable = 0;
323 SYSCTL_INT(_hw_vtnet, OID_AUTO, altq_disable, CTLFLAG_RDTUN,
324     &vtnet_altq_disable, 0, "Disables ALTQ Support");
325 
326 /*
327  * For the driver to be considered as having altq enabled,
328  * it must be compiled with an ALTQ capable kernel,
329  * and the tunable hw.vtnet.altq_disable must be zero
330  */
331 #define VTNET_ALTQ_ENABLED (VTNET_ALTQ_CAPABLE && (!vtnet_altq_disable))
332 
333 
334 static uma_zone_t vtnet_tx_header_zone;
335 
336 static struct virtio_feature_desc vtnet_feature_desc[] = {
337 	{ VIRTIO_NET_F_CSUM,			"TxChecksum"		},
338 	{ VIRTIO_NET_F_GUEST_CSUM,		"RxChecksum"		},
339 	{ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,	"CtrlRxOffloads"	},
340 	{ VIRTIO_NET_F_MAC,			"MAC"			},
341 	{ VIRTIO_NET_F_GSO,			"TxGSO"			},
342 	{ VIRTIO_NET_F_GUEST_TSO4,		"RxLROv4"		},
343 	{ VIRTIO_NET_F_GUEST_TSO6,		"RxLROv6"		},
344 	{ VIRTIO_NET_F_GUEST_ECN,		"RxLROECN"		},
345 	{ VIRTIO_NET_F_GUEST_UFO,		"RxUFO"			},
346 	{ VIRTIO_NET_F_HOST_TSO4,		"TxTSOv4"		},
347 	{ VIRTIO_NET_F_HOST_TSO6,		"TxTSOv6"		},
348 	{ VIRTIO_NET_F_HOST_ECN,		"TxTSOECN"		},
349 	{ VIRTIO_NET_F_HOST_UFO,		"TxUFO"			},
350 	{ VIRTIO_NET_F_MRG_RXBUF,		"MrgRxBuf"		},
351 	{ VIRTIO_NET_F_STATUS,			"Status"		},
352 	{ VIRTIO_NET_F_CTRL_VQ,			"CtrlVq"		},
353 	{ VIRTIO_NET_F_CTRL_RX,			"CtrlRxMode"		},
354 	{ VIRTIO_NET_F_CTRL_VLAN,		"CtrlVLANFilter"	},
355 	{ VIRTIO_NET_F_CTRL_RX_EXTRA,		"CtrlRxModeExtra"	},
356 	{ VIRTIO_NET_F_GUEST_ANNOUNCE,		"GuestAnnounce"		},
357 	{ VIRTIO_NET_F_MQ,			"Multiqueue"		},
358 	{ VIRTIO_NET_F_CTRL_MAC_ADDR,		"CtrlMacAddr"		},
359 	{ VIRTIO_NET_F_SPEED_DUPLEX,		"SpeedDuplex"		},
360 
361 	{ 0, NULL }
362 };
363 
364 static device_method_t vtnet_methods[] = {
365 	/* Device methods. */
366 	DEVMETHOD(device_probe,			vtnet_probe),
367 	DEVMETHOD(device_attach,		vtnet_attach),
368 	DEVMETHOD(device_detach,		vtnet_detach),
369 	DEVMETHOD(device_suspend,		vtnet_suspend),
370 	DEVMETHOD(device_resume,		vtnet_resume),
371 	DEVMETHOD(device_shutdown,		vtnet_shutdown),
372 
373 	/* VirtIO methods. */
374 	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
375 	DEVMETHOD(virtio_config_change,		vtnet_config_change),
376 
377 	DEVMETHOD_END
378 };
379 
380 #ifdef DEV_NETMAP
381 #include <dev/netmap/if_vtnet_netmap.h>
382 #endif
383 
384 static driver_t vtnet_driver = {
385     .name = "vtnet",
386     .methods = vtnet_methods,
387     .size = sizeof(struct vtnet_softc)
388 };
389 VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_modevent, NULL);
390 MODULE_VERSION(vtnet, 1);
391 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
392 #ifdef DEV_NETMAP
393 MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
394 #endif
395 
396 VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");
397 
398 static struct vtnet_rx_buffer_header *
399 vtnet_mbuf_to_rx_buffer_header(struct vtnet_softc *sc, struct mbuf *m)
400 {
401 	if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0)
402 		return (struct vtnet_rx_buffer_header *)((uintptr_t)m->m_data -
403 		    VTNET_RX_BUFFER_HEADER_OFFSET - VTNET_ETHER_ALIGN);
404 	else
405 		return (struct vtnet_rx_buffer_header *)((uintptr_t)m->m_data -
406 		    VTNET_RX_BUFFER_HEADER_OFFSET);
407 }
408 
409 static int
410 vtnet_modevent(module_t mod __unused, int type, void *unused __unused)
411 {
412 	int error = 0;
413 	static int loaded = 0;
414 
415 	switch (type) {
416 	case MOD_LOAD:
417 		if (loaded++ == 0) {
418 			vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
419 				sizeof(struct vtnet_tx_header),
420 				NULL, NULL, NULL, NULL, 0, 0);
421 #ifdef DEBUGNET
422 			/*
423 			 * We need to allocate from this zone in the transmit path, so ensure
424 			 * that we have at least one item per header available.
425 			 * XXX add a separate zone like we do for mbufs? otherwise we may alloc
426 			 * buckets
427 			 */
428 			uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
429 			uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
430 #endif
431 		}
432 		break;
433 	case MOD_QUIESCE:
434 		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
435 			error = EBUSY;
436 		break;
437 	case MOD_UNLOAD:
438 		if (--loaded == 0) {
439 			uma_zdestroy(vtnet_tx_header_zone);
440 			vtnet_tx_header_zone = NULL;
441 		}
442 		break;
443 	case MOD_SHUTDOWN:
444 		break;
445 	default:
446 		error = EOPNOTSUPP;
447 		break;
448 	}
449 
450 	return (error);
451 }
452 
453 static int
454 vtnet_probe(device_t dev)
455 {
456 	return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
457 }
458 
459 static int
460 vtnet_attach(device_t dev)
461 {
462 	struct vtnet_softc *sc;
463 	int error;
464 
465 	sc = device_get_softc(dev);
466 	sc->vtnet_dev = dev;
467 	virtio_set_feature_desc(dev, vtnet_feature_desc);
468 
469 	VTNET_CORE_LOCK_INIT(sc);
470 	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
471 	vtnet_load_tunables(sc);
472 
473 	vtnet_alloc_interface(sc);
474 	vtnet_setup_sysctl(sc);
475 
476 	error = vtnet_setup_features(sc);
477 	if (error) {
478 		device_printf(dev, "cannot setup features\n");
479 		goto fail;
480 	}
481 
482 	mtx_init(&sc->vtnet_rx_mtx, device_get_nameunit(dev),
483 	    "VirtIO Net RX lock", MTX_DEF);
484 
485 	error = bus_dma_tag_create(
486 	    bus_get_dma_tag(dev),		/* parent */
487 	    1,					/* alignment */
488 	    0,					/* boundary */
489 	    BUS_SPACE_MAXADDR,			/* lowaddr */
490 	    BUS_SPACE_MAXADDR,			/* highaddr */
491 	    NULL, NULL,				/* filter, filterarg */
492 	    MJUM9BYTES,				/* max request size */
493 	    1,					/* max # segments */
494 	    MJUM9BYTES,				/* maxsegsize - worst case */
495 	    BUS_DMA_COHERENT,			/* flags */
496 	    busdma_lock_mutex,			/* lockfunc */
497 	    &sc->vtnet_rx_mtx,			/* lockarg */
498 	    &sc->vtnet_rx_dmat);
499 	if (error) {
500 		device_printf(dev, "cannot create bus_dma_tag\n");
501 		goto fail;
502 	}
503 
504 	mtx_init(&sc->vtnet_tx_mtx, device_get_nameunit(dev),
505 	    "VirtIO Net TX lock", MTX_DEF);
506 
507 	error = bus_dma_tag_create(
508 	    bus_get_dma_tag(dev),		/* parent */
509 	    1,					/* alignment */
510 	    0,					/* boundary */
511 	    BUS_SPACE_MAXADDR,			/* lowaddr */
512 	    BUS_SPACE_MAXADDR,			/* highaddr */
513 	    NULL, NULL,				/* filter, filterarg */
514 	    sc->vtnet_tx_nsegs * MJUM9BYTES,	/* max request size */
515 	    sc->vtnet_tx_nsegs,			/* max # segments */
516 	    MJUM9BYTES,				/* maxsegsize */
517 	    BUS_DMA_COHERENT,			/* flags */
518 	    busdma_lock_mutex,			/* lockfunc */
519 	    &sc->vtnet_tx_mtx,			/* lockarg */
520 	    &sc->vtnet_tx_dmat);
521 	if (error) {
522 		device_printf(dev, "cannot create bus_dma_tag\n");
523 		goto fail;
524 	}
525 
526 	mtx_init(&sc->vtnet_hdr_mtx, device_get_nameunit(dev),
527 	    "VirtIO Net header lock", MTX_DEF);
528 
529 	error = bus_dma_tag_create(
530 	    bus_get_dma_tag(dev),		/* parent */
531 	    sizeof(uint16_t),			/* alignment */
532 	    0,					/* boundary */
533 	    BUS_SPACE_MAXADDR,			/* lowaddr */
534 	    BUS_SPACE_MAXADDR,			/* highaddr */
535 	    NULL, NULL,				/* filter, filterarg */
536 	    PAGE_SIZE,				/* max request size */
537 	    1,					/* max # segments */
538 	    PAGE_SIZE,				/* maxsegsize */
539 	    BUS_DMA_COHERENT,			/* flags */
540 	    busdma_lock_mutex,			/* lockfunc */
541 	    &sc->vtnet_hdr_mtx,			/* lockarg */
542 	    &sc->vtnet_hdr_dmat);
543 	if (error) {
544 		device_printf(dev, "cannot create bus_dma_tag\n");
545 		goto fail;
546 	}
547 
548 	mtx_init(&sc->vtnet_ack_mtx, device_get_nameunit(dev),
549 	    "VirtIO Net ACK lock", MTX_DEF);
550 
551 	error = bus_dma_tag_create(
552 	    bus_get_dma_tag(dev),		/* parent */
553 	    sizeof(uint8_t),			/* alignment */
554 	    0,					/* boundary */
555 	    BUS_SPACE_MAXADDR,			/* lowaddr */
556 	    BUS_SPACE_MAXADDR,			/* highaddr */
557 	    NULL, NULL,				/* filter, filterarg */
558 	    sizeof(uint8_t),			/* max request size */
559 	    1,					/* max # segments */
560 	    sizeof(uint8_t),			/* maxsegsize */
561 	    BUS_DMA_COHERENT,			/* flags */
562 	    busdma_lock_mutex,			/* lockfunc */
563 	    &sc->vtnet_ack_mtx,			/* lockarg */
564 	    &sc->vtnet_ack_dmat);
565 	if (error) {
566 		device_printf(dev, "cannot create bus_dma_tag\n");
567 		goto fail;
568 	}
569 
570 #ifdef __powerpc__
571         /*
572          * Virtio uses physical addresses rather than bus addresses, so we
573          * need to ask busdma to skip the iommu physical->bus mapping.  At
574          * present, this is only a thing on the powerpc architectures.
575          */
576         bus_dma_tag_set_iommu(sc->vtnet_rx_dmat, NULL, NULL);
577         bus_dma_tag_set_iommu(sc->vtnet_tx_dmat, NULL, NULL);
578         bus_dma_tag_set_iommu(sc->vtnet_hdr_dmat, NULL, NULL);
579         bus_dma_tag_set_iommu(sc->vtnet_ack_dmat, NULL, NULL);
580 #endif
581 
582 	error = vtnet_alloc_rx_filters(sc);
583 	if (error) {
584 		device_printf(dev, "cannot allocate Rx filters\n");
585 		goto fail;
586 	}
587 
588 	error = vtnet_alloc_rxtx_queues(sc);
589 	if (error) {
590 		device_printf(dev, "cannot allocate queues\n");
591 		goto fail;
592 	}
593 
594 	error = vtnet_alloc_virtqueues(sc);
595 	if (error) {
596 		device_printf(dev, "cannot allocate virtqueues\n");
597 		goto fail;
598 	}
599 
600 	error = vtnet_setup_interface(sc);
601 	if (error) {
602 		device_printf(dev, "cannot setup interface\n");
603 		goto fail;
604 	}
605 
606 	error = virtio_setup_intr(dev, INTR_TYPE_NET);
607 	if (error) {
608 		device_printf(dev, "cannot setup interrupts\n");
609 		ether_ifdetach(sc->vtnet_ifp);
610 		goto fail;
611 	}
612 
613 #ifdef DEV_NETMAP
614 	vtnet_netmap_attach(sc);
615 #endif
616 	vtnet_start_taskqueues(sc);
617 
618 fail:
619 	if (error)
620 		vtnet_detach(dev);
621 
622 	return (error);
623 }
624 
625 static int
626 vtnet_detach(device_t dev)
627 {
628 	struct vtnet_softc *sc;
629 	if_t ifp;
630 
631 	sc = device_get_softc(dev);
632 	ifp = sc->vtnet_ifp;
633 
634 	if (device_is_attached(dev)) {
635 		VTNET_CORE_LOCK(sc);
636 		vtnet_stop(sc);
637 		VTNET_CORE_UNLOCK(sc);
638 
639 		callout_drain(&sc->vtnet_tick_ch);
640 		vtnet_drain_taskqueues(sc);
641 
642 		ether_ifdetach(ifp);
643 	}
644 
645 #ifdef DEV_NETMAP
646 	netmap_detach(ifp);
647 #endif
648 
649 	if (sc->vtnet_pfil != NULL) {
650 		pfil_head_unregister(sc->vtnet_pfil);
651 		sc->vtnet_pfil = NULL;
652 	}
653 
654 	vtnet_free_taskqueues(sc);
655 
656 	if (sc->vtnet_vlan_attach != NULL) {
657 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
658 		sc->vtnet_vlan_attach = NULL;
659 	}
660 	if (sc->vtnet_vlan_detach != NULL) {
661 		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
662 		sc->vtnet_vlan_detach = NULL;
663 	}
664 
665 	ifmedia_removeall(&sc->vtnet_media);
666 
667 	if (ifp != NULL) {
668 		if_free(ifp);
669 		sc->vtnet_ifp = NULL;
670 	}
671 
672 	vtnet_free_rxtx_queues(sc);
673 	vtnet_free_rx_filters(sc);
674 
675 	if (sc->vtnet_ctrl_vq != NULL)
676 		vtnet_free_ctrl_vq(sc);
677 
678 	VTNET_CORE_LOCK_DESTROY(sc);
679 
680 	return (0);
681 }
682 
683 static int
684 vtnet_suspend(device_t dev)
685 {
686 	struct vtnet_softc *sc;
687 
688 	sc = device_get_softc(dev);
689 
690 	VTNET_CORE_LOCK(sc);
691 	vtnet_stop(sc);
692 	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
693 	VTNET_CORE_UNLOCK(sc);
694 
695 	return (0);
696 }
697 
698 static int
699 vtnet_resume(device_t dev)
700 {
701 	struct vtnet_softc *sc;
702 	if_t ifp;
703 
704 	sc = device_get_softc(dev);
705 	ifp = sc->vtnet_ifp;
706 
707 	VTNET_CORE_LOCK(sc);
708 	if (if_getflags(ifp) & IFF_UP)
709 		vtnet_init_locked(sc, 0);
710 	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
711 	VTNET_CORE_UNLOCK(sc);
712 
713 	return (0);
714 }
715 
716 static int
717 vtnet_shutdown(device_t dev)
718 {
719 	/*
720 	 * Suspend already does all of what we need to
721 	 * do here; we just never expect to be resumed.
722 	 */
723 	return (vtnet_suspend(dev));
724 }
725 
726 static int
727 vtnet_attach_completed(device_t dev)
728 {
729 	struct vtnet_softc *sc;
730 
731 	sc = device_get_softc(dev);
732 
733 	VTNET_CORE_LOCK(sc);
734 	vtnet_attached_set_macaddr(sc);
735 	VTNET_CORE_UNLOCK(sc);
736 
737 	return (0);
738 }
739 
740 static int
741 vtnet_config_change(device_t dev)
742 {
743 	struct vtnet_softc *sc;
744 
745 	sc = device_get_softc(dev);
746 
747 	VTNET_CORE_LOCK(sc);
748 	vtnet_update_link_status(sc);
749 	if (sc->vtnet_link_active != 0)
750 		vtnet_tx_start_all(sc);
751 	VTNET_CORE_UNLOCK(sc);
752 
753 	return (0);
754 }
755 
756 static int
757 vtnet_negotiate_features(struct vtnet_softc *sc)
758 {
759 	device_t dev;
760 	uint64_t features, negotiated_features;
761 	int no_csum;
762 
763 	dev = sc->vtnet_dev;
764 	features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES :
765 	    VTNET_LEGACY_FEATURES;
766 
767 	/*
768 	 * TSO and LRO are only available when their corresponding checksum
769 	 * offload feature is also negotiated.
770 	 */
771 	no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable);
772 	if (no_csum)
773 		features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM);
774 	if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
775 		features &= ~VTNET_TSO_FEATURES;
776 	if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
777 		features &= ~VTNET_LRO_FEATURES;
778 
779 	/* Deactivate MQ Feature flag, if driver has ALTQ enabled, or MQ is explicitly disabled */
780 	if (VTNET_ALTQ_ENABLED || vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
781 		features &= ~VIRTIO_NET_F_MQ;
782 
783 	negotiated_features = virtio_negotiate_features(dev, features);
784 
785 	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
786 		uint16_t mtu;
787 
788 		mtu = virtio_read_dev_config_2(dev,
789 		    offsetof(struct virtio_net_config, mtu));
790 		if (mtu < VTNET_MIN_MTU) {
791 			device_printf(dev, "Invalid MTU value: %d. "
792 			    "MTU feature disabled.\n", mtu);
793 			features &= ~VIRTIO_NET_F_MTU;
794 			negotiated_features =
795 			    virtio_negotiate_features(dev, features);
796 		}
797 	}
798 
799 	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
800 		uint16_t npairs;
801 
802 		npairs = virtio_read_dev_config_2(dev,
803 		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
804 		if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
805 		    npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
806 			device_printf(dev, "Invalid max_virtqueue_pairs value: "
807 			    "%d. Multiqueue feature disabled.\n", npairs);
808 			features &= ~VIRTIO_NET_F_MQ;
809 			negotiated_features =
810 			    virtio_negotiate_features(dev, features);
811 		}
812 	}
813 
814 	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
815 	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
816 		/*
817 		 * LRO without mergeable buffers requires special care. This
818 		 * is not ideal because every receive buffer must be large
819 		 * enough to hold the maximum TCP packet, the Ethernet header,
820 		 * and the header. This requires up to 34 descriptors with
821 		 * MCLBYTES clusters. If we do not have indirect descriptors,
822 		 * LRO is disabled since the virtqueue will not contain very
823 		 * many receive buffers.
824 		 */
825 		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
826 			device_printf(dev,
827 			    "Host LRO disabled since both mergeable buffers "
828 			    "and indirect descriptors were not negotiated\n");
829 			features &= ~VTNET_LRO_FEATURES;
830 			negotiated_features =
831 			    virtio_negotiate_features(dev, features);
832 		} else
833 			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
834 	}
835 
836 	sc->vtnet_features = negotiated_features;
837 	sc->vtnet_negotiated_features = negotiated_features;
838 
839 	return (virtio_finalize_features(dev));
840 }
841 
842 static int
843 vtnet_setup_features(struct vtnet_softc *sc)
844 {
845 	device_t dev;
846 	int error;
847 
848 	dev = sc->vtnet_dev;
849 
850 	error = vtnet_negotiate_features(sc);
851 	if (error)
852 		return (error);
853 
854 	if (virtio_with_feature(dev, VIRTIO_F_VERSION_1))
855 		sc->vtnet_flags |= VTNET_FLAG_MODERN;
856 	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
857 		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
858 	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
859 		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
860 
861 	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
862 		/* This feature should always be negotiated. */
863 		sc->vtnet_flags |= VTNET_FLAG_MAC;
864 	}
865 
866 	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
867 		sc->vtnet_max_mtu = virtio_read_dev_config_2(dev,
868 		    offsetof(struct virtio_net_config, mtu));
869 	} else
870 		sc->vtnet_max_mtu = VTNET_MAX_MTU;
871 
872 	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
873 		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
874 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
875 	} else if (vtnet_modern(sc)) {
876 		/* This is identical to the mergeable header. */
877 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
878 	} else
879 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
880 
881 	if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
882 		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
883 	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
884 		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
885 	else
886 		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;
887 
888 	/*
889 	 * Favor "hardware" LRO if negotiated, but support software LRO as
890 	 * a fallback; there is usually little benefit (or worse) with both.
891 	 */
892 	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 &&
893 	    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0)
894 		sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
895 
896 	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
897 	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
898 	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
899 		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
900 	else
901 		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;
902 
903 	sc->vtnet_req_vq_pairs = 1;
904 	sc->vtnet_max_vq_pairs = 1;
905 
906 	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
907 		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
908 
909 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
910 			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
911 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
912 			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
913 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
914 			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
915 
916 		if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
917 			sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
918 			    offsetof(struct virtio_net_config,
919 			    max_virtqueue_pairs));
920 		}
921 	}
922 
923 	if (sc->vtnet_max_vq_pairs > 1) {
924 		int req;
925 
926 		/*
927 		 * Limit the maximum number of requested queue pairs to the
928 		 * number of CPUs and the configured maximum.
929 		 */
930 		req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
931 		if (req < 0)
932 			req = 1;
933 		if (req == 0)
934 			req = mp_ncpus;
935 		if (req > sc->vtnet_max_vq_pairs)
936 			req = sc->vtnet_max_vq_pairs;
937 		if (req > mp_ncpus)
938 			req = mp_ncpus;
939 		if (req > 1) {
940 			sc->vtnet_req_vq_pairs = req;
941 			sc->vtnet_flags |= VTNET_FLAG_MQ;
942 		}
943 	}
944 
945 	return (0);
946 }
947 
948 static int
949 vtnet_init_rxq(struct vtnet_softc *sc, int id)
950 {
951 	struct vtnet_rxq *rxq;
952 
953 	rxq = &sc->vtnet_rxqs[id];
954 
955 	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
956 	    device_get_nameunit(sc->vtnet_dev), id);
957 	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
958 
959 	rxq->vtnrx_sc = sc;
960 	rxq->vtnrx_id = id;
961 
962 	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
963 	if (rxq->vtnrx_sg == NULL)
964 		return (ENOMEM);
965 
966 #if defined(INET) || defined(INET6)
967 	if (vtnet_software_lro(sc)) {
968 		if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp,
969 		    sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0)
970 			return (ENOMEM);
971 	}
972 #endif
973 
974 	NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
975 	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
976 	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
977 
978 	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
979 }
980 
981 static int
982 vtnet_init_txq(struct vtnet_softc *sc, int id)
983 {
984 	struct vtnet_txq *txq;
985 
986 	txq = &sc->vtnet_txqs[id];
987 
988 	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
989 	    device_get_nameunit(sc->vtnet_dev), id);
990 	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
991 
992 	txq->vtntx_sc = sc;
993 	txq->vtntx_id = id;
994 
995 	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
996 	if (txq->vtntx_sg == NULL)
997 		return (ENOMEM);
998 
999 	if (!VTNET_ALTQ_ENABLED) {
1000 		txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
1001 		    M_NOWAIT, &txq->vtntx_mtx);
1002 		if (txq->vtntx_br == NULL)
1003 			return (ENOMEM);
1004 
1005 		TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
1006 	}
1007 	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
1008 	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
1009 	    taskqueue_thread_enqueue, &txq->vtntx_tq);
1010 	if (txq->vtntx_tq == NULL)
1011 		return (ENOMEM);
1012 
1013 	return (0);
1014 }
1015 
1016 static int
1017 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
1018 {
1019 	int i, npairs, error;
1020 
1021 	npairs = sc->vtnet_max_vq_pairs;
1022 
1023 	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
1024 	    M_NOWAIT | M_ZERO);
1025 	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
1026 	    M_NOWAIT | M_ZERO);
1027 	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
1028 		return (ENOMEM);
1029 
1030 	for (i = 0; i < npairs; i++) {
1031 		error = vtnet_init_rxq(sc, i);
1032 		if (error)
1033 			return (error);
1034 		error = vtnet_init_txq(sc, i);
1035 		if (error)
1036 			return (error);
1037 	}
1038 
1039 	vtnet_set_rx_process_limit(sc);
1040 	vtnet_setup_queue_sysctl(sc);
1041 
1042 	return (0);
1043 }
1044 
1045 static void
1046 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
1047 {
1048 
1049 	rxq->vtnrx_sc = NULL;
1050 	rxq->vtnrx_id = -1;
1051 
1052 #if defined(INET) || defined(INET6)
1053 	tcp_lro_free(&rxq->vtnrx_lro);
1054 #endif
1055 
1056 	if (rxq->vtnrx_sg != NULL) {
1057 		sglist_free(rxq->vtnrx_sg);
1058 		rxq->vtnrx_sg = NULL;
1059 	}
1060 
1061 	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
1062 		mtx_destroy(&rxq->vtnrx_mtx);
1063 }
1064 
1065 static void
1066 vtnet_destroy_txq(struct vtnet_txq *txq)
1067 {
1068 
1069 	txq->vtntx_sc = NULL;
1070 	txq->vtntx_id = -1;
1071 
1072 	if (txq->vtntx_sg != NULL) {
1073 		sglist_free(txq->vtntx_sg);
1074 		txq->vtntx_sg = NULL;
1075 	}
1076 
1077 	if (!VTNET_ALTQ_ENABLED) {
1078 		if (txq->vtntx_br != NULL) {
1079 			buf_ring_free(txq->vtntx_br, M_DEVBUF);
1080 			txq->vtntx_br = NULL;
1081 		}
1082 	}
1083 
1084 	if (mtx_initialized(&txq->vtntx_mtx) != 0)
1085 		mtx_destroy(&txq->vtntx_mtx);
1086 }
1087 
1088 static void
1089 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
1090 {
1091 	int i;
1092 
1093 	if (sc->vtnet_rxqs != NULL) {
1094 		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
1095 			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
1096 		free(sc->vtnet_rxqs, M_DEVBUF);
1097 		sc->vtnet_rxqs = NULL;
1098 	}
1099 
1100 	if (sc->vtnet_txqs != NULL) {
1101 		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
1102 			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
1103 		free(sc->vtnet_txqs, M_DEVBUF);
1104 		sc->vtnet_txqs = NULL;
1105 	}
1106 }
1107 
1108 static int
1109 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
1110 {
1111 
1112 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
1113 		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
1114 		    M_DEVBUF, M_NOWAIT | M_ZERO);
1115 		if (sc->vtnet_mac_filter == NULL)
1116 			return (ENOMEM);
1117 	}
1118 
1119 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1120 		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
1121 		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
1122 		if (sc->vtnet_vlan_filter == NULL)
1123 			return (ENOMEM);
1124 	}
1125 
1126 	return (0);
1127 }
1128 
1129 static void
1130 vtnet_free_rx_filters(struct vtnet_softc *sc)
1131 {
1132 
1133 	if (sc->vtnet_mac_filter != NULL) {
1134 		free(sc->vtnet_mac_filter, M_DEVBUF);
1135 		sc->vtnet_mac_filter = NULL;
1136 	}
1137 
1138 	if (sc->vtnet_vlan_filter != NULL) {
1139 		free(sc->vtnet_vlan_filter, M_DEVBUF);
1140 		sc->vtnet_vlan_filter = NULL;
1141 	}
1142 }
1143 
1144 static int
1145 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
1146 {
1147 	device_t dev;
1148 	struct vq_alloc_info *info;
1149 	struct vtnet_rxq *rxq;
1150 	struct vtnet_txq *txq;
1151 	int i, idx, nvqs, error;
1152 
1153 	dev = sc->vtnet_dev;
1154 
1155 	nvqs = sc->vtnet_max_vq_pairs * 2;
1156 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
1157 		nvqs++;
1158 
1159 	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
1160 	if (info == NULL)
1161 		return (ENOMEM);
1162 
1163 	for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) {
1164 		rxq = &sc->vtnet_rxqs[i];
1165 		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
1166 		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
1167 		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1168 
1169 		txq = &sc->vtnet_txqs[i];
1170 		VQ_ALLOC_INFO_INIT(&info[idx + 1], sc->vtnet_tx_nsegs,
1171 		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
1172 		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1173 	}
1174 
1175 	/* These queues will not be used so allocate the minimum resources. */
1176 	for (; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
1177 		rxq = &sc->vtnet_rxqs[i];
1178 		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq,
1179 		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1180 
1181 		txq = &sc->vtnet_txqs[i];
1182 		VQ_ALLOC_INFO_INIT(&info[idx + 1], 0, NULL, txq, &txq->vtntx_vq,
1183 		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1184 	}
1185 
1186 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
1187 		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
1188 		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
1189 	}
1190 
1191 	error = virtio_alloc_virtqueues(dev, nvqs, info);
1192 	free(info, M_TEMP);
1193 
1194 	return (error);
1195 }
1196 
1197 static void
1198 vtnet_alloc_interface(struct vtnet_softc *sc)
1199 {
1200 	device_t dev;
1201 	if_t ifp;
1202 
1203 	dev = sc->vtnet_dev;
1204 
1205 	ifp = if_alloc(IFT_ETHER);
1206 	sc->vtnet_ifp = ifp;
1207 	if_setsoftc(ifp, sc);
1208 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1209 }
1210 
1211 static int
1212 vtnet_setup_interface(struct vtnet_softc *sc)
1213 {
1214 	device_t dev;
1215 	struct pfil_head_args pa;
1216 	if_t ifp;
1217 
1218 	dev = sc->vtnet_dev;
1219 	ifp = sc->vtnet_ifp;
1220 
1221 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
1222 	if_setbaudrate(ifp, IF_Gbps(10));
1223 	if_setinitfn(ifp, vtnet_init);
1224 	if_setioctlfn(ifp, vtnet_ioctl);
1225 	if_setgetcounterfn(ifp, vtnet_get_counter);
1226 
1227 	if (!VTNET_ALTQ_ENABLED) {
1228 		if_settransmitfn(ifp, vtnet_txq_mq_start);
1229 		if_setqflushfn(ifp, vtnet_qflush);
1230 	} else {
1231 		struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
1232 		if_setstartfn(ifp, vtnet_start);
1233 		if_setsendqlen(ifp, virtqueue_size(vq) - 1);
1234 		if_setsendqready(ifp);
1235 	}
1236 
1237 	vtnet_get_macaddr(sc);
1238 
1239 	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
1240 		if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);
1241 
1242 	ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
1243 	ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1244 	ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);
1245 
1246 	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
1247 		int gso;
1248 
1249 		if_setcapabilitiesbit(ifp, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6, 0);
1250 
1251 		gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
1252 		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
1253 			if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
1254 		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
1255 			if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
1256 		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
1257 			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
1258 
1259 		if (if_getcapabilities(ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) {
1260 			int tso_maxlen;
1261 
1262 			if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);
1263 
1264 			tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen",
1265 			    vtnet_tso_maxlen);
1266 			if_sethwtsomax(ifp, tso_maxlen -
1267 			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
1268 			if_sethwtsomaxsegcount(ifp, sc->vtnet_tx_nsegs - 1);
1269 			if_sethwtsomaxsegsize(ifp, PAGE_SIZE);
1270 		}
1271 	}
1272 
1273 	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
1274 		/* BMV: Rx checksums not distinguished between IPv4 and IPv6. */
1275 		if_setcapabilitiesbit(ifp, IFCAP_RXCSUM, 0);
1276 		if_setcapabilitiesbit(ifp, IFCAP_RXCSUM_IPV6, 0);
1277 
1278 		/* Support either "hardware" or software LRO. */
1279 		if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
1280 	}
1281 
1282 	if (if_getcapabilities(ifp) & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) {
1283 		/*
1284 		 * VirtIO does not support VLAN tagging, but we can fake
1285 		 * it by inserting and removing the 802.1Q header during
1286 		 * transmit and receive. We are then able to do checksum
1287 		 * offloading of VLAN frames.
1288 		 */
1289 		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);
1290 	}
1291 
1292 	if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
1293 		if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
1294 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
1295 	if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
1296 
1297 	/*
1298 	 * Capabilities after here are not enabled by default.
1299 	 */
1300 	if_setcapenable(ifp, if_getcapabilities(ifp));
1301 
1302 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1303 		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
1304 
1305 		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1306 		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1307 		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1308 		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1309 	}
1310 
1311 	ether_ifattach(ifp, sc->vtnet_hwaddr);
1312 
1313 	/* Tell the upper layer(s) we support long frames. */
1314 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
1315 
1316 	DEBUGNET_SET(ifp, vtnet);
1317 
1318 	pa.pa_version = PFIL_VERSION;
1319 	pa.pa_flags = PFIL_IN;
1320 	pa.pa_type = PFIL_TYPE_ETHERNET;
1321 	pa.pa_headname = if_name(ifp);
1322 	sc->vtnet_pfil = pfil_head_register(&pa);
1323 
1324 	return (0);
1325 }
1326 
1327 static int
1328 vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
1329 {
1330 	int framesz;
1331 
1332 	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
1333 		return (MJUMPAGESIZE);
1334 	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1335 		return (MCLBYTES);
1336 
1337 	/*
1338 	 * Try to scale the receive mbuf cluster size from the MTU. We
1339 	 * could also use the VQ size to influence the selected size,
1340 	 * but that would only matter for very small queues.
1341 	 */
1342 	if (vtnet_modern(sc)) {
1343 		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
1344 		framesz = sizeof(struct virtio_net_hdr_v1);
1345 	} else
1346 		framesz = sizeof(struct vtnet_rx_header);
1347 	framesz += sizeof(struct ether_vlan_header) + mtu;
1348 	/*
1349 	 * Account for the offsetting we'll do elsewhere so we allocate the
1350 	 * right size for the mtu.
1351 	 */
1352 	if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) {
1353 		framesz += VTNET_ETHER_ALIGN;
1354 	}
1355 
1356 	if (framesz <= MCLBYTES)
1357 		return (MCLBYTES);
1358 	else if (framesz <= MJUMPAGESIZE)
1359 		return (MJUMPAGESIZE);
1360 	else if (framesz <= MJUM9BYTES)
1361 		return (MJUM9BYTES);
1362 
1363 	/* Sane default; avoid 16KB clusters. */
1364 	return (MCLBYTES);
1365 }
1366 
1367 static int
1368 vtnet_ioctl_mtu(struct vtnet_softc *sc, u_int mtu)
1369 {
1370 	if_t ifp;
1371 	int clustersz;
1372 
1373 	ifp = sc->vtnet_ifp;
1374 	VTNET_CORE_LOCK_ASSERT(sc);
1375 
1376 	if (if_getmtu(ifp) == mtu)
1377 		return (0);
1378 	else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu)
1379 		return (EINVAL);
1380 
1381 	if_setmtu(ifp, mtu);
1382 	clustersz = vtnet_rx_cluster_size(sc, mtu);
1383 
1384 	if (clustersz != sc->vtnet_rx_clustersz &&
1385 	    if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1386 		if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1387 		vtnet_init_locked(sc, 0);
1388 	}
1389 
1390 	return (0);
1391 }
1392 
1393 static int
1394 vtnet_ioctl_ifflags(struct vtnet_softc *sc)
1395 {
1396 	if_t ifp;
1397 	int drv_running;
1398 
1399 	ifp = sc->vtnet_ifp;
1400 	drv_running = (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0;
1401 
1402 	VTNET_CORE_LOCK_ASSERT(sc);
1403 
1404 	if ((if_getflags(ifp) & IFF_UP) == 0) {
1405 		if (drv_running)
1406 			vtnet_stop(sc);
1407 		goto out;
1408 	}
1409 
1410 	if (!drv_running) {
1411 		vtnet_init_locked(sc, 0);
1412 		goto out;
1413 	}
1414 
1415 	if ((if_getflags(ifp) ^ sc->vtnet_if_flags) &
1416 	    (IFF_PROMISC | IFF_ALLMULTI)) {
1417 		if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1418 			vtnet_rx_filter(sc);
1419 		else {
1420 			/*
1421 			 * We don't support filtering out multicast, so
1422 			 * ALLMULTI is always set.
1423 			 */
1424 			if_setflagbits(ifp, IFF_ALLMULTI, 0);
1425 			if_setflagbits(ifp, IFF_PROMISC, 0);
1426 		}
1427 	}
1428 
1429 out:
1430 	sc->vtnet_if_flags = if_getflags(ifp);
1431 	return (0);
1432 }
1433 
1434 static int
1435 vtnet_ioctl_multi(struct vtnet_softc *sc)
1436 {
1437 	if_t ifp;
1438 
1439 	ifp = sc->vtnet_ifp;
1440 
1441 	VTNET_CORE_LOCK_ASSERT(sc);
1442 
1443 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
1444 	    if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1445 		vtnet_rx_filter_mac(sc);
1446 
1447 	return (0);
1448 }
1449 
1450 static int
1451 vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr)
1452 {
1453 	if_t ifp;
1454 	int mask, reinit, update;
1455 
1456 	ifp = sc->vtnet_ifp;
1457 	mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^ if_getcapenable(ifp);
1458 	reinit = update = 0;
1459 
1460 	VTNET_CORE_LOCK_ASSERT(sc);
1461 
1462 	if (mask & IFCAP_TXCSUM) {
1463 		if (if_getcapenable(ifp) & IFCAP_TXCSUM &&
1464 		    if_getcapenable(ifp) & IFCAP_TSO4) {
1465 			/* Disable tso4, because txcsum will be disabled. */
1466 			if_setcapenablebit(ifp, 0, IFCAP_TSO4);
1467 			if_sethwassistbits(ifp, 0, CSUM_IP_TSO);
1468 			mask &= ~IFCAP_TSO4;
1469 		}
1470 		if_togglecapenable(ifp, IFCAP_TXCSUM);
1471 		if_togglehwassist(ifp, VTNET_CSUM_OFFLOAD);
1472 	}
1473 	if (mask & IFCAP_TXCSUM_IPV6) {
1474 		if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6 &&
1475 		    if_getcapenable(ifp) & IFCAP_TSO6) {
1476 			/* Disable tso6, because txcsum6 will be disabled. */
1477 			if_setcapenablebit(ifp, 0, IFCAP_TSO6);
1478 			if_sethwassistbits(ifp, 0, CSUM_IP6_TSO);
1479 			mask &= ~IFCAP_TSO6;
1480 		}
1481 		if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
1482 		if_togglehwassist(ifp, VTNET_CSUM_OFFLOAD_IPV6);
1483 	}
1484 	if (mask & IFCAP_TSO4) {
1485 		if (if_getcapenable(ifp) & (IFCAP_TXCSUM | IFCAP_TSO4)) {
1486 			/* tso4 can only be enabled, if txcsum is enabled. */
1487 			if_togglecapenable(ifp, IFCAP_TSO4);
1488 			if_togglehwassist(ifp, CSUM_IP_TSO);
1489 		}
1490 	}
1491 	if (mask & IFCAP_TSO6) {
1492 		if (if_getcapenable(ifp) & (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6)) {
1493 			/* tso6 can only be enabled, if txcsum6 is enabled. */
1494 			if_togglecapenable(ifp, IFCAP_TSO6);
1495 			if_togglehwassist(ifp, CSUM_IP6_TSO);
1496 		}
1497 	}
1498 
1499 	if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) {
1500 		/*
1501 		 * These Rx features require the negotiated features to
1502 		 * be updated. Avoid a full reinit if possible.
1503 		 */
1504 		if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
1505 			update = 1;
1506 		else
1507 			reinit = 1;
1508 
1509 		/* BMV: Avoid needless renegotiation for just software LRO. */
1510 		if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) ==
1511 		    IFCAP_LRO && vtnet_software_lro(sc))
1512 			reinit = update = 0;
1513 		/*
1514 		 * VirtIO does not distinguish between receive checksum offload
1515 		 * for IPv4 and IPv6 packets, so treat them as a pair.
1516 		 */
1517 		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
1518 			if_togglecapenable(ifp, IFCAP_RXCSUM);
1519 			if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);
1520 		}
1521 		if (mask & IFCAP_LRO)
1522 			if_togglecapenable(ifp, IFCAP_LRO);
1523 		/* Both SW and HW TCP LRO require receive checksum offload. */
1524 		if ((if_getcapenable(ifp) &
1525 		    (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
1526 			if_setcapenablebit(ifp, 0, IFCAP_LRO);
1527 	}
1528 
1529 	if (mask & IFCAP_VLAN_HWFILTER) {
1530 		/* These Rx features require renegotiation. */
1531 		reinit = 1;
1532 
1533 		if (mask & IFCAP_VLAN_HWFILTER)
1534 			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1535 	}
1536 
1537 	if (mask & IFCAP_VLAN_HWTSO)
1538 		if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1539 	if (mask & IFCAP_VLAN_HWTAGGING)
1540 		if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
1541 
1542 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1543 		if (reinit) {
1544 			if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1545 			vtnet_init_locked(sc, 0);
1546 		} else if (update)
1547 			vtnet_update_rx_offloads(sc);
1548 	}
1549 
1550 	return (0);
1551 }
1552 
1553 static int
1554 vtnet_ioctl(if_t ifp, u_long cmd, caddr_t data)
1555 {
1556 	struct vtnet_softc *sc;
1557 	struct ifreq *ifr;
1558 	int error;
1559 
1560 	sc = if_getsoftc(ifp);
1561 	ifr = (struct ifreq *) data;
1562 	error = 0;
1563 
1564 	switch (cmd) {
1565 	case SIOCSIFMTU:
1566 		VTNET_CORE_LOCK(sc);
1567 		error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
1568 		VTNET_CORE_UNLOCK(sc);
1569 		break;
1570 
1571 	case SIOCSIFFLAGS:
1572 		VTNET_CORE_LOCK(sc);
1573 		error = vtnet_ioctl_ifflags(sc);
1574 		VTNET_CORE_UNLOCK(sc);
1575 		break;
1576 
1577 	case SIOCADDMULTI:
1578 	case SIOCDELMULTI:
1579 		VTNET_CORE_LOCK(sc);
1580 		error = vtnet_ioctl_multi(sc);
1581 		VTNET_CORE_UNLOCK(sc);
1582 		break;
1583 
1584 	case SIOCSIFMEDIA:
1585 	case SIOCGIFMEDIA:
1586 		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1587 		break;
1588 
1589 	case SIOCSIFCAP:
1590 		VTNET_CORE_LOCK(sc);
1591 		error = vtnet_ioctl_ifcap(sc, ifr);
1592 		VTNET_CORE_UNLOCK(sc);
1593 		VLAN_CAPABILITIES(ifp);
1594 		break;
1595 
1596 	default:
1597 		error = ether_ioctl(ifp, cmd, data);
1598 		break;
1599 	}
1600 
1601 	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1602 
1603 	return (error);
1604 }
1605 
1606 static int
1607 vtnet_rxq_populate(struct vtnet_rxq *rxq)
1608 {
1609 	struct virtqueue *vq;
1610 	int nbufs, error;
1611 
1612 #ifdef DEV_NETMAP
1613 	error = vtnet_netmap_rxq_populate(rxq);
1614 	if (error >= 0)
1615 		return (error);
1616 #endif  /* DEV_NETMAP */
1617 
1618 	vq = rxq->vtnrx_vq;
1619 	error = ENOSPC;
1620 
1621 	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1622 		error = vtnet_rxq_new_buf(rxq);
1623 		if (error)
1624 			break;
1625 	}
1626 
1627 	if (nbufs > 0) {
1628 		virtqueue_notify(vq);
1629 		/*
1630 		 * EMSGSIZE signifies the virtqueue did not have enough
1631 		 * entries available to hold the last mbuf. This is not
1632 		 * an error.
1633 		 */
1634 		if (error == EMSGSIZE)
1635 			error = 0;
1636 	}
1637 
1638 	return (error);
1639 }
1640 
1641 static void
1642 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1643 {
1644 	struct virtqueue *vq;
1645 	struct mbuf *m;
1646 	int last;
1647 #ifdef DEV_NETMAP
1648 	struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
1649 							rxq->vtnrx_id, NR_RX);
1650 #else  /* !DEV_NETMAP */
1651 	void *kring = NULL;
1652 #endif /* !DEV_NETMAP */
1653 
1654 	vq = rxq->vtnrx_vq;
1655 	last = 0;
1656 
1657 	while ((m = virtqueue_drain(vq, &last)) != NULL) {
1658 		if (kring == NULL)
1659 			m_freem(m);
1660 	}
1661 
1662 	KASSERT(virtqueue_empty(vq),
1663 	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1664 }
1665 
1666 static struct mbuf *
1667 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1668 {
1669 	struct mbuf *m_head, *m_tail, *m;
1670 	struct vtnet_rx_buffer_header *vthdr;
1671 	bus_dma_segment_t segs[1];
1672 	bus_dmamap_t dmap;
1673 	int nsegs;
1674 	int err;
1675 	int i, size;
1676 
1677 	m_head = NULL;
1678 	size = sc->vtnet_rx_clustersz;
1679 
1680 	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1681 	    ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));
1682 
1683 	for (i = 0; i < nbufs; i++) {
1684 		m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
1685 		if (m == NULL) {
1686 			sc->vtnet_stats.mbuf_alloc_failed++;
1687 			m_freem(m_head);
1688 			return (NULL);
1689 		}
1690 
1691 		m->m_len = size;
1692 		vthdr = (struct vtnet_rx_buffer_header *)m->m_data;
1693 
1694 		/* Reserve space for header */
1695 		m_adj(m, VTNET_RX_BUFFER_HEADER_OFFSET);
1696 
1697 		/*
1698 		 * Need to offset the mbuf if the header we're going to add
1699 		 * will misalign.
1700 		 */
1701 		if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0)
1702 			m_adj(m, VTNET_ETHER_ALIGN);
1703 
1704 		err = bus_dmamap_create(sc->vtnet_rx_dmat, 0, &dmap);
1705 		if (err) {
1706 			printf("Failed to create dmamap, err :%d\n",
1707 			    err);
1708 			m_freem(m);
1709 			return (NULL);
1710 		}
1711 
1712 		nsegs = 0;
1713 		err = bus_dmamap_load_mbuf_sg(sc->vtnet_rx_dmat, dmap, m, segs,
1714 		    &nsegs, BUS_DMA_NOWAIT);
1715 		if (err != 0) {
1716 			printf("Failed to map mbuf into DMA visible memory, err: %d\n",
1717 			    err);
1718 			m_freem(m);
1719 			bus_dmamap_destroy(sc->vtnet_rx_dmat, dmap);
1720 			return (NULL);
1721 		}
1722 		KASSERT(nsegs == 1,
1723 		    ("%s: unexpected number of DMA segments for rx buffer: %d",
1724 		    __func__, nsegs));
1725 
1726 		vthdr->addr = segs[0].ds_addr;
1727 		vthdr->dmap = dmap;
1728 
1729 		if (m_head != NULL) {
1730 			m_tail->m_next = m;
1731 			m_tail = m;
1732 		} else
1733 			m_head = m_tail = m;
1734 	}
1735 
1736 	if (m_tailp != NULL)
1737 		*m_tailp = m_tail;
1738 
1739 	return (m_head);
1740 }
1741 
1742 /*
1743  * Slow path for when LRO without mergeable buffers is negotiated.
1744  */
1745 static int
1746 vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1747     int len0)
1748 {
1749 	struct vtnet_softc *sc;
1750 	struct mbuf *m, *m_prev, *m_new, *m_tail;
1751 	int len, clustersz, nreplace, error;
1752 
1753 	sc = rxq->vtnrx_sc;
1754 	clustersz = sc->vtnet_rx_clustersz - VTNET_RX_BUFFER_HEADER_OFFSET;
1755 	/*
1756 	 * Need to offset the mbuf if the header we're going to add will
1757 	 * misalign, account for that here.
1758 	 */
1759 	if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0)
1760 		clustersz -= VTNET_ETHER_ALIGN;
1761 
1762 	m_prev = NULL;
1763 	m_tail = NULL;
1764 	nreplace = 0;
1765 
1766 	m = m0;
1767 	len = len0;
1768 
1769 	/*
1770 	 * Since these mbuf chains are so large, avoid allocating a complete
1771 	 * replacement when the received frame did not consume the entire
1772 	 * chain. Unused mbufs are moved to the tail of the replacement mbuf.
1773 	 */
1774 	while (len > 0) {
1775 		if (m == NULL) {
1776 			sc->vtnet_stats.rx_frame_too_large++;
1777 			return (EMSGSIZE);
1778 		}
1779 
1780 		/*
1781 		 * Every mbuf should have the expected cluster size since that
1782 		 * is also used to allocate the replacements.
1783 		 */
1784 		KASSERT(m->m_len == clustersz,
1785 		    ("%s: mbuf size %d not expected cluster size %d", __func__,
1786 		    m->m_len, clustersz));
1787 
1788 		m->m_len = MIN(m->m_len, len);
1789 		len -= m->m_len;
1790 
1791 		m_prev = m;
1792 		m = m->m_next;
1793 		nreplace++;
1794 	}
1795 
1796 	KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
1797 	    ("%s: invalid replacement mbuf count %d max %d", __func__,
1798 	    nreplace, sc->vtnet_rx_nmbufs));
1799 
1800 	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1801 	if (m_new == NULL) {
1802 		m_prev->m_len = clustersz;
1803 		return (ENOBUFS);
1804 	}
1805 
1806 	/*
1807 	 * Move any unused mbufs from the received mbuf chain onto the
1808 	 * end of the replacement chain.
1809 	 */
1810 	if (m_prev->m_next != NULL) {
1811 		m_tail->m_next = m_prev->m_next;
1812 		m_prev->m_next = NULL;
1813 	}
1814 
1815 	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1816 	if (error) {
1817 		/*
1818 		 * The replacement is suppose to be an copy of the one
1819 		 * dequeued so this is a very unexpected error.
1820 		 *
1821 		 * Restore the m0 chain to the original state if it was
1822 		 * modified so we can then discard it.
1823 		 */
1824 		if (m_tail->m_next != NULL) {
1825 			m_prev->m_next = m_tail->m_next;
1826 			m_tail->m_next = NULL;
1827 		}
1828 		m_prev->m_len = clustersz;
1829 		sc->vtnet_stats.rx_enq_replacement_failed++;
1830 		m_freem(m_new);
1831 	}
1832 
1833 	return (error);
1834 }
1835 
1836 static int
1837 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1838 {
1839 	struct vtnet_softc *sc;
1840 	struct mbuf *m_new;
1841 	int error;
1842 
1843 	sc = rxq->vtnrx_sc;
1844 
1845 	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1846 		return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));
1847 
1848 	MPASS(m->m_next == NULL);
1849 	if (m->m_len < len)
1850 		return (EMSGSIZE);
1851 
1852 	m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1853 	if (m_new == NULL)
1854 		return (ENOBUFS);
1855 
1856 	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1857 	if (error) {
1858 		sc->vtnet_stats.rx_enq_replacement_failed++;
1859 		m_freem(m_new);
1860 	} else
1861 		m->m_len = len;
1862 
1863 	return (error);
1864 }
1865 
1866 static int
1867 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1868 {
1869 	struct vtnet_rx_buffer_header *hdr;
1870 	struct vtnet_softc *sc;
1871 	struct sglist *sg;
1872 	int header_inlined, error;
1873 	bus_addr_t paddr;
1874 	struct mbuf *mp;
1875 
1876 	sc = rxq->vtnrx_sc;
1877 	sg = rxq->vtnrx_sg;
1878 
1879 	KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1880 	    ("%s: mbuf chain without LRO_NOMRG", __func__));
1881 	VTNET_RXQ_LOCK_ASSERT(rxq);
1882 
1883 	sglist_reset(sg);
1884 	header_inlined = vtnet_modern(sc) ||
1885 	    (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */
1886 
1887 	hdr = vtnet_mbuf_to_rx_buffer_header(sc, m);
1888 	paddr = hdr->addr;
1889 
1890 	/*
1891 	 * Note: The mbuf has been already adjusted when we allocate it if we
1892 	 * have to do strict alignment.
1893 	 */
1894 	if (header_inlined) {
1895 		error = sglist_append_phys(sg, paddr, m->m_len);
1896 	} else {
1897 		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1898 
1899 		/* Append the header and remaining mbuf data. */
1900 		error = sglist_append_phys(sg, paddr, sc->vtnet_hdr_size);
1901 		if (error)
1902 			return (error);
1903 		error = sglist_append_phys(sg,
1904 		    paddr + sizeof(struct vtnet_rx_header),
1905 		    m->m_len - sizeof(struct vtnet_rx_header));
1906 		if (error)
1907 			return (error);
1908 
1909 		mp = m->m_next;
1910 		while (mp) {
1911 			hdr = vtnet_mbuf_to_rx_buffer_header(sc, mp);
1912 			paddr = hdr->addr;
1913 			error = sglist_append_phys(sg, paddr, mp->m_len);
1914 			if (error)
1915 				return (error);
1916 
1917 			mp = mp->m_next;
1918 		}
1919 	}
1920 
1921 	if (error)
1922 		return (error);
1923 
1924 	return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
1925 }
1926 
1927 static int
1928 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1929 {
1930 	struct vtnet_softc *sc;
1931 	struct mbuf *m;
1932 	int error;
1933 
1934 	sc = rxq->vtnrx_sc;
1935 
1936 	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1937 	if (m == NULL)
1938 		return (ENOBUFS);
1939 
1940 	error = vtnet_rxq_enqueue_buf(rxq, m);
1941 	if (error)
1942 		m_freem(m);
1943 
1944 	return (error);
1945 }
1946 
1947 #if defined(INET) || defined(INET6)
1948 static void
1949 vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, bool isipv6,
1950     int protocol, struct virtio_net_hdr *hdr)
1951 {
1952 	/*
1953 	 * The packet is likely from another VM on the same host or from the
1954 	 * host that itself performed checksum offloading so Tx/Rx is basically
1955 	 * a memcpy and the checksum has little value so far.
1956 	 */
1957 
1958 	KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
1959 	    ("%s: unsupported IP protocol %d", __func__, protocol));
1960 
1961 	/*
1962 	 * Just forward the order to compute the checksum by setting
1963 	 * the corresponding mbuf flag (e.g., CSUM_TCP).
1964 	 */
1965 	switch (protocol) {
1966 	case IPPROTO_TCP:
1967 		m->m_pkthdr.csum_flags |= (isipv6 ? CSUM_TCP_IPV6 : CSUM_TCP);
1968 		break;
1969 	case IPPROTO_UDP:
1970 		m->m_pkthdr.csum_flags |= (isipv6 ? CSUM_UDP_IPV6 : CSUM_UDP);
1971 		break;
1972 	}
1973 	m->m_pkthdr.csum_data = hdr->csum_offset;
1974 }
1975 
1976 static void
1977 vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m, int protocol)
1978 {
1979 	KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
1980 	    ("%s: unsupported IP protocol %d", __func__, protocol));
1981 
1982 	m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1983 	m->m_pkthdr.csum_data = 0xFFFF;
1984 }
1985 
1986 static int
1987 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1988     struct virtio_net_hdr *hdr)
1989 {
1990 	const struct ether_header *eh;
1991 	struct vtnet_softc *sc;
1992 	int hoff, protocol;
1993 	uint16_t etype;
1994 	bool isipv6;
1995 
1996 	KASSERT(hdr->flags &
1997 	    (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID),
1998 	    ("%s: missing checksum offloading flag %x", __func__, hdr->flags));
1999 
2000 	eh = mtod(m, const struct ether_header *);
2001 	etype = ntohs(eh->ether_type);
2002 	if (etype == ETHERTYPE_VLAN) {
2003 		/* TODO BMV: Handle QinQ. */
2004 		const struct ether_vlan_header *evh =
2005 		    mtod(m, const struct ether_vlan_header *);
2006 		etype = ntohs(evh->evl_proto);
2007 		hoff = sizeof(struct ether_vlan_header);
2008 	} else
2009 		hoff = sizeof(struct ether_header);
2010 
2011 	sc = rxq->vtnrx_sc;
2012 
2013 	/* Check whether ethernet type is IP or IPv6, and get protocol. */
2014 	switch (etype) {
2015 #if defined(INET)
2016 	case ETHERTYPE_IP:
2017 		if (__predict_false(m->m_len < hoff + sizeof(struct ip))) {
2018 			sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
2019 			return (1);
2020 		} else {
2021 			struct ip *ip = (struct ip *)(m->m_data + hoff);
2022 			protocol = ip->ip_p;
2023 		}
2024 		isipv6 = false;
2025 		break;
2026 #endif
2027 #if defined(INET6)
2028 	case ETHERTYPE_IPV6:
2029 		if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
2030 		    || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0) {
2031 			sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
2032 			return (1);
2033 		}
2034 		isipv6 = true;
2035 		break;
2036 #endif
2037 	default:
2038 		sc->vtnet_stats.rx_csum_bad_ethtype++;
2039 		return (1);
2040 	}
2041 
2042 	/* Check whether protocol is TCP or UDP. */
2043 	switch (protocol) {
2044 	case IPPROTO_TCP:
2045 	case IPPROTO_UDP:
2046 		break;
2047 	default:
2048 		/*
2049 		 * FreeBSD does not support checksum offloading of this
2050 		 * protocol here.
2051 		 */
2052 		sc->vtnet_stats.rx_csum_bad_ipproto++;
2053 		return (1);
2054 	}
2055 
2056 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
2057 		vtnet_rxq_csum_needs_csum(rxq, m, isipv6, protocol, hdr);
2058 	else /* VIRTIO_NET_HDR_F_DATA_VALID */
2059 		vtnet_rxq_csum_data_valid(rxq, m, protocol);
2060 
2061 	return (0);
2062 }
2063 #endif
2064 
2065 static void
2066 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
2067 {
2068 	struct mbuf *m;
2069 
2070 	while (--nbufs > 0) {
2071 		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
2072 		if (m == NULL)
2073 			break;
2074 		vtnet_rxq_discard_buf(rxq, m);
2075 	}
2076 }
2077 
2078 static void
2079 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
2080 {
2081 	int error __diagused;
2082 
2083 	/*
2084 	 * Requeue the discarded mbuf. This should always be successful
2085 	 * since it was just dequeued.
2086 	 */
2087 	error = vtnet_rxq_enqueue_buf(rxq, m);
2088 	KASSERT(error == 0,
2089 	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
2090 }
2091 
2092 static int
2093 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
2094 {
2095 	struct vtnet_softc *sc;
2096 	struct virtqueue *vq;
2097 	struct mbuf *m_tail;
2098 
2099 	sc = rxq->vtnrx_sc;
2100 	vq = rxq->vtnrx_vq;
2101 	m_tail = m_head;
2102 
2103 	while (--nbufs > 0) {
2104 		struct vtnet_rx_buffer_header *vthdr;
2105 		struct mbuf *m;
2106 		uint32_t len;
2107 
2108 		m = virtqueue_dequeue(vq, &len);
2109 		if (m == NULL) {
2110 			rxq->vtnrx_stats.vrxs_ierrors++;
2111 			goto fail;
2112 		}
2113 
2114 		vthdr = vtnet_mbuf_to_rx_buffer_header(sc, m);
2115 		bus_dmamap_sync(sc->vtnet_rx_dmat, vthdr->dmap,
2116 		    BUS_DMASYNC_POSTREAD);
2117 
2118 		if (vtnet_rxq_new_buf(rxq) != 0) {
2119 			rxq->vtnrx_stats.vrxs_iqdrops++;
2120 			vtnet_rxq_discard_buf(rxq, m);
2121 			if (nbufs > 1)
2122 				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
2123 			goto fail;
2124 		}
2125 
2126 		bus_dmamap_unload(sc->vtnet_rx_dmat, vthdr->dmap);
2127 		bus_dmamap_destroy(sc->vtnet_rx_dmat, vthdr->dmap);
2128 
2129 		if (m->m_len < len)
2130 			len = m->m_len;
2131 
2132 		m->m_len = len;
2133 		m->m_flags &= ~M_PKTHDR;
2134 
2135 		m_head->m_pkthdr.len += len;
2136 		m_tail->m_next = m;
2137 		m_tail = m;
2138 	}
2139 
2140 	return (0);
2141 
2142 fail:
2143 	sc->vtnet_stats.rx_mergeable_failed++;
2144 	m_freem(m_head);
2145 
2146 	return (1);
2147 }
2148 
2149 #if defined(INET) || defined(INET6)
2150 static int
2151 vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m)
2152 {
2153 	struct lro_ctrl *lro;
2154 
2155 	lro = &rxq->vtnrx_lro;
2156 
2157 	if (lro->lro_mbuf_max != 0) {
2158 		tcp_lro_queue_mbuf(lro, m);
2159 		return (0);
2160 	}
2161 
2162 	return (tcp_lro_rx(lro, m, 0));
2163 }
2164 #endif
2165 
2166 static void
2167 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
2168     struct virtio_net_hdr *hdr)
2169 {
2170 	struct vtnet_softc *sc;
2171 	if_t ifp;
2172 
2173 	sc = rxq->vtnrx_sc;
2174 	ifp = sc->vtnet_ifp;
2175 
2176 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
2177 		struct ether_header *eh = mtod(m, struct ether_header *);
2178 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2179 			vtnet_vlan_tag_remove(m);
2180 			/*
2181 			 * With the 802.1Q header removed, update the
2182 			 * checksum starting location accordingly.
2183 			 */
2184 			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
2185 				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
2186 		}
2187 	}
2188 
2189 	if (sc->vtnet_act_vq_pairs == 1) {
2190 		/*
2191 		 * When RSS is not needed (one active rx queue), let the upper
2192 		 * layer know and react.
2193 		 */
2194 		M_HASHTYPE_CLEAR(m);
2195 	} else {
2196 		m->m_pkthdr.flowid = rxq->vtnrx_id;
2197 		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2198 	}
2199 
2200 	if (hdr->flags &
2201 	    (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
2202 #if defined(INET) || defined(INET6)
2203 		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
2204 			rxq->vtnrx_stats.vrxs_csum++;
2205 		else
2206 			rxq->vtnrx_stats.vrxs_csum_failed++;
2207 #else
2208 		sc->vtnet_stats.rx_csum_bad_ethtype++;
2209 		rxq->vtnrx_stats.vrxs_csum_failed++;
2210 #endif
2211 	}
2212 
2213 	if (hdr->gso_size != 0) {
2214 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2215 		case VIRTIO_NET_HDR_GSO_TCPV4:
2216 		case VIRTIO_NET_HDR_GSO_TCPV6:
2217 			m->m_pkthdr.lro_nsegs =
2218 			    howmany(m->m_pkthdr.len, hdr->gso_size);
2219 			rxq->vtnrx_stats.vrxs_host_lro++;
2220 			break;
2221 		}
2222 	}
2223 
2224 	rxq->vtnrx_stats.vrxs_ipackets++;
2225 	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
2226 
2227 #if defined(INET) || defined(INET6)
2228 	if (vtnet_software_lro(sc) && if_getcapenable(ifp) & IFCAP_LRO) {
2229 		if (vtnet_lro_rx(rxq, m) == 0)
2230 			return;
2231 	}
2232 #endif
2233 
2234 	if_input(ifp, m);
2235 }
2236 
/*
 * Drain completed buffers from the Rx virtqueue and pass the resulting
 * frames up the stack.
 *
 * At most sc->vtnet_rx_process_limit loop iterations are performed. For
 * every dequeued buffer the DMA maps are synced, the buffer is replaced in
 * the ring, the virtio-net header is copied (in guest byte order) and
 * stripped, pfil hooks are run, and the frame is handed to
 * vtnet_rxq_input(). Frames that are too short, or whose buffers cannot be
 * replaced, are requeued and counted as errors/drops.
 *
 * The Rx queue lock must be held (asserted below).
 *
 * Returns 0 if the loop ended with budget left over, EAGAIN otherwise.
 * NOTE: count is decremented by the loop test before the dequeue, so an
 * empty dequeue on the last permitted iteration also yields EAGAIN.
 */
static int
vtnet_rxq_eof(struct vtnet_rxq *rxq)
{
	struct virtio_net_hdr lhdr, *hdr;
	struct vtnet_rx_buffer_header *vthdr;
	struct vtnet_softc *sc;
	if_t ifp;
	struct virtqueue *vq;
	int deq, count;

	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
	ifp = sc->vtnet_ifp;
	deq = 0;
	count = sc->vtnet_rx_process_limit;

	VTNET_RXQ_LOCK_ASSERT(rxq);

	CURVNET_SET(if_getvnet(ifp));
	while (count-- > 0) {
		struct mbuf *m, *mp;
		uint32_t len, nbufs, adjsz;
		uint32_t synced;

		m = virtqueue_dequeue(vq, &len);
		if (m == NULL)
			break;
		deq++;

		mp = m;

		/*
		 * Sync all mbufs in this packet. There will only be a single
		 * mbuf unless LRO is in use.
		 */
		synced = 0;
		while (mp && synced < len) {
			vthdr = vtnet_mbuf_to_rx_buffer_header(sc, mp);
			bus_dmamap_sync(sc->vtnet_rx_dmat, vthdr->dmap,
			    BUS_DMASYNC_POSTREAD);

			synced += mp->m_len;
			mp = mp->m_next;
		}

		/* Too short to hold the virtio header plus an Ethernet header. */
		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
			rxq->vtnrx_stats.vrxs_ierrors++;
			vtnet_rxq_discard_buf(rxq, m);
			continue;
		}

		/*
		 * Determine how many buffers make up this packet (nbufs) and
		 * how many header bytes to strip from the front (adjsz).
		 */
		if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
			struct virtio_net_hdr_mrg_rxbuf *mhdr =
			    mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
			kmsan_mark(mhdr, sizeof(*mhdr), KMSAN_STATE_INITED);
			nbufs = vtnet_htog16(sc, mhdr->num_buffers);
			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
		} else if (vtnet_modern(sc)) {
			nbufs = 1; /* num_buffers is always 1 */
			adjsz = sizeof(struct virtio_net_hdr_v1);
		} else {
			nbufs = 1;
			adjsz = sizeof(struct vtnet_rx_header);
			/*
			 * Account for our gap between the header and start of
			 * data to keep the segments separated.
			 */
			len += VTNET_RX_HEADER_PAD;
		}

		/*
		 * If the buffer cannot be replaced, requeue it (and any
		 * remaining merged buffers) and count the packet as dropped.
		 */
		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
			rxq->vtnrx_stats.vrxs_iqdrops++;
			vtnet_rxq_discard_buf(rxq, m);
			if (nbufs > 1)
				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
			continue;
		}

		/*
		 * The chain is no longer posted to the ring; release the DMA
		 * maps of the mbufs that were synced above.
		 */
		mp = m;
		synced = 0;
		while (mp && synced < len) {
			vthdr = vtnet_mbuf_to_rx_buffer_header(sc, mp);

			bus_dmamap_unload(sc->vtnet_rx_dmat, vthdr->dmap);
			bus_dmamap_destroy(sc->vtnet_rx_dmat, vthdr->dmap);

			synced += mp->m_len;
			mp = mp->m_next;
		}

		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = ifp;
		m->m_pkthdr.csum_flags = 0;

		if (nbufs > 1) {
			/* Dequeue the rest of chain. */
			/* On failure the callee has already freed the chain. */
			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
				continue;
		}

		kmsan_mark_mbuf(m, KMSAN_STATE_INITED);

		/*
		 * Save an endian swapped version of the header prior to it
		 * being stripped. The header is always at the start of the
		 * mbuf data. num_buffers was already saved (and not needed)
		 * so use the standard header.
		 */
		hdr = mtod(m, struct virtio_net_hdr *);
		lhdr.flags = hdr->flags;
		lhdr.gso_type = hdr->gso_type;
		lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len);
		lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size);
		lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start);
		lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset);
		m_adj(m, adjsz);

		if (PFIL_HOOKED_IN(sc->vtnet_pfil)) {
			pfil_return_t pfil;

			pfil = pfil_mbuf_in(sc->vtnet_pfil, &m, ifp, NULL);
			switch (pfil) {
			case PFIL_DROPPED:
			case PFIL_CONSUMED:
				/* The filter kept or dropped the mbuf; move on. */
				continue;
			default:
				KASSERT(pfil == PFIL_PASS,
				    ("Filter returned %d!", pfil));
			}
		}

		vtnet_rxq_input(rxq, m, &lhdr);
	}

	/*
	 * Something was dequeued: flush pending software LRO state and
	 * notify the virtqueue.
	 */
	if (deq > 0) {
#if defined(INET) || defined(INET6)
		if (vtnet_software_lro(sc))
			tcp_lro_flush_all(&rxq->vtnrx_lro);
#endif
		virtqueue_notify(vq);
	}
	CURVNET_RESTORE();

	return (count > 0 ? 0 : EAGAIN);
}
2382 
/*
 * Common receive processing, called from both the interrupt handler and
 * the taskqueue handler.
 *
 * Runs vtnet_rxq_eof() under the Rx queue lock and then tries to re-enable
 * the queue interrupt. If more work is reported, or enabling the interrupt
 * reports a non-zero result, processing is retried up to 'tries' times
 * before the remaining work is deferred to the queue's taskqueue.
 */
static void
vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries)
{
	struct vtnet_softc *sc;
	if_t ifp;
	u_int more;
#ifdef DEV_NETMAP
	int nmirq;
#endif /* DEV_NETMAP */

	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;

	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
		/*
		 * Ignore this interrupt. Either this is a spurious interrupt
		 * or multiqueue without per-VQ MSIX so every queue needs to
		 * be polled (a brain dead configuration we could try harder
		 * to avoid).
		 */
		vtnet_rxq_disable_intr(rxq);
		return;
	}

	VTNET_RXQ_LOCK(rxq);

#ifdef DEV_NETMAP
	/*
	 * We call netmap_rx_irq() under lock to prevent concurrent calls.
	 * This is not necessary to serialize the access to the RX vq, but
	 * rather to avoid races that may happen if this interface is
	 * attached to a VALE switch, which would cause received packets
	 * to stall in the RX queue (nm_kr_tryget() could find the kring
	 * busy when called from netmap_bwrap_intr_notify()).
	 */
	nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
	if (nmirq != NM_IRQ_PASS) {
		VTNET_RXQ_UNLOCK(rxq);
		if (nmirq == NM_IRQ_RESCHED) {
			taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
		}
		return;
	}
#endif /* DEV_NETMAP */

again:
	/* Nothing to do while the interface is not marked running. */
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		VTNET_RXQ_UNLOCK(rxq);
		return;
	}

	more = vtnet_rxq_eof(rxq);
	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
		/*
		 * The interrupt is only enabled above when !more (short
		 * circuit), so it only needs disabling again in that case.
		 */
		if (!more)
			vtnet_rxq_disable_intr(rxq);
		/*
		 * This is an occasional condition or race (when !more),
		 * so retry a few times before scheduling the taskqueue.
		 */
		if (tries-- > 0)
			goto again;

		rxq->vtnrx_stats.vrxs_rescheduled++;
		VTNET_RXQ_UNLOCK(rxq);
		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
	} else
		VTNET_RXQ_UNLOCK(rxq);
}
2451 
2452 static void
2453 vtnet_rx_vq_intr(void *xrxq)
2454 {
2455 	struct vtnet_rxq *rxq;
2456 
2457 	rxq = xrxq;
2458 	vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES);
2459 }
2460 
2461 static void
2462 vtnet_rxq_tq_intr(void *xrxq, int pending __unused)
2463 {
2464 	struct vtnet_rxq *rxq;
2465 
2466 	rxq = xrxq;
2467 	vtnet_rx_vq_process(rxq, 0);
2468 }
2469 
2470 static int
2471 vtnet_txq_intr_threshold(struct vtnet_txq *txq)
2472 {
2473 	struct vtnet_softc *sc;
2474 	int threshold;
2475 
2476 	sc = txq->vtntx_sc;
2477 
2478 	/*
2479 	 * The Tx interrupt is disabled until the queue free count falls
2480 	 * below our threshold. Completed frames are drained from the Tx
2481 	 * virtqueue before transmitting new frames and in the watchdog
2482 	 * callout, so the frequency of Tx interrupts is greatly reduced,
2483 	 * at the cost of not freeing mbufs as quickly as they otherwise
2484 	 * would be.
2485 	 */
2486 	threshold = virtqueue_size(txq->vtntx_vq) / 4;
2487 
2488 	/*
2489 	 * Without indirect descriptors, leave enough room for the most
2490 	 * segments we handle.
2491 	 */
2492 	if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
2493 	    threshold < sc->vtnet_tx_nsegs)
2494 		threshold = sc->vtnet_tx_nsegs;
2495 
2496 	return (threshold);
2497 }
2498 
2499 static int
2500 vtnet_txq_below_threshold(struct vtnet_txq *txq)
2501 {
2502 	struct virtqueue *vq;
2503 
2504 	vq = txq->vtntx_vq;
2505 
2506 	return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold);
2507 }
2508 
2509 static int
2510 vtnet_txq_notify(struct vtnet_txq *txq)
2511 {
2512 	struct virtqueue *vq;
2513 
2514 	vq = txq->vtntx_vq;
2515 
2516 	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2517 	virtqueue_notify(vq);
2518 
2519 	if (vtnet_txq_enable_intr(txq) == 0)
2520 		return (0);
2521 
2522 	/*
2523 	 * Drain frames that were completed since last checked. If this
2524 	 * causes the queue to go above the threshold, the caller should
2525 	 * continue transmitting.
2526 	 */
2527 	if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
2528 		virtqueue_disable_intr(vq);
2529 		return (1);
2530 	}
2531 
2532 	return (0);
2533 }
2534 
2535 static void
2536 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
2537 {
2538 	struct virtqueue *vq;
2539 	struct vtnet_tx_header *txhdr;
2540 	int last;
2541 #ifdef DEV_NETMAP
2542 	struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp),
2543 							txq->vtntx_id, NR_TX);
2544 #else  /* !DEV_NETMAP */
2545 	void *kring = NULL;
2546 #endif /* !DEV_NETMAP */
2547 
2548 	vq = txq->vtntx_vq;
2549 	last = 0;
2550 
2551 	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
2552 		if (kring == NULL) {
2553 			bus_dmamap_unload(txq->vtntx_sc->vtnet_tx_dmat,
2554 			    txhdr->dmap);
2555 			bus_dmamap_destroy(txq->vtntx_sc->vtnet_tx_dmat,
2556 			    txhdr->dmap);
2557 			bus_dmamap_unload(txq->vtntx_sc->vtnet_tx_dmat,
2558 			    txhdr->hdr_dmap);
2559 			bus_dmamap_destroy(txq->vtntx_sc->vtnet_tx_dmat,
2560 			    txhdr->hdr_dmap);
2561 			m_freem(txhdr->vth_mbuf);
2562 			uma_zfree(vtnet_tx_header_zone, txhdr);
2563 		}
2564 	}
2565 
2566 	KASSERT(virtqueue_empty(vq),
2567 	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
2568 }
2569 
2570 /*
2571  * BMV: This can go away once we finally have offsets in the mbuf header.
2572  */
2573 static int
2574 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype,
2575     int *proto, int *start)
2576 {
2577 	struct vtnet_softc *sc;
2578 	struct ether_vlan_header *evh;
2579 #if defined(INET) || defined(INET6)
2580 	int offset;
2581 #endif
2582 
2583 	sc = txq->vtntx_sc;
2584 
2585 	evh = mtod(m, struct ether_vlan_header *);
2586 	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2587 		/* BMV: We should handle nested VLAN tags too. */
2588 		*etype = ntohs(evh->evl_proto);
2589 #if defined(INET) || defined(INET6)
2590 		offset = sizeof(struct ether_vlan_header);
2591 #endif
2592 	} else {
2593 		*etype = ntohs(evh->evl_encap_proto);
2594 #if defined(INET) || defined(INET6)
2595 		offset = sizeof(struct ether_header);
2596 #endif
2597 	}
2598 
2599 	switch (*etype) {
2600 #if defined(INET)
2601 	case ETHERTYPE_IP: {
2602 		struct ip *ip, iphdr;
2603 		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2604 			m_copydata(m, offset, sizeof(struct ip),
2605 			    (caddr_t) &iphdr);
2606 			ip = &iphdr;
2607 		} else
2608 			ip = (struct ip *)(m->m_data + offset);
2609 		*proto = ip->ip_p;
2610 		*start = offset + (ip->ip_hl << 2);
2611 		break;
2612 	}
2613 #endif
2614 #if defined(INET6)
2615 	case ETHERTYPE_IPV6:
2616 		*proto = -1;
2617 		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2618 		/* Assert the network stack sent us a valid packet. */
2619 		KASSERT(*start > offset,
2620 		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2621 		    *start, offset, *proto));
2622 		break;
2623 #endif
2624 	default:
2625 		sc->vtnet_stats.tx_csum_unknown_ethtype++;
2626 		return (EINVAL);
2627 	}
2628 
2629 	return (0);
2630 }
2631 
2632 static int
2633 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2634     int offset, struct virtio_net_hdr *hdr)
2635 {
2636 	static struct timeval lastecn;
2637 	static int curecn;
2638 	struct vtnet_softc *sc;
2639 	struct tcphdr *tcp, tcphdr;
2640 
2641 	sc = txq->vtntx_sc;
2642 
2643 	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2644 		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2645 		tcp = &tcphdr;
2646 	} else
2647 		tcp = (struct tcphdr *)(m->m_data + offset);
2648 
2649 	hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2));
2650 	hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz);
2651 	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2652 	    VIRTIO_NET_HDR_GSO_TCPV6;
2653 
2654 	if (__predict_false(tcp_get_flags(tcp) & TH_CWR)) {
2655 		/*
2656 		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In
2657 		 * FreeBSD, ECN support is not on a per-interface basis,
2658 		 * but globally via the net.inet.tcp.ecn.enable sysctl
2659 		 * knob. The default is off.
2660 		 */
2661 		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2662 			if (ppsratecheck(&lastecn, &curecn, 1))
2663 				if_printf(sc->vtnet_ifp,
2664 				    "TSO with ECN not negotiated with host\n");
2665 			return (ENOTSUP);
2666 		}
2667 		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2668 	}
2669 
2670 	txq->vtntx_stats.vtxs_tso++;
2671 
2672 	return (0);
2673 }
2674 
2675 static struct mbuf *
2676 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2677     struct virtio_net_hdr *hdr)
2678 {
2679 	struct vtnet_softc *sc;
2680 	int flags, etype, csum_start, proto, error;
2681 
2682 	sc = txq->vtntx_sc;
2683 	flags = m->m_pkthdr.csum_flags;
2684 
2685 	error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2686 	if (error)
2687 		goto drop;
2688 
2689 	if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) {
2690 		/* Sanity check the parsed mbuf matches the offload flags. */
2691 		if (__predict_false((flags & VTNET_CSUM_OFFLOAD &&
2692 		    etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6
2693 		    && etype != ETHERTYPE_IPV6))) {
2694 			sc->vtnet_stats.tx_csum_proto_mismatch++;
2695 			goto drop;
2696 		}
2697 
2698 		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2699 		hdr->csum_start = vtnet_gtoh16(sc, csum_start);
2700 		hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
2701 		txq->vtntx_stats.vtxs_csum++;
2702 	}
2703 
2704 	if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
2705 		/*
2706 		 * Sanity check the parsed mbuf IP protocol is TCP, and
2707 		 * VirtIO TSO reqires the checksum offloading above.
2708 		 */
2709 		if (__predict_false(proto != IPPROTO_TCP)) {
2710 			sc->vtnet_stats.tx_tso_not_tcp++;
2711 			goto drop;
2712 		} else if (__predict_false((hdr->flags &
2713 		    VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) {
2714 			sc->vtnet_stats.tx_tso_without_csum++;
2715 			goto drop;
2716 		}
2717 
2718 		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2719 		if (error)
2720 			goto drop;
2721 	}
2722 
2723 	return (m);
2724 
2725 drop:
2726 	m_freem(m);
2727 	return (NULL);
2728 }
2729 
2730 static void
2731 vtnet_txq_enqueue_callback(void *arg, bus_dma_segment_t *segs,
2732     int nsegs, int error)
2733 {
2734 	vm_paddr_t *hdr_paddr;
2735 
2736 	if (error != 0)
2737 		return;
2738 
2739 	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
2740 
2741 	hdr_paddr = (vm_paddr_t *)arg;
2742 	*hdr_paddr = segs[0].ds_addr;
2743 }
2744 
2745 static int
2746 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2747     struct vtnet_tx_header *txhdr)
2748 {
2749 	bus_dma_segment_t segs[VTNET_TX_SEGS_MAX];
2750 	int nsegs;
2751 	struct vtnet_softc *sc;
2752 	struct virtqueue *vq;
2753 	struct sglist *sg;
2754 	struct mbuf *m;
2755 	int error;
2756 	vm_paddr_t hdr_paddr;
2757 	bus_dmamap_t hdr_dmap;
2758 	bus_dmamap_t dmap;
2759 	int i;
2760 
2761 	sc = txq->vtntx_sc;
2762 	vq = txq->vtntx_vq;
2763 	sg = txq->vtntx_sg;
2764 	m = *m_head;
2765 
2766 	sglist_reset(sg);
2767 
2768 	error = bus_dmamap_create(sc->vtnet_tx_dmat, 0, &hdr_dmap);
2769 	if (error)
2770 	    goto fail;
2771 
2772 	error = bus_dmamap_load(sc->vtnet_tx_dmat, hdr_dmap, &txhdr->vth_uhdr,
2773 	    sc->vtnet_hdr_size, vtnet_txq_enqueue_callback, &hdr_paddr,
2774 	    BUS_DMA_NOWAIT);
2775 	if (error)
2776 		goto fail_hdr_dmamap_destroy;
2777 
2778 	error = sglist_append_phys(sg, hdr_paddr, sc->vtnet_hdr_size);
2779 	if (error != 0 || sg->sg_nseg != 1) {
2780 		KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
2781 		    __func__, error, sg->sg_nseg));
2782 		goto fail_hdr_dmamap_unload;
2783 	}
2784 
2785 	bus_dmamap_sync(sc->vtnet_tx_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE);
2786 
2787 	error = bus_dmamap_create(sc->vtnet_tx_dmat, 0, &dmap);
2788 	if (error)
2789 		goto fail_hdr_dmamap_unload;
2790 
2791 	nsegs = 0;
2792 	error = bus_dmamap_load_mbuf_sg(sc->vtnet_tx_dmat, dmap, m, segs,
2793 	    &nsegs, BUS_DMA_NOWAIT);
2794 	if (error != 0)
2795 		goto fail_dmamap_destroy;
2796 	KASSERT(nsegs <= sc->vtnet_tx_nsegs,
2797 	    ("%s: unexpected number of DMA segments for tx buffer: %d (max %d)",
2798 	    __func__, nsegs, sc->vtnet_tx_nsegs));
2799 
2800 	bus_dmamap_sync(sc->vtnet_tx_dmat, dmap, BUS_DMASYNC_PREWRITE);
2801 
2802 	for (i = 0; i < nsegs && !error; i++)
2803 		error = sglist_append_phys(sg, segs[i].ds_addr, segs[i].ds_len);
2804 
2805 	if (error) {
2806 		sglist_reset(sg);
2807 		bus_dmamap_unload(sc->vtnet_tx_dmat, dmap);
2808 
2809 		error = sglist_append_phys(sg, hdr_paddr, sc->vtnet_hdr_size);
2810 		if (error != 0 || sg->sg_nseg != 1) {
2811 			KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
2812 			    __func__, error, sg->sg_nseg));
2813 			goto fail_dmamap_destroy;
2814 		}
2815 
2816 		m = m_defrag(m, M_NOWAIT);
2817 		if (m == NULL) {
2818 			sc->vtnet_stats.tx_defrag_failed++;
2819 			goto fail;
2820 		}
2821 
2822 		*m_head = m;
2823 		sc->vtnet_stats.tx_defragged++;
2824 
2825 		nsegs = 0;
2826 		error = bus_dmamap_load_mbuf_sg(sc->vtnet_tx_dmat, dmap, m,
2827 		    segs, &nsegs, BUS_DMA_NOWAIT);
2828 		if (error != 0)
2829 			goto fail_dmamap_destroy;
2830 		KASSERT(nsegs <= sc->vtnet_tx_nsegs,
2831 		    ("%s: unexpected number of DMA segments for tx buffer: %d (max %d)",
2832 		    __func__, nsegs, sc->vtnet_tx_nsegs));
2833 
2834 		bus_dmamap_sync(sc->vtnet_tx_dmat, dmap, BUS_DMASYNC_PREWRITE);
2835 
2836 		for (i = 0; i < nsegs && !error; i++)
2837 			error = sglist_append_phys(sg, segs[i].ds_addr,
2838 			    segs[i].ds_len);
2839 
2840 		if (error)
2841 			goto fail_dmamap_unload;
2842 	}
2843 
2844 	txhdr->vth_mbuf = m;
2845 	txhdr->dmap = dmap;
2846 	txhdr->hdr_dmap = hdr_dmap;
2847 
2848 	error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2849 
2850 	return (error);
2851 
2852 fail_dmamap_unload:
2853 	bus_dmamap_unload(sc->vtnet_tx_dmat, dmap);
2854 fail_dmamap_destroy:
2855 	bus_dmamap_destroy(sc->vtnet_tx_dmat, dmap);
2856 fail_hdr_dmamap_unload:
2857 	bus_dmamap_unload(sc->vtnet_tx_dmat, hdr_dmap);
2858 fail_hdr_dmamap_destroy:
2859 	bus_dmamap_destroy(sc->vtnet_tx_dmat, hdr_dmap);
2860 fail:
2861 	m_freem(*m_head);
2862 	*m_head = NULL;
2863 
2864 	return (ENOBUFS);
2865 }
2866 
2867 static int
2868 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
2869 {
2870 	struct vtnet_tx_header *txhdr;
2871 	struct virtio_net_hdr *hdr;
2872 	struct mbuf *m;
2873 	int error;
2874 
2875 	m = *m_head;
2876 	M_ASSERTPKTHDR(m);
2877 
2878 	txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
2879 	if (txhdr == NULL) {
2880 		m_freem(m);
2881 		*m_head = NULL;
2882 		return (ENOMEM);
2883 	}
2884 
2885 	/*
2886 	 * Always use the non-mergeable header, regardless if mergable headers
2887 	 * were negotiated, because for transmit num_buffers is always zero.
2888 	 * The vtnet_hdr_size is used to enqueue the right header size segment.
2889 	 */
2890 	hdr = &txhdr->vth_uhdr.hdr;
2891 
2892 	if (m->m_flags & M_VLANTAG) {
2893 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2894 		if ((*m_head = m) == NULL) {
2895 			error = ENOBUFS;
2896 			goto fail;
2897 		}
2898 		m->m_flags &= ~M_VLANTAG;
2899 	}
2900 
2901 	if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2902 		m = vtnet_txq_offload(txq, m, hdr);
2903 		if ((*m_head = m) == NULL) {
2904 			error = ENOBUFS;
2905 			goto fail;
2906 		}
2907 	}
2908 
2909 	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2910 fail:
2911 	if (error)
2912 		uma_zfree(vtnet_tx_header_zone, txhdr);
2913 
2914 	return (error);
2915 }
2916 
2917 
2918 static void
2919 vtnet_start_locked(struct vtnet_txq *txq, if_t ifp)
2920 {
2921 	struct vtnet_softc *sc;
2922 	struct virtqueue *vq;
2923 	struct mbuf *m0;
2924 	int tries, enq;
2925 
2926 	sc = txq->vtntx_sc;
2927 	vq = txq->vtntx_vq;
2928 	tries = 0;
2929 
2930 	VTNET_TXQ_LOCK_ASSERT(txq);
2931 
2932 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
2933 	    sc->vtnet_link_active == 0)
2934 		return;
2935 
2936 	vtnet_txq_eof(txq);
2937 
2938 again:
2939 	enq = 0;
2940 
2941 	while (!if_sendq_empty(ifp)) {
2942 		if (virtqueue_full(vq))
2943 			break;
2944 
2945 		m0 = if_dequeue(ifp);
2946 		if (m0 == NULL)
2947 			break;
2948 
2949 		if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
2950 			if (m0 != NULL)
2951 				if_sendq_prepend(ifp, m0);
2952 			break;
2953 		}
2954 
2955 		enq++;
2956 		ETHER_BPF_MTAP(ifp, m0);
2957 	}
2958 
2959 	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2960 		if (tries++ < VTNET_NOTIFY_RETRIES)
2961 			goto again;
2962 
2963 		txq->vtntx_stats.vtxs_rescheduled++;
2964 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2965 	}
2966 }
2967 
2968 static void
2969 vtnet_start(if_t ifp)
2970 {
2971 	struct vtnet_softc *sc;
2972 	struct vtnet_txq *txq;
2973 
2974 	sc = if_getsoftc(ifp);
2975 	txq = &sc->vtnet_txqs[0];
2976 
2977 	VTNET_TXQ_LOCK(txq);
2978 	vtnet_start_locked(txq, ifp);
2979 	VTNET_TXQ_UNLOCK(txq);
2980 }
2981 
2982 
2983 static int
2984 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2985 {
2986 	struct vtnet_softc *sc;
2987 	struct virtqueue *vq;
2988 	struct buf_ring *br;
2989 	if_t ifp;
2990 	int enq, tries, error;
2991 
2992 	sc = txq->vtntx_sc;
2993 	vq = txq->vtntx_vq;
2994 	br = txq->vtntx_br;
2995 	ifp = sc->vtnet_ifp;
2996 	tries = 0;
2997 	error = 0;
2998 
2999 	VTNET_TXQ_LOCK_ASSERT(txq);
3000 
3001 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
3002 	    sc->vtnet_link_active == 0) {
3003 		if (m != NULL)
3004 			error = drbr_enqueue(ifp, br, m);
3005 		return (error);
3006 	}
3007 
3008 	if (m != NULL) {
3009 		error = drbr_enqueue(ifp, br, m);
3010 		if (error)
3011 			return (error);
3012 	}
3013 
3014 	vtnet_txq_eof(txq);
3015 
3016 again:
3017 	enq = 0;
3018 
3019 	while ((m = drbr_peek(ifp, br)) != NULL) {
3020 		if (virtqueue_full(vq)) {
3021 			drbr_putback(ifp, br, m);
3022 			break;
3023 		}
3024 
3025 		if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
3026 			if (m != NULL)
3027 				drbr_putback(ifp, br, m);
3028 			else
3029 				drbr_advance(ifp, br);
3030 			break;
3031 		}
3032 		drbr_advance(ifp, br);
3033 
3034 		enq++;
3035 		ETHER_BPF_MTAP(ifp, m);
3036 	}
3037 
3038 	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
3039 		if (tries++ < VTNET_NOTIFY_RETRIES)
3040 			goto again;
3041 
3042 		txq->vtntx_stats.vtxs_rescheduled++;
3043 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
3044 	}
3045 
3046 	return (0);
3047 }
3048 
3049 static int
3050 vtnet_txq_mq_start(if_t ifp, struct mbuf *m)
3051 {
3052 	struct vtnet_softc *sc;
3053 	struct vtnet_txq *txq;
3054 	int i, npairs, error;
3055 
3056 	sc = if_getsoftc(ifp);
3057 	npairs = sc->vtnet_act_vq_pairs;
3058 
3059 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
3060 		i = m->m_pkthdr.flowid % npairs;
3061 	else
3062 		i = curcpu % npairs;
3063 
3064 	txq = &sc->vtnet_txqs[i];
3065 
3066 	if (VTNET_TXQ_TRYLOCK(txq) != 0) {
3067 		error = vtnet_txq_mq_start_locked(txq, m);
3068 		VTNET_TXQ_UNLOCK(txq);
3069 	} else {
3070 		error = drbr_enqueue(ifp, txq->vtntx_br, m);
3071 		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
3072 	}
3073 
3074 	return (error);
3075 }
3076 
3077 static void
3078 vtnet_txq_tq_deferred(void *xtxq, int pending __unused)
3079 {
3080 	struct vtnet_softc *sc;
3081 	struct vtnet_txq *txq;
3082 
3083 	txq = xtxq;
3084 	sc = txq->vtntx_sc;
3085 
3086 	VTNET_TXQ_LOCK(txq);
3087 	if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
3088 		vtnet_txq_mq_start_locked(txq, NULL);
3089 	VTNET_TXQ_UNLOCK(txq);
3090 }
3091 
3092 
3093 static void
3094 vtnet_txq_start(struct vtnet_txq *txq)
3095 {
3096 	struct vtnet_softc *sc;
3097 	if_t ifp;
3098 
3099 	sc = txq->vtntx_sc;
3100 	ifp = sc->vtnet_ifp;
3101 
3102 	if (!VTNET_ALTQ_ENABLED) {
3103 		if (!drbr_empty(ifp, txq->vtntx_br))
3104 			vtnet_txq_mq_start_locked(txq, NULL);
3105 	} else {
3106 		if (!if_sendq_empty(ifp))
3107 			vtnet_start_locked(txq, ifp);
3108 
3109 	}
3110 }
3111 
3112 static void
3113 vtnet_txq_tq_intr(void *xtxq, int pending __unused)
3114 {
3115 	struct vtnet_softc *sc;
3116 	struct vtnet_txq *txq;
3117 	if_t ifp;
3118 
3119 	txq = xtxq;
3120 	sc = txq->vtntx_sc;
3121 	ifp = sc->vtnet_ifp;
3122 
3123 	VTNET_TXQ_LOCK(txq);
3124 
3125 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
3126 		VTNET_TXQ_UNLOCK(txq);
3127 		return;
3128 	}
3129 
3130 	vtnet_txq_eof(txq);
3131 	vtnet_txq_start(txq);
3132 
3133 	VTNET_TXQ_UNLOCK(txq);
3134 }
3135 
3136 static int
3137 vtnet_txq_eof(struct vtnet_txq *txq)
3138 {
3139 	struct vtnet_softc *sc;
3140 	struct virtqueue *vq;
3141 	struct vtnet_tx_header *txhdr;
3142 	struct mbuf *m;
3143 	int deq;
3144 
3145 	vq = txq->vtntx_vq;
3146 	deq = 0;
3147 	VTNET_TXQ_LOCK_ASSERT(txq);
3148 
3149 	sc = txq->vtntx_sc;
3150 
3151 	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
3152 		m = txhdr->vth_mbuf;
3153 		deq++;
3154 
3155 		txq->vtntx_stats.vtxs_opackets++;
3156 		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
3157 		if (m->m_flags & M_MCAST)
3158 			txq->vtntx_stats.vtxs_omcasts++;
3159 
3160 		bus_dmamap_unload(sc->vtnet_tx_dmat, txhdr->dmap);
3161 		bus_dmamap_destroy(sc->vtnet_tx_dmat, txhdr->dmap);
3162 		bus_dmamap_unload(sc->vtnet_tx_dmat, txhdr->hdr_dmap);
3163 		bus_dmamap_destroy(sc->vtnet_tx_dmat, txhdr->hdr_dmap);
3164 
3165 		m_freem(m);
3166 		uma_zfree(vtnet_tx_header_zone, txhdr);
3167 	}
3168 
3169 	if (virtqueue_empty(vq))
3170 		txq->vtntx_watchdog = 0;
3171 
3172 	return (deq);
3173 }
3174 
3175 static void
3176 vtnet_tx_vq_intr(void *xtxq)
3177 {
3178 	struct vtnet_softc *sc;
3179 	struct vtnet_txq *txq;
3180 	if_t ifp;
3181 
3182 	txq = xtxq;
3183 	sc = txq->vtntx_sc;
3184 	ifp = sc->vtnet_ifp;
3185 
3186 	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
3187 		/*
3188 		 * Ignore this interrupt. Either this is a spurious interrupt
3189 		 * or multiqueue without per-VQ MSIX so every queue needs to
3190 		 * be polled (a brain dead configuration we could try harder
3191 		 * to avoid).
3192 		 */
3193 		vtnet_txq_disable_intr(txq);
3194 		return;
3195 	}
3196 
3197 #ifdef DEV_NETMAP
3198 	if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
3199 		return;
3200 #endif /* DEV_NETMAP */
3201 
3202 	VTNET_TXQ_LOCK(txq);
3203 
3204 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
3205 		VTNET_TXQ_UNLOCK(txq);
3206 		return;
3207 	}
3208 
3209 	vtnet_txq_eof(txq);
3210 	vtnet_txq_start(txq);
3211 
3212 	VTNET_TXQ_UNLOCK(txq);
3213 }
3214 
3215 static void
3216 vtnet_tx_start_all(struct vtnet_softc *sc)
3217 {
3218 	struct vtnet_txq *txq;
3219 	int i;
3220 
3221 	VTNET_CORE_LOCK_ASSERT(sc);
3222 
3223 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3224 		txq = &sc->vtnet_txqs[i];
3225 
3226 		VTNET_TXQ_LOCK(txq);
3227 		vtnet_txq_start(txq);
3228 		VTNET_TXQ_UNLOCK(txq);
3229 	}
3230 }
3231 
3232 static void
3233 vtnet_qflush(if_t ifp)
3234 {
3235 	struct vtnet_softc *sc;
3236 	struct vtnet_txq *txq;
3237 	struct mbuf *m;
3238 	int i;
3239 
3240 	sc = if_getsoftc(ifp);
3241 
3242 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3243 		txq = &sc->vtnet_txqs[i];
3244 
3245 		VTNET_TXQ_LOCK(txq);
3246 		while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
3247 			m_freem(m);
3248 		VTNET_TXQ_UNLOCK(txq);
3249 	}
3250 
3251 	if_qflush(ifp);
3252 }
3253 
3254 static int
3255 vtnet_watchdog(struct vtnet_txq *txq)
3256 {
3257 	if_t ifp;
3258 
3259 	ifp = txq->vtntx_sc->vtnet_ifp;
3260 
3261 	VTNET_TXQ_LOCK(txq);
3262 	if (txq->vtntx_watchdog == 1) {
3263 		/*
3264 		 * Only drain completed frames if the watchdog is about to
3265 		 * expire. If any frames were drained, there may be enough
3266 		 * free descriptors now available to transmit queued frames.
3267 		 * In that case, the timer will immediately be decremented
3268 		 * below, but the timeout is generous enough that should not
3269 		 * be a problem.
3270 		 */
3271 		if (vtnet_txq_eof(txq) != 0)
3272 			vtnet_txq_start(txq);
3273 	}
3274 
3275 	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
3276 		VTNET_TXQ_UNLOCK(txq);
3277 		return (0);
3278 	}
3279 	VTNET_TXQ_UNLOCK(txq);
3280 
3281 	if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
3282 	return (1);
3283 }
3284 
3285 static void
3286 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
3287     struct vtnet_txq_stats *txacc)
3288 {
3289 
3290 	bzero(rxacc, sizeof(struct vtnet_rxq_stats));
3291 	bzero(txacc, sizeof(struct vtnet_txq_stats));
3292 
3293 	for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3294 		struct vtnet_rxq_stats *rxst;
3295 		struct vtnet_txq_stats *txst;
3296 
3297 		rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
3298 		rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
3299 		rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
3300 		rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
3301 		rxacc->vrxs_csum += rxst->vrxs_csum;
3302 		rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
3303 		rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
3304 
3305 		txst = &sc->vtnet_txqs[i].vtntx_stats;
3306 		txacc->vtxs_opackets += txst->vtxs_opackets;
3307 		txacc->vtxs_obytes += txst->vtxs_obytes;
3308 		txacc->vtxs_csum += txst->vtxs_csum;
3309 		txacc->vtxs_tso += txst->vtxs_tso;
3310 		txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
3311 	}
3312 }
3313 
3314 static uint64_t
3315 vtnet_get_counter(if_t ifp, ift_counter cnt)
3316 {
3317 	struct vtnet_softc *sc;
3318 	struct vtnet_rxq_stats rxaccum;
3319 	struct vtnet_txq_stats txaccum;
3320 
3321 	sc = if_getsoftc(ifp);
3322 	vtnet_accum_stats(sc, &rxaccum, &txaccum);
3323 
3324 	switch (cnt) {
3325 	case IFCOUNTER_IPACKETS:
3326 		return (rxaccum.vrxs_ipackets);
3327 	case IFCOUNTER_IQDROPS:
3328 		return (rxaccum.vrxs_iqdrops);
3329 	case IFCOUNTER_IERRORS:
3330 		return (rxaccum.vrxs_ierrors);
3331 	case IFCOUNTER_IBYTES:
3332 		return (rxaccum.vrxs_ibytes);
3333 	case IFCOUNTER_OPACKETS:
3334 		return (txaccum.vtxs_opackets);
3335 	case IFCOUNTER_OBYTES:
3336 		return (txaccum.vtxs_obytes);
3337 	case IFCOUNTER_OMCASTS:
3338 		return (txaccum.vtxs_omcasts);
3339 	default:
3340 		return (if_get_counter_default(ifp, cnt));
3341 	}
3342 }
3343 
3344 static void
3345 vtnet_tick(void *xsc)
3346 {
3347 	struct vtnet_softc *sc;
3348 	if_t ifp;
3349 	int i, timedout;
3350 
3351 	sc = xsc;
3352 	ifp = sc->vtnet_ifp;
3353 	timedout = 0;
3354 
3355 	VTNET_CORE_LOCK_ASSERT(sc);
3356 
3357 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3358 		timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
3359 
3360 	if (timedout != 0) {
3361 		if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3362 		vtnet_init_locked(sc, 0);
3363 	} else
3364 		callout_schedule(&sc->vtnet_tick_ch, hz);
3365 }
3366 
3367 static void
3368 vtnet_start_taskqueues(struct vtnet_softc *sc)
3369 {
3370 	device_t dev;
3371 	struct vtnet_rxq *rxq;
3372 	struct vtnet_txq *txq;
3373 	int i, error;
3374 
3375 	dev = sc->vtnet_dev;
3376 
3377 	/*
3378 	 * Errors here are very difficult to recover from - we cannot
3379 	 * easily fail because, if this is during boot, we will hang
3380 	 * when freeing any successfully started taskqueues because
3381 	 * the scheduler isn't up yet.
3382 	 *
3383 	 * Most drivers just ignore the return value - it only fails
3384 	 * with ENOMEM so an error is not likely.
3385 	 */
3386 	for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
3387 		rxq = &sc->vtnet_rxqs[i];
3388 		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
3389 		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
3390 		if (error) {
3391 			device_printf(dev, "failed to start rx taskq %d\n",
3392 			    rxq->vtnrx_id);
3393 		}
3394 
3395 		txq = &sc->vtnet_txqs[i];
3396 		error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
3397 		    "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
3398 		if (error) {
3399 			device_printf(dev, "failed to start tx taskq %d\n",
3400 			    txq->vtntx_id);
3401 		}
3402 	}
3403 }
3404 
3405 static void
3406 vtnet_free_taskqueues(struct vtnet_softc *sc)
3407 {
3408 	struct vtnet_rxq *rxq;
3409 	struct vtnet_txq *txq;
3410 	int i;
3411 
3412 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3413 		rxq = &sc->vtnet_rxqs[i];
3414 		if (rxq->vtnrx_tq != NULL) {
3415 			taskqueue_free(rxq->vtnrx_tq);
3416 			rxq->vtnrx_tq = NULL;
3417 		}
3418 
3419 		txq = &sc->vtnet_txqs[i];
3420 		if (txq->vtntx_tq != NULL) {
3421 			taskqueue_free(txq->vtntx_tq);
3422 			txq->vtntx_tq = NULL;
3423 		}
3424 	}
3425 }
3426 
3427 static void
3428 vtnet_drain_taskqueues(struct vtnet_softc *sc)
3429 {
3430 	struct vtnet_rxq *rxq;
3431 	struct vtnet_txq *txq;
3432 	int i;
3433 
3434 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3435 		rxq = &sc->vtnet_rxqs[i];
3436 		if (rxq->vtnrx_tq != NULL)
3437 			taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
3438 
3439 		txq = &sc->vtnet_txqs[i];
3440 		if (txq->vtntx_tq != NULL) {
3441 			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
3442 			if (!VTNET_ALTQ_ENABLED)
3443 				taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
3444 		}
3445 	}
3446 }
3447 
3448 static void
3449 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
3450 {
3451 	struct vtnet_rxq *rxq;
3452 	struct vtnet_txq *txq;
3453 	int i;
3454 
3455 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3456 		rxq = &sc->vtnet_rxqs[i];
3457 		vtnet_rxq_free_mbufs(rxq);
3458 
3459 		txq = &sc->vtnet_txqs[i];
3460 		vtnet_txq_free_mbufs(txq);
3461 	}
3462 }
3463 
3464 static void
3465 vtnet_stop_rendezvous(struct vtnet_softc *sc)
3466 {
3467 	struct vtnet_rxq *rxq;
3468 	struct vtnet_txq *txq;
3469 	int i;
3470 
3471 	VTNET_CORE_LOCK_ASSERT(sc);
3472 
3473 	/*
3474 	 * Lock and unlock the per-queue mutex so we known the stop
3475 	 * state is visible. Doing only the active queues should be
3476 	 * sufficient, but it does not cost much extra to do all the
3477 	 * queues.
3478 	 */
3479 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3480 		rxq = &sc->vtnet_rxqs[i];
3481 		VTNET_RXQ_LOCK(rxq);
3482 		VTNET_RXQ_UNLOCK(rxq);
3483 
3484 		txq = &sc->vtnet_txqs[i];
3485 		VTNET_TXQ_LOCK(txq);
3486 		VTNET_TXQ_UNLOCK(txq);
3487 	}
3488 }
3489 
3490 static void
3491 vtnet_stop(struct vtnet_softc *sc)
3492 {
3493 	device_t dev;
3494 	if_t ifp;
3495 
3496 	dev = sc->vtnet_dev;
3497 	ifp = sc->vtnet_ifp;
3498 
3499 	VTNET_CORE_LOCK_ASSERT(sc);
3500 
3501 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3502 	sc->vtnet_link_active = 0;
3503 	callout_stop(&sc->vtnet_tick_ch);
3504 
3505 	/* Only advisory. */
3506 	vtnet_disable_interrupts(sc);
3507 
3508 #ifdef DEV_NETMAP
3509 	/* Stop any pending txsync/rxsync and disable them. */
3510 	netmap_disable_all_rings(ifp);
3511 #endif /* DEV_NETMAP */
3512 
3513 	/*
3514 	 * Stop the host adapter. This resets it to the pre-initialized
3515 	 * state. It will not generate any interrupts until after it is
3516 	 * reinitialized.
3517 	 */
3518 	virtio_stop(dev);
3519 	vtnet_stop_rendezvous(sc);
3520 
3521 	vtnet_drain_rxtx_queues(sc);
3522 	sc->vtnet_act_vq_pairs = 1;
3523 }
3524 
3525 static int
3526 vtnet_virtio_reinit(struct vtnet_softc *sc)
3527 {
3528 	device_t dev;
3529 	if_t ifp;
3530 	uint64_t features;
3531 	int error;
3532 
3533 	dev = sc->vtnet_dev;
3534 	ifp = sc->vtnet_ifp;
3535 	features = sc->vtnet_negotiated_features;
3536 
3537 	/*
3538 	 * Re-negotiate with the host, removing any disabled receive
3539 	 * features. Transmit features are disabled only on our side
3540 	 * via if_capenable and if_hwassist.
3541 	 */
3542 
3543 	if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
3544 		features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES);
3545 
3546 	if ((if_getcapenable(ifp) & IFCAP_LRO) == 0)
3547 		features &= ~VTNET_LRO_FEATURES;
3548 
3549 	if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0)
3550 		features &= ~VIRTIO_NET_F_CTRL_VLAN;
3551 
3552 	error = virtio_reinit(dev, features);
3553 	if (error) {
3554 		device_printf(dev, "virtio reinit error %d\n", error);
3555 		return (error);
3556 	}
3557 
3558 	sc->vtnet_features = features;
3559 	virtio_reinit_complete(dev);
3560 
3561 	return (0);
3562 }
3563 
3564 static void
3565 vtnet_init_rx_filters(struct vtnet_softc *sc)
3566 {
3567 	if_t ifp;
3568 
3569 	ifp = sc->vtnet_ifp;
3570 
3571 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
3572 		vtnet_rx_filter(sc);
3573 		vtnet_rx_filter_mac(sc);
3574 	}
3575 
3576 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
3577 		vtnet_rx_filter_vlan(sc);
3578 }
3579 
3580 static int
3581 vtnet_init_rx_queues(struct vtnet_softc *sc)
3582 {
3583 	device_t dev;
3584 	if_t ifp;
3585 	struct vtnet_rxq *rxq;
3586 	int i, clustersz, error;
3587 
3588 	dev = sc->vtnet_dev;
3589 	ifp = sc->vtnet_ifp;
3590 
3591 	clustersz = vtnet_rx_cluster_size(sc, if_getmtu(ifp));
3592 	sc->vtnet_rx_clustersz = clustersz;
3593 
3594 	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) {
3595 		sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) +
3596 		    VTNET_MAX_RX_SIZE, clustersz);
3597 		KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
3598 		    ("%s: too many rx mbufs %d for %d segments", __func__,
3599 		    sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
3600 	} else
3601 		sc->vtnet_rx_nmbufs = 1;
3602 
3603 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3604 		rxq = &sc->vtnet_rxqs[i];
3605 
3606 		/* Hold the lock to satisfy asserts. */
3607 		VTNET_RXQ_LOCK(rxq);
3608 		error = vtnet_rxq_populate(rxq);
3609 		VTNET_RXQ_UNLOCK(rxq);
3610 
3611 		if (error) {
3612 			device_printf(dev, "cannot populate Rx queue %d\n", i);
3613 			return (error);
3614 		}
3615 	}
3616 
3617 	return (0);
3618 }
3619 
3620 static int
3621 vtnet_init_tx_queues(struct vtnet_softc *sc)
3622 {
3623 	struct vtnet_txq *txq;
3624 	int i;
3625 
3626 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3627 		txq = &sc->vtnet_txqs[i];
3628 		txq->vtntx_watchdog = 0;
3629 		txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq);
3630 #ifdef DEV_NETMAP
3631 		netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0);
3632 #endif /* DEV_NETMAP */
3633 	}
3634 
3635 	return (0);
3636 }
3637 
/*
 * Initialize the Rx queues and then the Tx queues. Returns the first error
 * encountered, or 0 when both steps succeed.
 */
static int
vtnet_init_rxtx_queues(struct vtnet_softc *sc)
{
	int error;

	error = vtnet_init_rx_queues(sc);
	if (error == 0)
		error = vtnet_init_tx_queues(sc);

	return (error);
}
3653 
3654 static void
3655 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
3656 {
3657 	device_t dev;
3658 	int npairs;
3659 
3660 	dev = sc->vtnet_dev;
3661 
3662 	if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) {
3663 		sc->vtnet_act_vq_pairs = 1;
3664 		return;
3665 	}
3666 
3667 	npairs = sc->vtnet_req_vq_pairs;
3668 
3669 	if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3670 		device_printf(dev, "cannot set active queue pairs to %d, "
3671 		    "falling back to 1 queue pair\n", npairs);
3672 		npairs = 1;
3673 	}
3674 
3675 	sc->vtnet_act_vq_pairs = npairs;
3676 }
3677 
3678 static void
3679 vtnet_update_rx_offloads(struct vtnet_softc *sc)
3680 {
3681 	if_t ifp;
3682 	uint64_t features;
3683 	int error;
3684 
3685 	ifp = sc->vtnet_ifp;
3686 	features = sc->vtnet_features;
3687 
3688 	VTNET_CORE_LOCK_ASSERT(sc);
3689 
3690 	if (if_getcapabilities(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
3691 		if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
3692 			features |= VIRTIO_NET_F_GUEST_CSUM;
3693 		else
3694 			features &= ~VIRTIO_NET_F_GUEST_CSUM;
3695 	}
3696 
3697 	if (if_getcapabilities(ifp) & IFCAP_LRO && !vtnet_software_lro(sc)) {
3698 		if (if_getcapenable(ifp) & IFCAP_LRO)
3699 			features |= VTNET_LRO_FEATURES;
3700 		else
3701 			features &= ~VTNET_LRO_FEATURES;
3702 	}
3703 
3704 	error = vtnet_ctrl_guest_offloads(sc,
3705 	    features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 |
3706 		        VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN  |
3707 			VIRTIO_NET_F_GUEST_UFO));
3708 	if (error) {
3709 		device_printf(sc->vtnet_dev,
3710 		    "%s: cannot update Rx features\n", __func__);
3711 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
3712 			if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3713 			vtnet_init_locked(sc, 0);
3714 		}
3715 	} else
3716 		sc->vtnet_features = features;
3717 }
3718 
3719 static int
3720 vtnet_reinit(struct vtnet_softc *sc)
3721 {
3722 	if_t ifp;
3723 	int error;
3724 
3725 	ifp = sc->vtnet_ifp;
3726 
3727 	bcopy(if_getlladdr(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3728 
3729 	error = vtnet_virtio_reinit(sc);
3730 	if (error)
3731 		return (error);
3732 
3733 	vtnet_set_macaddr(sc);
3734 	vtnet_set_active_vq_pairs(sc);
3735 
3736 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3737 		vtnet_init_rx_filters(sc);
3738 
3739 	if_sethwassist(ifp, 0);
3740 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
3741 		if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD, 0);
3742 	if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
3743 		if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD_IPV6, 0);
3744 	if (if_getcapenable(ifp) & IFCAP_TSO4)
3745 		if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
3746 	if (if_getcapenable(ifp) & IFCAP_TSO6)
3747 		if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
3748 
3749 	error = vtnet_init_rxtx_queues(sc);
3750 	if (error)
3751 		return (error);
3752 
3753 	return (0);
3754 }
3755 
3756 static void
3757 vtnet_init_locked(struct vtnet_softc *sc, int init_mode)
3758 {
3759 	if_t ifp;
3760 
3761 	ifp = sc->vtnet_ifp;
3762 
3763 	VTNET_CORE_LOCK_ASSERT(sc);
3764 
3765 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
3766 		return;
3767 
3768 	vtnet_stop(sc);
3769 
3770 #ifdef DEV_NETMAP
3771 	/* Once stopped we can update the netmap flags, if necessary. */
3772 	switch (init_mode) {
3773 	case VTNET_INIT_NETMAP_ENTER:
3774 		nm_set_native_flags(NA(ifp));
3775 		break;
3776 	case VTNET_INIT_NETMAP_EXIT:
3777 		nm_clear_native_flags(NA(ifp));
3778 		break;
3779 	}
3780 #endif /* DEV_NETMAP */
3781 
3782 	if (vtnet_reinit(sc) != 0) {
3783 		vtnet_stop(sc);
3784 		return;
3785 	}
3786 
3787 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
3788 	vtnet_update_link_status(sc);
3789 	vtnet_enable_interrupts(sc);
3790 	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3791 
3792 #ifdef DEV_NETMAP
3793 	/* Re-enable txsync/rxsync. */
3794 	netmap_enable_all_rings(ifp);
3795 #endif /* DEV_NETMAP */
3796 }
3797 
/*
 * Unlocked entry point for initialization: runs vtnet_init_locked() with
 * the core lock held.
 */
static void
vtnet_init(void *xsc)
{
	struct vtnet_softc *sc = xsc;

	VTNET_CORE_LOCK(sc);
	vtnet_init_locked(sc, 0);
	VTNET_CORE_UNLOCK(sc);
}
3809 
/*
 * Teardown hook for the control virtqueue. Nothing is freed here; the
 * function only asserts (in kernels where KASSERT is active) that the
 * queue holds no outstanding entries.
 */
static void
vtnet_free_ctrl_vq(struct vtnet_softc *sc)
{

	/*
	 * The control virtqueue is only polled and therefore it should
	 * already be empty.
	 */
	KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
	    ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq));
}
3821 
3822 static void
3823 vtnet_load_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
3824     int error)
3825 {
3826 	bus_addr_t *paddr;
3827 
3828 	if (error != 0)
3829 		return;
3830 
3831 	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
3832 
3833 	paddr = (bus_addr_t *)arg;
3834 	*paddr = segs[0].ds_addr;
3835 }
3836 
3837 static int
3838 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, uint8_t *ack, struct sglist *sg,
3839     int readable, int writable)
3840 {
3841 	bus_dmamap_t ack_dmap;
3842 	bus_addr_t ack_paddr;
3843 	struct virtqueue *vq;
3844 	int error;
3845 
3846 	error = bus_dmamap_create(sc->vtnet_ack_dmat, 0, &ack_dmap);
3847 	if (error)
3848 		goto error_out;
3849 
3850 	error = bus_dmamap_load(sc->vtnet_ack_dmat, ack_dmap, ack,
3851 	    sizeof(uint8_t), vtnet_load_callback, &ack_paddr, BUS_DMA_NOWAIT);
3852 	if (error)
3853 		goto error_destroy;
3854 
3855 	bus_dmamap_sync(sc->vtnet_ack_dmat, ack_dmap, BUS_DMASYNC_PREWRITE);
3856 
3857 	error = sglist_append_phys(sg, ack_paddr, sizeof(uint8_t));
3858 	if (error)
3859 		goto error_unload;
3860 
3861 	vq = sc->vtnet_ctrl_vq;
3862 
3863 	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ);
3864 	VTNET_CORE_LOCK_ASSERT(sc);
3865 
3866 	if (!virtqueue_empty(vq))
3867 		goto error_unload;
3868 
3869 	/*
3870 	 * Poll for the response, but the command is likely completed before
3871 	 * returning from the notify.
3872 	 */
3873 	if (virtqueue_enqueue(vq, (void *)ack, sg, readable, writable) == 0)  {
3874 		virtqueue_notify(vq);
3875 		virtqueue_poll(vq, NULL);
3876 	}
3877 
3878 	bus_dmamap_sync(sc->vtnet_ack_dmat, ack_dmap, BUS_DMASYNC_POSTREAD);
3879 
3880 error_unload:
3881 	bus_dmamap_unload(sc->vtnet_ack_dmat, ack_dmap);
3882 error_destroy:
3883 	bus_dmamap_destroy(sc->vtnet_ack_dmat, ack_dmap);
3884 error_out:
3885 	return (error);
3886 }
3887 
3888 static int
3889 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3890 {
3891 	struct sglist_seg segs[3];
3892 	bus_dmamap_t hdr_dmap;
3893 	bus_addr_t hdr_paddr;
3894 	struct sglist sg;
3895 	struct {
3896 		struct virtio_net_ctrl_hdr hdr __aligned(2);
3897 		uint8_t pad1;
3898 		uint8_t addr[ETHER_ADDR_LEN] __aligned(8);
3899 		uint8_t pad2;
3900 	} s;
3901 	uint8_t ack;
3902 	int error;
3903 
3904 	error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap);
3905 	if (error)
3906 		goto error_out;
3907 
3908 	error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &s,
3909 	    sizeof(s), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT);
3910 	if (error)
3911 		goto error_destroy_hdr;
3912 
3913 	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC);
3914 
3915 	s.hdr.class = VIRTIO_NET_CTRL_MAC;
3916 	s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3917 	bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN);
3918 	ack = VIRTIO_NET_ERR;
3919 	bus_dmamap_sync(sc->vtnet_hdr_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE);
3920 
3921 	sglist_init(&sg, nitems(segs), segs);
3922 	error |= sglist_append_phys(&sg, hdr_paddr,
3923 	    sizeof(struct virtio_net_ctrl_hdr));
3924 	error |= sglist_append_phys(&sg,
3925 	    hdr_paddr + ((uintptr_t)&s.addr - (uintptr_t)&s),
3926 	    ETHER_ADDR_LEN);
3927 	MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1);
3928 
3929 	if (error == 0)
3930 		error = vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg, 1);
3931 	if (error == 0)
3932 		error = (ack == VIRTIO_NET_OK ? 0 : EIO);
3933 
3934 	bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap);
3935 error_destroy_hdr:
3936 	bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap);
3937 error_out:
3938 	return (error);
3939 }
3940 
3941 static int
3942 vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads)
3943 {
3944 	struct sglist_seg segs[3];
3945 	bus_dmamap_t hdr_dmap;
3946 	bus_addr_t hdr_paddr;
3947 	struct sglist sg;
3948 	struct {
3949 		struct virtio_net_ctrl_hdr hdr __aligned(2);
3950 		uint8_t pad1;
3951 		uint64_t offloads __aligned(8);
3952 		uint8_t pad2;
3953 	} s;
3954 	uint8_t ack;
3955 	int error;
3956 
3957 	error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap);
3958 	if (error)
3959 		goto error_out;
3960 
3961 	error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &s,
3962 	    sizeof(s), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT);
3963 	if (error)
3964 		goto error_destroy_hdr;
3965 
3966 	MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3967 
3968 	s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
3969 	s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
3970 	s.offloads = vtnet_gtoh64(sc, offloads);
3971 	ack = VIRTIO_NET_ERR;
3972 	bus_dmamap_sync(sc->vtnet_hdr_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE);
3973 
3974 	sglist_init(&sg, nitems(segs), segs);
3975 	error |= sglist_append_phys(&sg, hdr_paddr,
3976 	    sizeof(struct virtio_net_ctrl_hdr));
3977 	error |= sglist_append_phys(&sg,
3978 	    hdr_paddr + ((uintptr_t)&s.offloads - (uintptr_t)&s),
3979 	    sizeof(uint64_t));
3980 	MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1);
3981 
3982 	if (error == 0)
3983 		error = vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg, 1);
3984 	if (error == 0)
3985 		error = (ack == VIRTIO_NET_OK ? 0 : EIO);
3986 
3987 	bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap);
3988 error_destroy_hdr:
3989 	bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap);
3990 error_out:
3991 	return (error);
3992 }
3993 
3994 static int
3995 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3996 {
3997 	struct sglist_seg segs[3];
3998 	bus_dmamap_t hdr_dmap;
3999 	bus_addr_t hdr_paddr;
4000 	struct sglist sg;
4001 	struct {
4002 		struct virtio_net_ctrl_hdr hdr __aligned(2);
4003 		uint8_t pad1;
4004 		struct virtio_net_ctrl_mq mq __aligned(2);
4005 		uint8_t pad2;
4006 	} s;
4007 	uint8_t ack;
4008 	int error;
4009 
4010 	error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap);
4011 	if (error)
4012 		goto error_out;
4013 
4014 	error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &s,
4015 	    sizeof(s), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT);
4016 	if (error)
4017 		goto error_destroy_hdr;
4018 
4019 	MPASS(sc->vtnet_flags & VTNET_FLAG_MQ);
4020 
4021 	s.hdr.class = VIRTIO_NET_CTRL_MQ;
4022 	s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
4023 	s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs);
4024 	ack = VIRTIO_NET_ERR;
4025 	bus_dmamap_sync(sc->vtnet_hdr_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE);
4026 
4027 	sglist_init(&sg, nitems(segs), segs);
4028 	error |= sglist_append_phys(&sg, hdr_paddr,
4029 	    sizeof(struct virtio_net_ctrl_hdr));
4030 	error |= sglist_append_phys(&sg,
4031 	    hdr_paddr + ((uintptr_t)&s.mq - (uintptr_t)&s),
4032 	    sizeof(struct virtio_net_ctrl_mq));
4033 	MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1);
4034 
4035 	if (error == 0)
4036 		error = vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg, 1);
4037 	if (error == 0)
4038 		error = (ack == VIRTIO_NET_OK ? 0 : EIO);
4039 
4040 	bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap);
4041 error_destroy_hdr:
4042 	bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap);
4043 error_out:
4044 	return (error);
4045 }
4046 
4047 static int
4048 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on)
4049 {
4050 	struct sglist_seg segs[3];
4051 	bus_dmamap_t hdr_dmap;
4052 	bus_addr_t hdr_paddr;
4053 	struct sglist sg;
4054 	struct {
4055 		struct virtio_net_ctrl_hdr hdr __aligned(2);
4056 		uint8_t pad1;
4057 		uint8_t onoff;
4058 		uint8_t pad2;
4059 	} s;
4060 	uint8_t ack;
4061 	int error;
4062 
4063 	error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap);
4064 	if (error)
4065 		goto error_out;
4066 
4067 	error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &s,
4068 	    sizeof(s), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT);
4069 	if (error)
4070 		goto error_destroy_hdr;
4071 
4072 	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
4073 
4074 	s.hdr.class = VIRTIO_NET_CTRL_RX;
4075 	s.hdr.cmd = cmd;
4076 	s.onoff = on;
4077 	ack = VIRTIO_NET_ERR;
4078 	bus_dmamap_sync(sc->vtnet_hdr_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE);
4079 
4080 	sglist_init(&sg, nitems(segs), segs);
4081 	error |= sglist_append_phys(&sg, hdr_paddr,
4082 	    sizeof(struct virtio_net_ctrl_hdr));
4083 	error |= sglist_append_phys(&sg,
4084 	    hdr_paddr + ((uintptr_t)&s.onoff - (uintptr_t)&s),
4085 	    sizeof(uint8_t));
4086 	MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1);
4087 
4088 	if (error == 0)
4089 		error = vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg, 1);
4090 	if (error == 0)
4091 		error = (ack == VIRTIO_NET_OK ? 0 : EIO);
4092 
4093 	bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap);
4094 error_destroy_hdr:
4095 	bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap);
4096 error_out:
4097 	return (error);
4098 }
4099 
4100 static int
4101 vtnet_set_promisc(struct vtnet_softc *sc, bool on)
4102 {
4103 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
4104 }
4105 
4106 static int
4107 vtnet_set_allmulti(struct vtnet_softc *sc, bool on)
4108 {
4109 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
4110 }
4111 
4112 static void
4113 vtnet_rx_filter(struct vtnet_softc *sc)
4114 {
4115 	device_t dev;
4116 	if_t ifp;
4117 
4118 	dev = sc->vtnet_dev;
4119 	ifp = sc->vtnet_ifp;
4120 
4121 	VTNET_CORE_LOCK_ASSERT(sc);
4122 
4123 	if (vtnet_set_promisc(sc, if_getflags(ifp) & IFF_PROMISC) != 0) {
4124 		device_printf(dev, "cannot %s promiscuous mode\n",
4125 		    if_getflags(ifp) & IFF_PROMISC ? "enable" : "disable");
4126 	}
4127 
4128 	if (vtnet_set_allmulti(sc, if_getflags(ifp) & IFF_ALLMULTI) != 0) {
4129 		device_printf(dev, "cannot %s all-multicast mode\n",
4130 		    if_getflags(ifp) & IFF_ALLMULTI ? "enable" : "disable");
4131 	}
4132 }
4133 
4134 static u_int
4135 vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt)
4136 {
4137 	struct vtnet_softc *sc = arg;
4138 
4139 	if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
4140 		return (0);
4141 
4142 	if (ucnt < VTNET_MAX_MAC_ENTRIES)
4143 		bcopy(LLADDR(sdl),
4144 		    &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt],
4145 		    ETHER_ADDR_LEN);
4146 
4147 	return (1);
4148 }
4149 
4150 static u_int
4151 vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt)
4152 {
4153 	struct vtnet_mac_filter *filter = arg;
4154 
4155 	if (mcnt < VTNET_MAX_MAC_ENTRIES)
4156 		bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt],
4157 		    ETHER_ADDR_LEN);
4158 
4159 	return (1);
4160 }
4161 
4162 static void
4163 vtnet_rx_filter_mac(struct vtnet_softc *sc)
4164 {
4165 	struct virtio_net_ctrl_hdr hdr __aligned(2);
4166 	struct vtnet_mac_filter *filter;
4167 	struct sglist_seg segs[4];
4168 	bus_dmamap_t filter_dmap;
4169 	bus_addr_t filter_paddr;
4170 	bus_dmamap_t hdr_dmap;
4171 	bus_addr_t hdr_paddr;
4172 	struct sglist sg;
4173 	if_t ifp;
4174 	bool promisc, allmulti;
4175 	u_int ucnt, mcnt;
4176 	int error;
4177 	uint8_t ack;
4178 
4179 	ifp = sc->vtnet_ifp;
4180 	filter = sc->vtnet_mac_filter;
4181 	error = 0;
4182 
4183 	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
4184 	VTNET_CORE_LOCK_ASSERT(sc);
4185 
4186 	/* Unicast MAC addresses: */
4187 	ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc);
4188 	promisc = (ucnt > VTNET_MAX_MAC_ENTRIES);
4189 
4190 	if (promisc) {
4191 		ucnt = 0;
4192 		if_printf(ifp, "more than %d MAC addresses assigned, "
4193 		    "falling back to promiscuous mode\n",
4194 		    VTNET_MAX_MAC_ENTRIES);
4195 	}
4196 
4197 	/* Multicast MAC addresses: */
4198 	mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter);
4199 	allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES);
4200 
4201 	if (allmulti) {
4202 		mcnt = 0;
4203 		if_printf(ifp, "more than %d multicast MAC addresses "
4204 		    "assigned, falling back to all-multicast mode\n",
4205 		    VTNET_MAX_MAC_ENTRIES);
4206 	}
4207 
4208 	if (promisc && allmulti)
4209 		goto out;
4210 
4211 	error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap);
4212 	if (error)
4213 		goto out_error;
4214 
4215 	error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &hdr,
4216 	    sizeof(hdr), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT);
4217 	if (error)
4218 		goto out_destroy_hdr;
4219 
4220 	error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &filter_dmap);
4221 	if (error)
4222 		goto out_unload_hdr;
4223 
4224 	error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, filter,
4225 	    sizeof(*filter), vtnet_load_callback, &filter_paddr,
4226 	    BUS_DMA_NOWAIT);
4227 	if (error)
4228 		goto out_destroy_filter;
4229 
4230 	filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt);
4231 	filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt);
4232 
4233 	hdr.class = VIRTIO_NET_CTRL_MAC;
4234 	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
4235 	ack = VIRTIO_NET_ERR;
4236 
4237 	sglist_init(&sg, nitems(segs), segs);
4238 	error |= sglist_append_phys(&sg, hdr_paddr,
4239 	    sizeof(struct virtio_net_ctrl_hdr));
4240 	error |= sglist_append_phys(&sg,
4241 	    filter_paddr + ((uintptr_t)&filter->vmf_unicast -
4242 	    (uintptr_t)filter),
4243 	    sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN);
4244 	error |= sglist_append_phys(&sg,
4245 	    filter_paddr + ((uintptr_t)&filter->vmf_multicast -
4246 	    (uintptr_t)filter),
4247 	    sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN);
4248 	MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1);
4249 
4250 	if (error == 0)
4251 		error = vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg, 1);
4252 	if (error == 0)
4253 		error = (ack == VIRTIO_NET_OK ? 0 : EIO);
4254 
4255 	bus_dmamap_unload(sc->vtnet_hdr_dmat, filter_dmap);
4256 out_destroy_filter:
4257 	bus_dmamap_destroy(sc->vtnet_hdr_dmat, filter_dmap);
4258 out_unload_hdr:
4259 	bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap);
4260 out_destroy_hdr:
4261 	bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap);
4262 out_error:
4263 	if (error != 0)
4264 		if_printf(ifp, "error setting host MAC filter table\n");
4265 out:
4266 	if (promisc && vtnet_set_promisc(sc, true) != 0)
4267 		if_printf(ifp, "cannot enable promiscuous mode\n");
4268 	if (allmulti && vtnet_set_allmulti(sc, true) != 0)
4269 		if_printf(ifp, "cannot enable all-multicast mode\n");
4270 }
4271 
4272 static int
4273 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
4274 {
4275 	struct sglist_seg segs[3];
4276 	bus_dmamap_t hdr_dmap;
4277 	bus_addr_t hdr_paddr;
4278 	struct sglist sg;
4279 	struct {
4280 		struct virtio_net_ctrl_hdr hdr __aligned(2);
4281 		uint8_t pad1;
4282 		uint16_t tag __aligned(2);
4283 		uint8_t pad2;
4284 	} s;
4285 	uint8_t ack;
4286 	int error;
4287 
4288 	error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap);
4289 	if (error)
4290 		goto error_out;
4291 
4292 	error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &s,
4293 	    sizeof(s), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT);
4294 	if (error)
4295 		goto error_destroy_hdr;
4296 
4297 	MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
4298 
4299 	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
4300 	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
4301 	s.tag = vtnet_gtoh16(sc, tag);
4302 	ack = VIRTIO_NET_ERR;
4303 	bus_dmamap_sync(sc->vtnet_hdr_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE);
4304 
4305 	sglist_init(&sg, nitems(segs), segs);
4306 	error |= sglist_append_phys(&sg, hdr_paddr,
4307 	    sizeof(struct virtio_net_ctrl_hdr));
4308 	error |= sglist_append_phys(&sg,
4309 	    hdr_paddr + ((uintptr_t)&s.tag - (uintptr_t)&s),
4310 	    sizeof(uint16_t));
4311 	MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1);
4312 
4313 	if (error == 0)
4314 		error = vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg, 1);
4315 	if (error == 0)
4316 		error = (ack == VIRTIO_NET_OK ? 0 : EIO);
4317 
4318 	bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap);
4319 error_destroy_hdr:
4320 	bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap);
4321 error_out:
4322 	return (error);
4323 }
4324 
4325 static void
4326 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
4327 {
4328 	int i, bit;
4329 	uint32_t w;
4330 	uint16_t tag;
4331 
4332 	MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
4333 	VTNET_CORE_LOCK_ASSERT(sc);
4334 
4335 	/* Enable the filter for each configured VLAN. */
4336 	for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
4337 		w = sc->vtnet_vlan_filter[i];
4338 
4339 		while ((bit = ffs(w) - 1) != -1) {
4340 			w &= ~(1 << bit);
4341 			tag = sizeof(w) * CHAR_BIT * i + bit;
4342 
4343 			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
4344 				device_printf(sc->vtnet_dev,
4345 				    "cannot enable VLAN %d filter\n", tag);
4346 			}
4347 		}
4348 	}
4349 }
4350 
4351 static void
4352 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
4353 {
4354 	if_t ifp;
4355 	int idx, bit;
4356 
4357 	ifp = sc->vtnet_ifp;
4358 	idx = (tag >> 5) & 0x7F;
4359 	bit = tag & 0x1F;
4360 
4361 	if (tag == 0 || tag > 4095)
4362 		return;
4363 
4364 	VTNET_CORE_LOCK(sc);
4365 
4366 	if (add)
4367 		sc->vtnet_vlan_filter[idx] |= (1 << bit);
4368 	else
4369 		sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
4370 
4371 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER &&
4372 	    if_getdrvflags(ifp) & IFF_DRV_RUNNING &&
4373 	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
4374 		device_printf(sc->vtnet_dev,
4375 		    "cannot %s VLAN %d %s the host filter table\n",
4376 		    add ? "add" : "remove", tag, add ? "to" : "from");
4377 	}
4378 
4379 	VTNET_CORE_UNLOCK(sc);
4380 }
4381 
4382 static void
4383 vtnet_register_vlan(void *arg, if_t ifp, uint16_t tag)
4384 {
4385 
4386 	if (if_getsoftc(ifp) != arg)
4387 		return;
4388 
4389 	vtnet_update_vlan_filter(arg, 1, tag);
4390 }
4391 
4392 static void
4393 vtnet_unregister_vlan(void *arg, if_t ifp, uint16_t tag)
4394 {
4395 
4396 	if (if_getsoftc(ifp) != arg)
4397 		return;
4398 
4399 	vtnet_update_vlan_filter(arg, 0, tag);
4400 }
4401 
4402 static void
4403 vtnet_update_speed_duplex(struct vtnet_softc *sc)
4404 {
4405 	if_t ifp;
4406 	uint32_t speed;
4407 
4408 	ifp = sc->vtnet_ifp;
4409 
4410 	if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0)
4411 		return;
4412 
4413 	/* BMV: Ignore duplex. */
4414 	speed = virtio_read_dev_config_4(sc->vtnet_dev,
4415 	    offsetof(struct virtio_net_config, speed));
4416 	if (speed != UINT32_MAX)
4417 		if_setbaudrate(ifp, IF_Mbps(speed));
4418 }
4419 
4420 static int
4421 vtnet_is_link_up(struct vtnet_softc *sc)
4422 {
4423 	uint16_t status;
4424 
4425 	if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0)
4426 		return (1);
4427 
4428 	status = virtio_read_dev_config_2(sc->vtnet_dev,
4429 	    offsetof(struct virtio_net_config, status));
4430 
4431 	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
4432 }
4433 
4434 static void
4435 vtnet_update_link_status(struct vtnet_softc *sc)
4436 {
4437 	if_t ifp;
4438 	int link;
4439 
4440 	ifp = sc->vtnet_ifp;
4441 	VTNET_CORE_LOCK_ASSERT(sc);
4442 	link = vtnet_is_link_up(sc);
4443 
4444 	/* Notify if the link status has changed. */
4445 	if (link != 0 && sc->vtnet_link_active == 0) {
4446 		vtnet_update_speed_duplex(sc);
4447 		sc->vtnet_link_active = 1;
4448 		if_link_state_change(ifp, LINK_STATE_UP);
4449 	} else if (link == 0 && sc->vtnet_link_active != 0) {
4450 		sc->vtnet_link_active = 0;
4451 		if_link_state_change(ifp, LINK_STATE_DOWN);
4452 	}
4453 }
4454 
4455 static int
4456 vtnet_ifmedia_upd(if_t ifp __unused)
4457 {
4458 	return (EOPNOTSUPP);
4459 }
4460 
4461 static void
4462 vtnet_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
4463 {
4464 	struct vtnet_softc *sc;
4465 
4466 	sc = if_getsoftc(ifp);
4467 
4468 	ifmr->ifm_status = IFM_AVALID;
4469 	ifmr->ifm_active = IFM_ETHER;
4470 
4471 	VTNET_CORE_LOCK(sc);
4472 	if (vtnet_is_link_up(sc) != 0) {
4473 		ifmr->ifm_status |= IFM_ACTIVE;
4474 		ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
4475 	} else
4476 		ifmr->ifm_active |= IFM_NONE;
4477 	VTNET_CORE_UNLOCK(sc);
4478 }
4479 
4480 static void
4481 vtnet_get_macaddr(struct vtnet_softc *sc)
4482 {
4483 
4484 	if (sc->vtnet_flags & VTNET_FLAG_MAC) {
4485 		virtio_read_device_config_array(sc->vtnet_dev,
4486 		    offsetof(struct virtio_net_config, mac),
4487 		    &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN);
4488 	} else {
4489 		/* Generate a random locally administered unicast address. */
4490 		sc->vtnet_hwaddr[0] = 0xB2;
4491 		arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
4492 	}
4493 }
4494 
4495 static void
4496 vtnet_set_macaddr(struct vtnet_softc *sc)
4497 {
4498 	device_t dev;
4499 	int error;
4500 
4501 	dev = sc->vtnet_dev;
4502 
4503 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
4504 		error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr);
4505 		if (error)
4506 			device_printf(dev, "unable to set MAC address\n");
4507 		return;
4508 	}
4509 
4510 	/* MAC in config is read-only in modern VirtIO. */
4511 	if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) {
4512 		for (int i = 0; i < ETHER_ADDR_LEN; i++) {
4513 			virtio_write_dev_config_1(dev,
4514 			    offsetof(struct virtio_net_config, mac) + i,
4515 			    sc->vtnet_hwaddr[i]);
4516 		}
4517 	}
4518 }
4519 
4520 static void
4521 vtnet_attached_set_macaddr(struct vtnet_softc *sc)
4522 {
4523 
4524 	/* Assign MAC address if it was generated. */
4525 	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0)
4526 		vtnet_set_macaddr(sc);
4527 }
4528 
4529 static void
4530 vtnet_vlan_tag_remove(struct mbuf *m)
4531 {
4532 	struct ether_vlan_header *evh;
4533 
4534 	evh = mtod(m, struct ether_vlan_header *);
4535 	m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
4536 	m->m_flags |= M_VLANTAG;
4537 
4538 	/* Strip the 802.1Q header. */
4539 	bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
4540 	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
4541 	m_adj(m, ETHER_VLAN_ENCAP_LEN);
4542 }
4543 
4544 static void
4545 vtnet_set_rx_process_limit(struct vtnet_softc *sc)
4546 {
4547 	int limit;
4548 
4549 	limit = vtnet_tunable_int(sc, "rx_process_limit",
4550 	    vtnet_rx_process_limit);
4551 	if (limit < 0)
4552 		limit = INT_MAX;
4553 	sc->vtnet_rx_process_limit = limit;
4554 }
4555 
4556 static void
4557 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
4558     struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
4559 {
4560 	struct sysctl_oid *node;
4561 	struct sysctl_oid_list *list;
4562 	struct vtnet_rxq_stats *stats;
4563 	char namebuf[16];
4564 
4565 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
4566 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
4567 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
4568 	list = SYSCTL_CHILDREN(node);
4569 
4570 	stats = &rxq->vtnrx_stats;
4571 
4572 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets",
4573 	    CTLFLAG_RD | CTLFLAG_STATS,
4574 	    &stats->vrxs_ipackets, "Receive packets");
4575 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes",
4576 	    CTLFLAG_RD | CTLFLAG_STATS,
4577 	    &stats->vrxs_ibytes, "Receive bytes");
4578 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops",
4579 	    CTLFLAG_RD | CTLFLAG_STATS,
4580 	    &stats->vrxs_iqdrops, "Receive drops");
4581 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors",
4582 	    CTLFLAG_RD | CTLFLAG_STATS,
4583 	    &stats->vrxs_ierrors, "Receive errors");
4584 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum",
4585 	    CTLFLAG_RD | CTLFLAG_STATS,
4586 	    &stats->vrxs_csum, "Receive checksum offloaded");
4587 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed",
4588 	    CTLFLAG_RD | CTLFLAG_STATS,
4589 	    &stats->vrxs_csum_failed, "Receive checksum offload failed");
4590 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro",
4591 	    CTLFLAG_RD | CTLFLAG_STATS,
4592 	    &stats->vrxs_host_lro, "Receive host segmentation offloaded");
4593 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled",
4594 	    CTLFLAG_RD | CTLFLAG_STATS,
4595 	    &stats->vrxs_rescheduled,
4596 	    "Receive interrupt handler rescheduled");
4597 }
4598 
4599 static void
4600 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
4601     struct sysctl_oid_list *child, struct vtnet_txq *txq)
4602 {
4603 	struct sysctl_oid *node;
4604 	struct sysctl_oid_list *list;
4605 	struct vtnet_txq_stats *stats;
4606 	char namebuf[16];
4607 
4608 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
4609 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
4610 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
4611 	list = SYSCTL_CHILDREN(node);
4612 
4613 	stats = &txq->vtntx_stats;
4614 
4615 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets",
4616 	    CTLFLAG_RD | CTLFLAG_STATS,
4617 	    &stats->vtxs_opackets, "Transmit packets");
4618 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes",
4619 	    CTLFLAG_RD | CTLFLAG_STATS,
4620 	    &stats->vtxs_obytes, "Transmit bytes");
4621 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts",
4622 	    CTLFLAG_RD | CTLFLAG_STATS,
4623 	    &stats->vtxs_omcasts, "Transmit multicasts");
4624 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum",
4625 	    CTLFLAG_RD | CTLFLAG_STATS,
4626 	    &stats->vtxs_csum, "Transmit checksum offloaded");
4627 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso",
4628 	    CTLFLAG_RD | CTLFLAG_STATS,
4629 	    &stats->vtxs_tso, "Transmit TCP segmentation offloaded");
4630 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled",
4631 	    CTLFLAG_RD | CTLFLAG_STATS,
4632 	    &stats->vtxs_rescheduled,
4633 	    "Transmit interrupt handler rescheduled");
4634 }
4635 
4636 static void
4637 vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
4638 {
4639 	device_t dev;
4640 	struct sysctl_ctx_list *ctx;
4641 	struct sysctl_oid *tree;
4642 	struct sysctl_oid_list *child;
4643 	int i;
4644 
4645 	dev = sc->vtnet_dev;
4646 	ctx = device_get_sysctl_ctx(dev);
4647 	tree = device_get_sysctl_tree(dev);
4648 	child = SYSCTL_CHILDREN(tree);
4649 
4650 	for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
4651 		vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
4652 		vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
4653 	}
4654 }
4655 
4656 static int
4657 vtnet_sysctl_rx_csum_failed(SYSCTL_HANDLER_ARGS)
4658 {
4659 	struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4660 	struct vtnet_statistics *stats = &sc->vtnet_stats;
4661 	struct vtnet_rxq_stats *rxst;
4662 	int i;
4663 
4664 	stats->rx_csum_failed = 0;
4665 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4666 		rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
4667 		stats->rx_csum_failed += rxst->vrxs_csum_failed;
4668 	}
4669 	return (sysctl_handle_64(oidp, NULL, stats->rx_csum_failed, req));
4670 }
4671 
4672 static int
4673 vtnet_sysctl_rx_csum_offloaded(SYSCTL_HANDLER_ARGS)
4674 {
4675 	struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4676 	struct vtnet_statistics *stats = &sc->vtnet_stats;
4677 	struct vtnet_rxq_stats *rxst;
4678 	int i;
4679 
4680 	stats->rx_csum_offloaded = 0;
4681 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4682 		rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
4683 		stats->rx_csum_offloaded += rxst->vrxs_csum;
4684 	}
4685 	return (sysctl_handle_64(oidp, NULL, stats->rx_csum_offloaded, req));
4686 }
4687 
4688 static int
4689 vtnet_sysctl_rx_task_rescheduled(SYSCTL_HANDLER_ARGS)
4690 {
4691 	struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4692 	struct vtnet_statistics *stats = &sc->vtnet_stats;
4693 	struct vtnet_rxq_stats *rxst;
4694 	int i;
4695 
4696 	stats->rx_task_rescheduled = 0;
4697 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4698 		rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
4699 		stats->rx_task_rescheduled += rxst->vrxs_rescheduled;
4700 	}
4701 	return (sysctl_handle_64(oidp, NULL, stats->rx_task_rescheduled, req));
4702 }
4703 
4704 static int
4705 vtnet_sysctl_tx_csum_offloaded(SYSCTL_HANDLER_ARGS)
4706 {
4707 	struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4708 	struct vtnet_statistics *stats = &sc->vtnet_stats;
4709 	struct vtnet_txq_stats *txst;
4710 	int i;
4711 
4712 	stats->tx_csum_offloaded = 0;
4713 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4714 		txst = &sc->vtnet_txqs[i].vtntx_stats;
4715 		stats->tx_csum_offloaded += txst->vtxs_csum;
4716 	}
4717 	return (sysctl_handle_64(oidp, NULL, stats->tx_csum_offloaded, req));
4718 }
4719 
4720 static int
4721 vtnet_sysctl_tx_tso_offloaded(SYSCTL_HANDLER_ARGS)
4722 {
4723 	struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4724 	struct vtnet_statistics *stats = &sc->vtnet_stats;
4725 	struct vtnet_txq_stats *txst;
4726 	int i;
4727 
4728 	stats->tx_tso_offloaded = 0;
4729 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4730 		txst = &sc->vtnet_txqs[i].vtntx_stats;
4731 		stats->tx_tso_offloaded += txst->vtxs_tso;
4732 	}
4733 	return (sysctl_handle_64(oidp, NULL, stats->tx_tso_offloaded, req));
4734 }
4735 
4736 static int
4737 vtnet_sysctl_tx_task_rescheduled(SYSCTL_HANDLER_ARGS)
4738 {
4739 	struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4740 	struct vtnet_statistics *stats = &sc->vtnet_stats;
4741 	struct vtnet_txq_stats *txst;
4742 	int i;
4743 
4744 	stats->tx_task_rescheduled = 0;
4745 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4746 		txst = &sc->vtnet_txqs[i].vtntx_stats;
4747 		stats->tx_task_rescheduled += txst->vtxs_rescheduled;
4748 	}
4749 	return (sysctl_handle_64(oidp, NULL, stats->tx_task_rescheduled, req));
4750 }
4751 
4752 static void
4753 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
4754     struct sysctl_oid_list *child, struct vtnet_softc *sc)
4755 {
4756 	struct vtnet_statistics *stats;
4757 	struct vtnet_rxq_stats rxaccum;
4758 	struct vtnet_txq_stats txaccum;
4759 
4760 	vtnet_accum_stats(sc, &rxaccum, &txaccum);
4761 
4762 	stats = &sc->vtnet_stats;
4763 	stats->rx_csum_offloaded = rxaccum.vrxs_csum;
4764 	stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
4765 	stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
4766 	stats->tx_csum_offloaded = txaccum.vtxs_csum;
4767 	stats->tx_tso_offloaded = txaccum.vtxs_tso;
4768 	stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
4769 
4770 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
4771 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->mbuf_alloc_failed,
4772 	    "Mbuf cluster allocation failures");
4773 
4774 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
4775 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_frame_too_large,
4776 	    "Received frame larger than the mbuf chain");
4777 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
4778 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_enq_replacement_failed,
4779 	    "Enqueuing the replacement receive mbuf failed");
4780 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
4781 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_mergeable_failed,
4782 	    "Mergeable buffers receive failures");
4783 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
4784 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ethtype,
4785 	    "Received checksum offloaded buffer with unsupported "
4786 	    "Ethernet type");
4787 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
4788 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ipproto,
4789 	    "Received checksum offloaded buffer with incorrect IP protocol");
4790 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_inaccessible_ipproto",
4791 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_inaccessible_ipproto,
4792 	    "Received checksum offloaded buffer with inaccessible IP protocol");
4793 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_failed",
4794 	    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4795 	    sc, 0, vtnet_sysctl_rx_csum_failed, "QU",
4796 	    "Received buffer checksum offload failed");
4797 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_offloaded",
4798 	    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4799 	    sc, 0, vtnet_sysctl_rx_csum_offloaded, "QU",
4800 	    "Received buffer checksum offload succeeded");
4801 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_task_rescheduled",
4802 	    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4803 	    sc, 0, vtnet_sysctl_rx_task_rescheduled, "QU",
4804 	    "Times the receive interrupt task rescheduled itself");
4805 
4806 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
4807 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_unknown_ethtype,
4808 	    "Aborted transmit of checksum offloaded buffer with unknown "
4809 	    "Ethernet type");
4810 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
4811 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_proto_mismatch,
4812 	    "Aborted transmit of checksum offloaded buffer because mismatched "
4813 	    "protocols");
4814 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
4815 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_not_tcp,
4816 	    "Aborted transmit of TSO buffer with non TCP protocol");
4817 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
4818 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_without_csum,
4819 	    "Aborted transmit of TSO buffer without TCP checksum offload");
4820 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
4821 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defragged,
4822 	    "Transmit mbufs defragged");
4823 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
4824 	    CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defrag_failed,
4825 	    "Aborted transmit of buffer because defrag failed");
4826 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_csum_offloaded",
4827 	    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4828 	    sc, 0, vtnet_sysctl_tx_csum_offloaded, "QU",
4829 	    "Offloaded checksum of transmitted buffer");
4830 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_tso_offloaded",
4831 	    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4832 	    sc, 0, vtnet_sysctl_tx_tso_offloaded, "QU",
4833 	    "Segmentation offload of transmitted buffer");
4834 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_task_rescheduled",
4835 	    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4836 	    sc, 0, vtnet_sysctl_tx_task_rescheduled, "QU",
4837 	    "Times the transmit interrupt task rescheduled itself");
4838 }
4839 
4840 static int
4841 vtnet_sysctl_features(SYSCTL_HANDLER_ARGS)
4842 {
4843 	struct sbuf sb;
4844 	struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4845 	int error;
4846 
4847 	sbuf_new_for_sysctl(&sb, NULL, 0, req);
4848 	sbuf_printf(&sb, "%b", (uint32_t)sc->vtnet_features,
4849 	    VIRTIO_NET_FEATURE_BITS);
4850 	error = sbuf_finish(&sb);
4851 	sbuf_delete(&sb);
4852 	return (error);
4853 }
4854 
4855 static int
4856 vtnet_sysctl_flags(SYSCTL_HANDLER_ARGS)
4857 {
4858 	struct sbuf sb;
4859 	struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4860 	int error;
4861 
4862 	sbuf_new_for_sysctl(&sb, NULL, 0, req);
4863 	sbuf_printf(&sb, "%b", sc->vtnet_flags, VTNET_FLAGS_BITS);
4864 	error = sbuf_finish(&sb);
4865 	sbuf_delete(&sb);
4866 	return (error);
4867 }
4868 
4869 static void
4870 vtnet_setup_sysctl(struct vtnet_softc *sc)
4871 {
4872 	device_t dev;
4873 	struct sysctl_ctx_list *ctx;
4874 	struct sysctl_oid *tree;
4875 	struct sysctl_oid_list *child;
4876 
4877 	dev = sc->vtnet_dev;
4878 	ctx = device_get_sysctl_ctx(dev);
4879 	tree = device_get_sysctl_tree(dev);
4880 	child = SYSCTL_CHILDREN(tree);
4881 
4882 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
4883 	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
4884 	    "Number of maximum supported virtqueue pairs");
4885 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs",
4886 	    CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0,
4887 	    "Number of requested virtqueue pairs");
4888 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
4889 	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
4890 	    "Number of active virtqueue pairs");
4891 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "features",
4892 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
4893 	    vtnet_sysctl_features, "A", "Features");
4894 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "flags",
4895 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
4896 	    vtnet_sysctl_flags, "A", "Flags");
4897 
4898 	vtnet_setup_stat_sysctl(ctx, child, sc);
4899 }
4900 
4901 static void
4902 vtnet_load_tunables(struct vtnet_softc *sc)
4903 {
4904 
4905 	sc->vtnet_lro_entry_count = vtnet_tunable_int(sc,
4906 	    "lro_entry_count", vtnet_lro_entry_count);
4907 	if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES)
4908 		sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES;
4909 
4910 	sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc,
4911 	    "lro_mbufq_depth", vtnet_lro_mbufq_depth);
4912 }
4913 
4914 static int
4915 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
4916 {
4917 
4918 	return (virtqueue_enable_intr(rxq->vtnrx_vq));
4919 }
4920 
4921 static void
4922 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
4923 {
4924 
4925 	virtqueue_disable_intr(rxq->vtnrx_vq);
4926 }
4927 
4928 static int
4929 vtnet_txq_enable_intr(struct vtnet_txq *txq)
4930 {
4931 	struct virtqueue *vq;
4932 
4933 	vq = txq->vtntx_vq;
4934 
4935 	if (vtnet_txq_below_threshold(txq) != 0)
4936 		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
4937 
4938 	/*
4939 	 * The free count is above our threshold. Keep the Tx interrupt
4940 	 * disabled until the queue is fuller.
4941 	 */
4942 	return (0);
4943 }
4944 
4945 static void
4946 vtnet_txq_disable_intr(struct vtnet_txq *txq)
4947 {
4948 
4949 	virtqueue_disable_intr(txq->vtntx_vq);
4950 }
4951 
4952 static void
4953 vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
4954 {
4955 	struct vtnet_rxq *rxq;
4956 	int i;
4957 
4958 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
4959 		rxq = &sc->vtnet_rxqs[i];
4960 		if (vtnet_rxq_enable_intr(rxq) != 0)
4961 			taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
4962 	}
4963 }
4964 
4965 static void
4966 vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
4967 {
4968 	int i;
4969 
4970 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4971 		vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
4972 }
4973 
/* Enable interrupts for the receive queues first, then the transmit queues. */
static void
vtnet_enable_interrupts(struct vtnet_softc *sc)
{

	vtnet_enable_rx_interrupts(sc);
	vtnet_enable_tx_interrupts(sc);
}
4981 
4982 static void
4983 vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
4984 {
4985 	int i;
4986 
4987 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4988 		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
4989 }
4990 
4991 static void
4992 vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
4993 {
4994 	int i;
4995 
4996 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4997 		vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
4998 }
4999 
/* Disable interrupts for the receive queues first, then the transmit queues. */
static void
vtnet_disable_interrupts(struct vtnet_softc *sc)
{

	vtnet_disable_rx_interrupts(sc);
	vtnet_disable_tx_interrupts(sc);
}
5007 
5008 static int
5009 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
5010 {
5011 	char path[64];
5012 
5013 	snprintf(path, sizeof(path),
5014 	    "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
5015 	TUNABLE_INT_FETCH(path, &def);
5016 
5017 	return (def);
5018 }
5019 
5020 #ifdef DEBUGNET
5021 static void
5022 vtnet_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize)
5023 {
5024 	struct vtnet_softc *sc;
5025 
5026 	sc = if_getsoftc(ifp);
5027 
5028 	VTNET_CORE_LOCK(sc);
5029 	*nrxr = sc->vtnet_req_vq_pairs;
5030 	*ncl = DEBUGNET_MAX_IN_FLIGHT;
5031 	*clsize = sc->vtnet_rx_clustersz;
5032 	VTNET_CORE_UNLOCK(sc);
5033 }
5034 
5035 static void
5036 vtnet_debugnet_event(if_t ifp __unused, enum debugnet_ev event)
5037 {
5038 	struct vtnet_softc *sc;
5039 	static bool sw_lro_enabled = false;
5040 
5041 	/*
5042 	 * Disable software LRO, since it would require entering the network
5043 	 * epoch when calling vtnet_txq_eof() in vtnet_debugnet_poll().
5044 	 */
5045 	sc = if_getsoftc(ifp);
5046 	switch (event) {
5047 	case DEBUGNET_START:
5048 		sw_lro_enabled = (sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0;
5049 		if (sw_lro_enabled)
5050 			sc->vtnet_flags &= ~VTNET_FLAG_SW_LRO;
5051 		break;
5052 	case DEBUGNET_END:
5053 		if (sw_lro_enabled)
5054 			sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
5055 		break;
5056 	}
5057 }
5058 
5059 static int
5060 vtnet_debugnet_transmit(if_t ifp, struct mbuf *m)
5061 {
5062 	struct vtnet_softc *sc;
5063 	struct vtnet_txq *txq;
5064 	int error;
5065 
5066 	sc = if_getsoftc(ifp);
5067 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
5068 	    IFF_DRV_RUNNING)
5069 		return (EBUSY);
5070 
5071 	txq = &sc->vtnet_txqs[0];
5072 	error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
5073 	if (error == 0)
5074 		(void)vtnet_txq_notify(txq);
5075 	return (error);
5076 }
5077 
5078 static int
5079 vtnet_debugnet_poll(if_t ifp, int count)
5080 {
5081 	struct vtnet_softc *sc;
5082 	int i;
5083 
5084 	sc = if_getsoftc(ifp);
5085 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
5086 	    IFF_DRV_RUNNING)
5087 		return (EBUSY);
5088 
5089 	(void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
5090 	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
5091 		(void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
5092 	return (0);
5093 }
5094 #endif /* DEBUGNET */
5095