xref: /freebsd/sys/dev/virtio/network/if_vtnet.c (revision 06690044dac183ea1d93c2ae227e261da3bdca2a)
1  /*-
2   * SPDX-License-Identifier: BSD-2-Clause
3   *
4   * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5   * All rights reserved.
6   *
7   * Redistribution and use in source and binary forms, with or without
8   * modification, are permitted provided that the following conditions
9   * are met:
10   * 1. Redistributions of source code must retain the above copyright
11   *    notice unmodified, this list of conditions, and the following
12   *    disclaimer.
13   * 2. Redistributions in binary form must reproduce the above copyright
14   *    notice, this list of conditions and the following disclaimer in the
15   *    documentation and/or other materials provided with the distribution.
16   *
17   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20   * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26   * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27   */
28  
29  /* Driver for VirtIO network devices. */
30  
31  #include <sys/param.h>
32  #include <sys/eventhandler.h>
33  #include <sys/systm.h>
34  #include <sys/kernel.h>
35  #include <sys/sockio.h>
36  #include <sys/malloc.h>
37  #include <sys/mbuf.h>
38  #include <sys/module.h>
39  #include <sys/msan.h>
40  #include <sys/socket.h>
41  #include <sys/sysctl.h>
42  #include <sys/random.h>
43  #include <sys/sglist.h>
44  #include <sys/lock.h>
45  #include <sys/mutex.h>
46  #include <sys/taskqueue.h>
47  #include <sys/smp.h>
48  #include <machine/smp.h>
49  
50  #include <vm/uma.h>
51  
52  #include <net/debugnet.h>
53  #include <net/ethernet.h>
54  #include <net/pfil.h>
55  #include <net/if.h>
56  #include <net/if_var.h>
57  #include <net/if_arp.h>
58  #include <net/if_dl.h>
59  #include <net/if_types.h>
60  #include <net/if_media.h>
61  #include <net/if_vlan_var.h>
62  
63  #include <net/bpf.h>
64  
65  #include <netinet/in_systm.h>
66  #include <netinet/in.h>
67  #include <netinet/ip.h>
68  #include <netinet/ip6.h>
69  #include <netinet6/ip6_var.h>
70  #include <netinet/udp.h>
71  #include <netinet/tcp.h>
72  #include <netinet/tcp_lro.h>
73  
74  #include <machine/bus.h>
75  #include <machine/resource.h>
76  #include <sys/bus.h>
77  #include <sys/rman.h>
78  
79  #include <dev/virtio/virtio.h>
80  #include <dev/virtio/virtqueue.h>
81  #include <dev/virtio/network/virtio_net.h>
82  #include <dev/virtio/network/if_vtnetvar.h>
83  #include "virtio_if.h"
84  
85  #include "opt_inet.h"
86  #include "opt_inet6.h"
87  
88  #if defined(INET) || defined(INET6)
89  #include <machine/in_cksum.h>
90  #endif
91  
92  #ifdef __NO_STRICT_ALIGNMENT
93  #define VTNET_ETHER_ALIGN 0
94  #else /* Strict alignment */
95  #define VTNET_ETHER_ALIGN ETHER_ALIGN
96  #endif
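
/*
 * Note added for clarity (not from the original source): VTNET_ETHER_ALIGN
 * matters when the virtio-net header is laid out inline in the receive
 * buffer and its size is a multiple of 4 (the 12-byte modern and mergeable
 * headers).  The 14-byte Ethernet header that follows would then leave the
 * IP header at offset 26, which is only 2-byte aligned; shifting the mbuf
 * data by ETHER_ALIGN (2 bytes) moves it to offset 28, restoring 4-byte
 * alignment on strict-alignment platforms.
 */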
97  
98  static int	vtnet_modevent(module_t, int, void *);
99  
100  static int	vtnet_probe(device_t);
101  static int	vtnet_attach(device_t);
102  static int	vtnet_detach(device_t);
103  static int	vtnet_suspend(device_t);
104  static int	vtnet_resume(device_t);
105  static int	vtnet_shutdown(device_t);
106  static int	vtnet_attach_completed(device_t);
107  static int	vtnet_config_change(device_t);
108  
109  static int	vtnet_negotiate_features(struct vtnet_softc *);
110  static int	vtnet_setup_features(struct vtnet_softc *);
111  static int	vtnet_init_rxq(struct vtnet_softc *, int);
112  static int	vtnet_init_txq(struct vtnet_softc *, int);
113  static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
114  static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
115  static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
116  static void	vtnet_free_rx_filters(struct vtnet_softc *);
117  static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
118  static void	vtnet_alloc_interface(struct vtnet_softc *);
119  static int	vtnet_setup_interface(struct vtnet_softc *);
120  static int	vtnet_ioctl_mtu(struct vtnet_softc *, u_int);
121  static int	vtnet_ioctl_ifflags(struct vtnet_softc *);
122  static int	vtnet_ioctl_multi(struct vtnet_softc *);
123  static int	vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *);
124  static int	vtnet_ioctl(if_t, u_long, caddr_t);
125  static uint64_t	vtnet_get_counter(if_t, ift_counter);
126  
127  static int	vtnet_rxq_populate(struct vtnet_rxq *);
128  static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
129  static struct mbuf *
130  		vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
131  static int	vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
132  		    struct mbuf *, int);
133  static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
134  static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
135  static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
136  static int	vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
137  		     uint16_t, int, struct virtio_net_hdr *);
138  static int	vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
139  		     uint16_t, int, struct virtio_net_hdr *);
140  static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
141  		     struct virtio_net_hdr *);
142  static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
143  static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
144  static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
145  static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
146  		    struct virtio_net_hdr *);
147  static int	vtnet_rxq_eof(struct vtnet_rxq *);
148  static void	vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
149  static void	vtnet_rx_vq_intr(void *);
150  static void	vtnet_rxq_tq_intr(void *, int);
151  
152  static int	vtnet_txq_intr_threshold(struct vtnet_txq *);
153  static int	vtnet_txq_below_threshold(struct vtnet_txq *);
154  static int	vtnet_txq_notify(struct vtnet_txq *);
155  static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
156  static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
157  		    int *, int *, int *);
158  static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
159  		    int, struct virtio_net_hdr *);
160  static struct mbuf *
161  		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
162  		    struct virtio_net_hdr *);
163  static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
164  		    struct vtnet_tx_header *);
165  static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
166  #ifdef VTNET_LEGACY_TX
167  static void	vtnet_start_locked(struct vtnet_txq *, if_t);
168  static void	vtnet_start(if_t);
169  #else
170  static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
171  static int	vtnet_txq_mq_start(if_t, struct mbuf *);
172  static void	vtnet_txq_tq_deferred(void *, int);
173  #endif
174  static void	vtnet_txq_start(struct vtnet_txq *);
175  static void	vtnet_txq_tq_intr(void *, int);
176  static int	vtnet_txq_eof(struct vtnet_txq *);
177  static void	vtnet_tx_vq_intr(void *);
178  static void	vtnet_tx_start_all(struct vtnet_softc *);
179  
180  #ifndef VTNET_LEGACY_TX
181  static void	vtnet_qflush(if_t);
182  #endif
183  
184  static int	vtnet_watchdog(struct vtnet_txq *);
185  static void	vtnet_accum_stats(struct vtnet_softc *,
186  		    struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
187  static void	vtnet_tick(void *);
188  
189  static void	vtnet_start_taskqueues(struct vtnet_softc *);
190  static void	vtnet_free_taskqueues(struct vtnet_softc *);
191  static void	vtnet_drain_taskqueues(struct vtnet_softc *);
192  
193  static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
194  static void	vtnet_stop_rendezvous(struct vtnet_softc *);
195  static void	vtnet_stop(struct vtnet_softc *);
196  static int	vtnet_virtio_reinit(struct vtnet_softc *);
197  static void	vtnet_init_rx_filters(struct vtnet_softc *);
198  static int	vtnet_init_rx_queues(struct vtnet_softc *);
199  static int	vtnet_init_tx_queues(struct vtnet_softc *);
200  static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
201  static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
202  static void	vtnet_update_rx_offloads(struct vtnet_softc *);
203  static int	vtnet_reinit(struct vtnet_softc *);
204  static void	vtnet_init_locked(struct vtnet_softc *, int);
205  static void	vtnet_init(void *);
206  
207  static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
208  static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
209  		    struct sglist *, int, int);
210  static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
211  static int	vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t);
212  static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
213  static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, bool);
214  static int	vtnet_set_promisc(struct vtnet_softc *, bool);
215  static int	vtnet_set_allmulti(struct vtnet_softc *, bool);
216  static void	vtnet_rx_filter(struct vtnet_softc *);
217  static void	vtnet_rx_filter_mac(struct vtnet_softc *);
218  static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
219  static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
220  static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
221  static void	vtnet_register_vlan(void *, if_t, uint16_t);
222  static void	vtnet_unregister_vlan(void *, if_t, uint16_t);
223  
224  static void	vtnet_update_speed_duplex(struct vtnet_softc *);
225  static int	vtnet_is_link_up(struct vtnet_softc *);
226  static void	vtnet_update_link_status(struct vtnet_softc *);
227  static int	vtnet_ifmedia_upd(if_t);
228  static void	vtnet_ifmedia_sts(if_t, struct ifmediareq *);
229  static void	vtnet_get_macaddr(struct vtnet_softc *);
230  static void	vtnet_set_macaddr(struct vtnet_softc *);
231  static void	vtnet_attached_set_macaddr(struct vtnet_softc *);
232  static void	vtnet_vlan_tag_remove(struct mbuf *);
233  static void	vtnet_set_rx_process_limit(struct vtnet_softc *);
234  
235  static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
236  		    struct sysctl_oid_list *, struct vtnet_rxq *);
237  static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
238  		    struct sysctl_oid_list *, struct vtnet_txq *);
239  static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
240  static void	vtnet_load_tunables(struct vtnet_softc *);
241  static void	vtnet_setup_sysctl(struct vtnet_softc *);
242  
243  static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
244  static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
245  static int	vtnet_txq_enable_intr(struct vtnet_txq *);
246  static void	vtnet_txq_disable_intr(struct vtnet_txq *);
247  static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
248  static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
249  static void	vtnet_enable_interrupts(struct vtnet_softc *);
250  static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
251  static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
252  static void	vtnet_disable_interrupts(struct vtnet_softc *);
253  
254  static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
255  
256  DEBUGNET_DEFINE(vtnet);
257  
258  #define vtnet_htog16(_sc, _val)	virtio_htog16(vtnet_modern(_sc), _val)
259  #define vtnet_htog32(_sc, _val)	virtio_htog32(vtnet_modern(_sc), _val)
260  #define vtnet_htog64(_sc, _val)	virtio_htog64(vtnet_modern(_sc), _val)
261  #define vtnet_gtoh16(_sc, _val)	virtio_gtoh16(vtnet_modern(_sc), _val)
262  #define vtnet_gtoh32(_sc, _val)	virtio_gtoh32(vtnet_modern(_sc), _val)
263  #define vtnet_gtoh64(_sc, _val)	virtio_gtoh64(vtnet_modern(_sc), _val)
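
/*
 * Note added for clarity (not from the original source): modern
 * (VIRTIO_F_VERSION_1) devices define config and header fields as
 * little-endian, while legacy devices use the guest's native byte order.
 * These macros dispatch on vtnet_modern(sc) so the rest of the driver can
 * convert values with, e.g., vtnet_gtoh16(sc, val) when writing a header
 * field and vtnet_htog16(sc, val) when reading a device-provided value,
 * regardless of which mode was negotiated.
 */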
264  
265  /* Tunables. */
266  static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
267      "VirtIO Net driver parameters");
268  
269  static int vtnet_csum_disable = 0;
270  SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
271      &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
272  
273  static int vtnet_fixup_needs_csum = 0;
274  SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
275      &vtnet_fixup_needs_csum, 0,
276      "Calculate valid checksum for NEEDS_CSUM packets");
277  
278  static int vtnet_tso_disable = 0;
279  SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
280      &vtnet_tso_disable, 0, "Disables TSO");
281  
282  static int vtnet_lro_disable = 0;
283  SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
284      &vtnet_lro_disable, 0, "Disables hardware LRO");
285  
286  static int vtnet_mq_disable = 0;
287  SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN,
288      &vtnet_mq_disable, 0, "Disables multiqueue support");
289  
290  static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
291  SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
292      &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs");
293  
294  static int vtnet_tso_maxlen = IP_MAXPACKET;
295  SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
296      &vtnet_tso_maxlen, 0, "TSO burst limit");
297  
298  static int vtnet_rx_process_limit = 1024;
299  SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
300      &vtnet_rx_process_limit, 0,
301      "Number of RX segments processed in one pass");
302  
303  static int vtnet_lro_entry_count = 128;
304  SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
305      &vtnet_lro_entry_count, 0, "Software LRO entry count");
306  
307  /* A nonzero value enables sorted LRO and sets the depth of the mbuf queue. */
308  static int vtnet_lro_mbufq_depth = 0;
309  SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
310      &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue");
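
/*
 * Usage example, added for illustration (values are hypothetical): the
 * knobs above are boot-time tunables (CTLFLAG_RDTUN), so they are normally
 * set in /boot/loader.conf rather than changed at runtime, e.g.:
 *
 *	hw.vtnet.tso_disable=1
 *	hw.vtnet.mq_max_pairs=4
 *
 * Most of them can also be overridden per device via tunables of the form
 * hw.vtnet.<unit>.<knob>, which vtnet_tunable_int() consults.
 */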
311  
312  static uma_zone_t vtnet_tx_header_zone;
313  
314  static struct virtio_feature_desc vtnet_feature_desc[] = {
315  	{ VIRTIO_NET_F_CSUM,			"TxChecksum"		},
316  	{ VIRTIO_NET_F_GUEST_CSUM,		"RxChecksum"		},
317  	{ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,	"CtrlRxOffloads"	},
318  	{ VIRTIO_NET_F_MAC,			"MAC"			},
319  	{ VIRTIO_NET_F_GSO,			"TxGSO"			},
320  	{ VIRTIO_NET_F_GUEST_TSO4,		"RxLROv4"		},
321  	{ VIRTIO_NET_F_GUEST_TSO6,		"RxLROv6"		},
322  	{ VIRTIO_NET_F_GUEST_ECN,		"RxLROECN"		},
323  	{ VIRTIO_NET_F_GUEST_UFO,		"RxUFO"			},
324  	{ VIRTIO_NET_F_HOST_TSO4,		"TxTSOv4"		},
325  	{ VIRTIO_NET_F_HOST_TSO6,		"TxTSOv6"		},
326  	{ VIRTIO_NET_F_HOST_ECN,		"TxTSOECN"		},
327  	{ VIRTIO_NET_F_HOST_UFO,		"TxUFO"			},
328  	{ VIRTIO_NET_F_MRG_RXBUF,		"MrgRxBuf"		},
329  	{ VIRTIO_NET_F_STATUS,			"Status"		},
330  	{ VIRTIO_NET_F_CTRL_VQ,			"CtrlVq"		},
331  	{ VIRTIO_NET_F_CTRL_RX,			"CtrlRxMode"		},
332  	{ VIRTIO_NET_F_CTRL_VLAN,		"CtrlVLANFilter"	},
333  	{ VIRTIO_NET_F_CTRL_RX_EXTRA,		"CtrlRxModeExtra"	},
334  	{ VIRTIO_NET_F_GUEST_ANNOUNCE,		"GuestAnnounce"		},
335  	{ VIRTIO_NET_F_MQ,			"Multiqueue"		},
336  	{ VIRTIO_NET_F_CTRL_MAC_ADDR,		"CtrlMacAddr"		},
337  	{ VIRTIO_NET_F_SPEED_DUPLEX,		"SpeedDuplex"		},
338  
339  	{ 0, NULL }
340  };
341  
342  static device_method_t vtnet_methods[] = {
343  	/* Device methods. */
344  	DEVMETHOD(device_probe,			vtnet_probe),
345  	DEVMETHOD(device_attach,		vtnet_attach),
346  	DEVMETHOD(device_detach,		vtnet_detach),
347  	DEVMETHOD(device_suspend,		vtnet_suspend),
348  	DEVMETHOD(device_resume,		vtnet_resume),
349  	DEVMETHOD(device_shutdown,		vtnet_shutdown),
350  
351  	/* VirtIO methods. */
352  	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
353  	DEVMETHOD(virtio_config_change,		vtnet_config_change),
354  
355  	DEVMETHOD_END
356  };
357  
358  #ifdef DEV_NETMAP
359  #include <dev/netmap/if_vtnet_netmap.h>
360  #endif
361  
362  static driver_t vtnet_driver = {
363      .name = "vtnet",
364      .methods = vtnet_methods,
365      .size = sizeof(struct vtnet_softc)
366  };
367  VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_modevent, NULL);
368  MODULE_VERSION(vtnet, 1);
369  MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
370  #ifdef DEV_NETMAP
371  MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
372  #endif
373  
374  VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");
375  
376  static int
377  vtnet_modevent(module_t mod __unused, int type, void *unused __unused)
378  {
379  	int error = 0;
380  	static int loaded = 0;
381  
382  	switch (type) {
383  	case MOD_LOAD:
384  		if (loaded++ == 0) {
385  			vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
386  				sizeof(struct vtnet_tx_header),
387  				NULL, NULL, NULL, NULL, 0, 0);
388  #ifdef DEBUGNET
389  			/*
390  			 * We need to allocate from this zone in the transmit
391  			 * path, so ensure that we have at least one item per
392  			 * header available.  XXX: Add a separate zone as we
393  			 * do for mbufs?  Otherwise we may allocate buckets.
394  			 */
395  			uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
396  			uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
397  #endif
398  		}
399  		break;
400  	case MOD_QUIESCE:
401  		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
402  			error = EBUSY;
403  		break;
404  	case MOD_UNLOAD:
405  		if (--loaded == 0) {
406  			uma_zdestroy(vtnet_tx_header_zone);
407  			vtnet_tx_header_zone = NULL;
408  		}
409  		break;
410  	case MOD_SHUTDOWN:
411  		break;
412  	default:
413  		error = EOPNOTSUPP;
414  		break;
415  	}
416  
417  	return (error);
418  }
419  
420  static int
421  vtnet_probe(device_t dev)
422  {
423  	return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
424  }
425  
426  static int
427  vtnet_attach(device_t dev)
428  {
429  	struct vtnet_softc *sc;
430  	int error;
431  
432  	sc = device_get_softc(dev);
433  	sc->vtnet_dev = dev;
434  	virtio_set_feature_desc(dev, vtnet_feature_desc);
435  
436  	VTNET_CORE_LOCK_INIT(sc);
437  	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
438  	vtnet_load_tunables(sc);
439  
440  	vtnet_alloc_interface(sc);
441  	vtnet_setup_sysctl(sc);
442  
443  	error = vtnet_setup_features(sc);
444  	if (error) {
445  		device_printf(dev, "cannot setup features\n");
446  		goto fail;
447  	}
448  
449  	error = vtnet_alloc_rx_filters(sc);
450  	if (error) {
451  		device_printf(dev, "cannot allocate Rx filters\n");
452  		goto fail;
453  	}
454  
455  	error = vtnet_alloc_rxtx_queues(sc);
456  	if (error) {
457  		device_printf(dev, "cannot allocate queues\n");
458  		goto fail;
459  	}
460  
461  	error = vtnet_alloc_virtqueues(sc);
462  	if (error) {
463  		device_printf(dev, "cannot allocate virtqueues\n");
464  		goto fail;
465  	}
466  
467  	error = vtnet_setup_interface(sc);
468  	if (error) {
469  		device_printf(dev, "cannot setup interface\n");
470  		goto fail;
471  	}
472  
473  	error = virtio_setup_intr(dev, INTR_TYPE_NET);
474  	if (error) {
475  		device_printf(dev, "cannot setup interrupts\n");
476  		ether_ifdetach(sc->vtnet_ifp);
477  		goto fail;
478  	}
479  
480  #ifdef DEV_NETMAP
481  	vtnet_netmap_attach(sc);
482  #endif
483  	vtnet_start_taskqueues(sc);
484  
485  fail:
486  	if (error)
487  		vtnet_detach(dev);
488  
489  	return (error);
490  }
491  
492  static int
493  vtnet_detach(device_t dev)
494  {
495  	struct vtnet_softc *sc;
496  	if_t ifp;
497  
498  	sc = device_get_softc(dev);
499  	ifp = sc->vtnet_ifp;
500  
501  	if (device_is_attached(dev)) {
502  		VTNET_CORE_LOCK(sc);
503  		vtnet_stop(sc);
504  		VTNET_CORE_UNLOCK(sc);
505  
506  		callout_drain(&sc->vtnet_tick_ch);
507  		vtnet_drain_taskqueues(sc);
508  
509  		ether_ifdetach(ifp);
510  	}
511  
512  #ifdef DEV_NETMAP
513  	netmap_detach(ifp);
514  #endif
515  
516  	if (sc->vtnet_pfil != NULL) {
517  		pfil_head_unregister(sc->vtnet_pfil);
518  		sc->vtnet_pfil = NULL;
519  	}
520  
521  	vtnet_free_taskqueues(sc);
522  
523  	if (sc->vtnet_vlan_attach != NULL) {
524  		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
525  		sc->vtnet_vlan_attach = NULL;
526  	}
527  	if (sc->vtnet_vlan_detach != NULL) {
528  		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
529  		sc->vtnet_vlan_detach = NULL;
530  	}
531  
532  	ifmedia_removeall(&sc->vtnet_media);
533  
534  	if (ifp != NULL) {
535  		if_free(ifp);
536  		sc->vtnet_ifp = NULL;
537  	}
538  
539  	vtnet_free_rxtx_queues(sc);
540  	vtnet_free_rx_filters(sc);
541  
542  	if (sc->vtnet_ctrl_vq != NULL)
543  		vtnet_free_ctrl_vq(sc);
544  
545  	VTNET_CORE_LOCK_DESTROY(sc);
546  
547  	return (0);
548  }
549  
550  static int
551  vtnet_suspend(device_t dev)
552  {
553  	struct vtnet_softc *sc;
554  
555  	sc = device_get_softc(dev);
556  
557  	VTNET_CORE_LOCK(sc);
558  	vtnet_stop(sc);
559  	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
560  	VTNET_CORE_UNLOCK(sc);
561  
562  	return (0);
563  }
564  
565  static int
566  vtnet_resume(device_t dev)
567  {
568  	struct vtnet_softc *sc;
569  	if_t ifp;
570  
571  	sc = device_get_softc(dev);
572  	ifp = sc->vtnet_ifp;
573  
574  	VTNET_CORE_LOCK(sc);
575  	if (if_getflags(ifp) & IFF_UP)
576  		vtnet_init_locked(sc, 0);
577  	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
578  	VTNET_CORE_UNLOCK(sc);
579  
580  	return (0);
581  }
582  
583  static int
584  vtnet_shutdown(device_t dev)
585  {
586  	/*
587  	 * Suspend already does all of what we need to
588  	 * do here; we just never expect to be resumed.
589  	 */
590  	return (vtnet_suspend(dev));
591  }
592  
593  static int
594  vtnet_attach_completed(device_t dev)
595  {
596  	struct vtnet_softc *sc;
597  
598  	sc = device_get_softc(dev);
599  
600  	VTNET_CORE_LOCK(sc);
601  	vtnet_attached_set_macaddr(sc);
602  	VTNET_CORE_UNLOCK(sc);
603  
604  	return (0);
605  }
606  
607  static int
608  vtnet_config_change(device_t dev)
609  {
610  	struct vtnet_softc *sc;
611  
612  	sc = device_get_softc(dev);
613  
614  	VTNET_CORE_LOCK(sc);
615  	vtnet_update_link_status(sc);
616  	if (sc->vtnet_link_active != 0)
617  		vtnet_tx_start_all(sc);
618  	VTNET_CORE_UNLOCK(sc);
619  
620  	return (0);
621  }
622  
623  static int
624  vtnet_negotiate_features(struct vtnet_softc *sc)
625  {
626  	device_t dev;
627  	uint64_t features, negotiated_features;
628  	int no_csum;
629  
630  	dev = sc->vtnet_dev;
631  	features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES :
632  	    VTNET_LEGACY_FEATURES;
633  
634  	/*
635  	 * TSO and LRO are only available when their corresponding checksum
636  	 * offload feature is also negotiated.
637  	 */
638  	no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable);
639  	if (no_csum)
640  		features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM);
641  	if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
642  		features &= ~VTNET_TSO_FEATURES;
643  	if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
644  		features &= ~VTNET_LRO_FEATURES;
645  
646  #ifndef VTNET_LEGACY_TX
647  	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
648  		features &= ~VIRTIO_NET_F_MQ;
649  #else
650  	features &= ~VIRTIO_NET_F_MQ;
651  #endif
652  
653  	negotiated_features = virtio_negotiate_features(dev, features);
654  
655  	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
656  		uint16_t mtu;
657  
658  		mtu = virtio_read_dev_config_2(dev,
659  		    offsetof(struct virtio_net_config, mtu));
660  		if (mtu < VTNET_MIN_MTU /* || mtu > VTNET_MAX_MTU */) {
661  			device_printf(dev, "Invalid MTU value: %d. "
662  			    "MTU feature disabled.\n", mtu);
663  			features &= ~VIRTIO_NET_F_MTU;
664  			negotiated_features =
665  			    virtio_negotiate_features(dev, features);
666  		}
667  	}
668  
669  	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
670  		uint16_t npairs;
671  
672  		npairs = virtio_read_dev_config_2(dev,
673  		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
674  		if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
675  		    npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
676  			device_printf(dev, "Invalid max_virtqueue_pairs value: "
677  			    "%d. Multiqueue feature disabled.\n", npairs);
678  			features &= ~VIRTIO_NET_F_MQ;
679  			negotiated_features =
680  			    virtio_negotiate_features(dev, features);
681  		}
682  	}
683  
684  	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
685  	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
686  		/*
687  		 * LRO without mergeable buffers requires special care. This
688  		 * is not ideal because every receive buffer must be large
689  		 * enough to hold the maximum TCP packet, the Ethernet header,
690  		 * and the virtio-net header. This requires up to 34 descriptors
691  		 * with MCLBYTES clusters. If we do not have indirect descriptors,
692  		 * LRO is disabled since the virtqueue will not contain very
693  		 * many receive buffers.
694  		 */
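		/*
		 * Added arithmetic, for illustration (approximate): a
		 * maximum-sized LRO packet of roughly 64KB spread across
		 * 2KB MCLBYTES clusters needs about 33 data segments, plus
		 * one more segment for the separately described virtio-net
		 * header, which is where the "up to 34 descriptors" figure
		 * above comes from.
		 */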
695  		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
696  			device_printf(dev,
697  			    "Host LRO disabled since both mergeable buffers "
698  			    "and indirect descriptors were not negotiated\n");
699  			features &= ~VTNET_LRO_FEATURES;
700  			negotiated_features =
701  			    virtio_negotiate_features(dev, features);
702  		} else
703  			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
704  	}
705  
706  	sc->vtnet_features = negotiated_features;
707  	sc->vtnet_negotiated_features = negotiated_features;
708  
709  	return (virtio_finalize_features(dev));
710  }
711  
712  static int
713  vtnet_setup_features(struct vtnet_softc *sc)
714  {
715  	device_t dev;
716  	int error;
717  
718  	dev = sc->vtnet_dev;
719  
720  	error = vtnet_negotiate_features(sc);
721  	if (error)
722  		return (error);
723  
724  	if (virtio_with_feature(dev, VIRTIO_F_VERSION_1))
725  		sc->vtnet_flags |= VTNET_FLAG_MODERN;
726  	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
727  		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
728  	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
729  		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
730  
731  	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
732  		/* This feature should always be negotiated. */
733  		sc->vtnet_flags |= VTNET_FLAG_MAC;
734  	}
735  
736  	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
737  		sc->vtnet_max_mtu = virtio_read_dev_config_2(dev,
738  		    offsetof(struct virtio_net_config, mtu));
739  	} else
740  		sc->vtnet_max_mtu = VTNET_MAX_MTU;
741  
742  	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
743  		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
744  		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
745  	} else if (vtnet_modern(sc)) {
746  		/* This is identical to the mergeable header. */
747  		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
748  	} else
749  		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
750  
751  	if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
752  		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
753  	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
754  		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
755  	else
756  		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;
757  
758  	/*
759  	 * Favor "hardware" LRO when negotiated, falling back to software
760  	 * LRO otherwise; enabling both usually brings little benefit or hurts.
761  	 */
762  	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 &&
763  	    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0)
764  		sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
765  
766  	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
767  	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
768  	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
769  		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
770  	else
771  		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;
772  
773  	sc->vtnet_req_vq_pairs = 1;
774  	sc->vtnet_max_vq_pairs = 1;
775  
776  	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
777  		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
778  
779  		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
780  			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
781  		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
782  			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
783  		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
784  			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
785  
786  		if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
787  			sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
788  			    offsetof(struct virtio_net_config,
789  			    max_virtqueue_pairs));
790  		}
791  	}
792  
793  	if (sc->vtnet_max_vq_pairs > 1) {
794  		int req;
795  
796  		/*
797  		 * Limit the maximum number of requested queue pairs to the
798  		 * number of CPUs and the configured maximum.
799  		 */
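		/*
		 * Worked example, added for illustration (hypothetical
		 * values): with 8 CPUs and a device reporting a maximum of
		 * 4 virtqueue pairs, req is effectively clamped to
		 * min(mq_max_pairs, device maximum, mp_ncpus) = 4, so 4
		 * pairs are requested and VTNET_FLAG_MQ is set.
		 */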
800  		req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
801  		if (req < 0)
802  			req = 1;
803  		if (req == 0)
804  			req = mp_ncpus;
805  		if (req > sc->vtnet_max_vq_pairs)
806  			req = sc->vtnet_max_vq_pairs;
807  		if (req > mp_ncpus)
808  			req = mp_ncpus;
809  		if (req > 1) {
810  			sc->vtnet_req_vq_pairs = req;
811  			sc->vtnet_flags |= VTNET_FLAG_MQ;
812  		}
813  	}
814  
815  	return (0);
816  }
817  
818  static int
819  vtnet_init_rxq(struct vtnet_softc *sc, int id)
820  {
821  	struct vtnet_rxq *rxq;
822  
823  	rxq = &sc->vtnet_rxqs[id];
824  
825  	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
826  	    device_get_nameunit(sc->vtnet_dev), id);
827  	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
828  
829  	rxq->vtnrx_sc = sc;
830  	rxq->vtnrx_id = id;
831  
832  	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
833  	if (rxq->vtnrx_sg == NULL)
834  		return (ENOMEM);
835  
836  #if defined(INET) || defined(INET6)
837  	if (vtnet_software_lro(sc)) {
838  		if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp,
839  		    sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0)
840  			return (ENOMEM);
841  	}
842  #endif
843  
844  	NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
845  	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
846  	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
847  
848  	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
849  }
850  
851  static int
852  vtnet_init_txq(struct vtnet_softc *sc, int id)
853  {
854  	struct vtnet_txq *txq;
855  
856  	txq = &sc->vtnet_txqs[id];
857  
858  	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
859  	    device_get_nameunit(sc->vtnet_dev), id);
860  	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
861  
862  	txq->vtntx_sc = sc;
863  	txq->vtntx_id = id;
864  
865  	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
866  	if (txq->vtntx_sg == NULL)
867  		return (ENOMEM);
868  
869  #ifndef VTNET_LEGACY_TX
870  	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
871  	    M_NOWAIT, &txq->vtntx_mtx);
872  	if (txq->vtntx_br == NULL)
873  		return (ENOMEM);
874  
875  	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
876  #endif
877  	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
878  	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
879  	    taskqueue_thread_enqueue, &txq->vtntx_tq);
880  	if (txq->vtntx_tq == NULL)
881  		return (ENOMEM);
882  
883  	return (0);
884  }
885  
886  static int
887  vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
888  {
889  	int i, npairs, error;
890  
891  	npairs = sc->vtnet_max_vq_pairs;
892  
893  	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
894  	    M_NOWAIT | M_ZERO);
895  	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
896  	    M_NOWAIT | M_ZERO);
897  	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
898  		return (ENOMEM);
899  
900  	for (i = 0; i < npairs; i++) {
901  		error = vtnet_init_rxq(sc, i);
902  		if (error)
903  			return (error);
904  		error = vtnet_init_txq(sc, i);
905  		if (error)
906  			return (error);
907  	}
908  
909  	vtnet_set_rx_process_limit(sc);
910  	vtnet_setup_queue_sysctl(sc);
911  
912  	return (0);
913  }
914  
915  static void
916  vtnet_destroy_rxq(struct vtnet_rxq *rxq)
917  {
918  
919  	rxq->vtnrx_sc = NULL;
920  	rxq->vtnrx_id = -1;
921  
922  #if defined(INET) || defined(INET6)
923  	tcp_lro_free(&rxq->vtnrx_lro);
924  #endif
925  
926  	if (rxq->vtnrx_sg != NULL) {
927  		sglist_free(rxq->vtnrx_sg);
928  		rxq->vtnrx_sg = NULL;
929  	}
930  
931  	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
932  		mtx_destroy(&rxq->vtnrx_mtx);
933  }
934  
935  static void
936  vtnet_destroy_txq(struct vtnet_txq *txq)
937  {
938  
939  	txq->vtntx_sc = NULL;
940  	txq->vtntx_id = -1;
941  
942  	if (txq->vtntx_sg != NULL) {
943  		sglist_free(txq->vtntx_sg);
944  		txq->vtntx_sg = NULL;
945  	}
946  
947  #ifndef VTNET_LEGACY_TX
948  	if (txq->vtntx_br != NULL) {
949  		buf_ring_free(txq->vtntx_br, M_DEVBUF);
950  		txq->vtntx_br = NULL;
951  	}
952  #endif
953  
954  	if (mtx_initialized(&txq->vtntx_mtx) != 0)
955  		mtx_destroy(&txq->vtntx_mtx);
956  }
957  
958  static void
959  vtnet_free_rxtx_queues(struct vtnet_softc *sc)
960  {
961  	int i;
962  
963  	if (sc->vtnet_rxqs != NULL) {
964  		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
965  			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
966  		free(sc->vtnet_rxqs, M_DEVBUF);
967  		sc->vtnet_rxqs = NULL;
968  	}
969  
970  	if (sc->vtnet_txqs != NULL) {
971  		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
972  			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
973  		free(sc->vtnet_txqs, M_DEVBUF);
974  		sc->vtnet_txqs = NULL;
975  	}
976  }
977  
978  static int
979  vtnet_alloc_rx_filters(struct vtnet_softc *sc)
980  {
981  
982  	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
983  		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
984  		    M_DEVBUF, M_NOWAIT | M_ZERO);
985  		if (sc->vtnet_mac_filter == NULL)
986  			return (ENOMEM);
987  	}
988  
989  	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
990  		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
991  		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
992  		if (sc->vtnet_vlan_filter == NULL)
993  			return (ENOMEM);
994  	}
995  
996  	return (0);
997  }
998  
999  static void
1000  vtnet_free_rx_filters(struct vtnet_softc *sc)
1001  {
1002  
1003  	if (sc->vtnet_mac_filter != NULL) {
1004  		free(sc->vtnet_mac_filter, M_DEVBUF);
1005  		sc->vtnet_mac_filter = NULL;
1006  	}
1007  
1008  	if (sc->vtnet_vlan_filter != NULL) {
1009  		free(sc->vtnet_vlan_filter, M_DEVBUF);
1010  		sc->vtnet_vlan_filter = NULL;
1011  	}
1012  }
1013  
1014  static int
1015  vtnet_alloc_virtqueues(struct vtnet_softc *sc)
1016  {
1017  	device_t dev;
1018  	struct vq_alloc_info *info;
1019  	struct vtnet_rxq *rxq;
1020  	struct vtnet_txq *txq;
1021  	int i, idx, nvqs, error;
1022  
1023  	dev = sc->vtnet_dev;
1024  
1025  	nvqs = sc->vtnet_max_vq_pairs * 2;
1026  	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
1027  		nvqs++;
1028  
1029  	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
1030  	if (info == NULL)
1031  		return (ENOMEM);
1032  
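	/*
	 * Note added for clarity (not from the original source): info[] is
	 * filled in the virtqueue index order a virtio-net device expects:
	 * rx0, tx0, rx1, tx1, ..., with the control virtqueue last.  For
	 * example, with two queue pairs and a control queue the indices are
	 * 0:rx0, 1:tx0, 2:rx1, 3:tx1, 4:ctrl.
	 */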
1033  	for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) {
1034  		rxq = &sc->vtnet_rxqs[i];
1035  		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
1036  		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
1037  		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1038  
1039  		txq = &sc->vtnet_txqs[i];
1040  		VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
1041  		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
1042  		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1043  	}
1044  
1045  	/* These queues will not be used, so allocate the minimum resources. */
1046  	for (/**/; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
1047  		rxq = &sc->vtnet_rxqs[i];
1048  		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq,
1049  		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1050  
1051  		txq = &sc->vtnet_txqs[i];
1052  		VQ_ALLOC_INFO_INIT(&info[idx+1], 0, NULL, txq, &txq->vtntx_vq,
1053  		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1054  	}
1055  
1056  	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
1057  		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
1058  		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
1059  	}
1060  
1061  	error = virtio_alloc_virtqueues(dev, nvqs, info);
1062  	free(info, M_TEMP);
1063  
1064  	return (error);
1065  }
1066  
1067  static void
1068  vtnet_alloc_interface(struct vtnet_softc *sc)
1069  {
1070  	device_t dev;
1071  	if_t ifp;
1072  
1073  	dev = sc->vtnet_dev;
1074  
1075  	ifp = if_alloc(IFT_ETHER);
1076  	sc->vtnet_ifp = ifp;
1077  	if_setsoftc(ifp, sc);
1078  	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1079  }
1080  
1081  static int
1082  vtnet_setup_interface(struct vtnet_softc *sc)
1083  {
1084  	device_t dev;
1085  	struct pfil_head_args pa;
1086  	if_t ifp;
1087  
1088  	dev = sc->vtnet_dev;
1089  	ifp = sc->vtnet_ifp;
1090  
1091  	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
1092  	if_setbaudrate(ifp, IF_Gbps(10));
1093  	if_setinitfn(ifp, vtnet_init);
1094  	if_setioctlfn(ifp, vtnet_ioctl);
1095  	if_setgetcounterfn(ifp, vtnet_get_counter);
1096  #ifndef VTNET_LEGACY_TX
1097  	if_settransmitfn(ifp, vtnet_txq_mq_start);
1098  	if_setqflushfn(ifp, vtnet_qflush);
1099  #else
1100  	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
1101  	if_setstartfn(ifp, vtnet_start);
1102  	if_setsendqlen(ifp, virtqueue_size(vq) - 1);
1103  	if_setsendqready(ifp);
1104  #endif
1105  
1106  	vtnet_get_macaddr(sc);
1107  
1108  	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
1109  		if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);
1110  
1111  	ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
1112  	ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1113  	ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);
1114  
1115  	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
1116  		int gso;
1117  
1118  		if_setcapabilitiesbit(ifp, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6, 0);
1119  
1120  		gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
1121  		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
1122  			if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
1123  		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
1124  			if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
1125  		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
1126  			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
1127  
1128  		if (if_getcapabilities(ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) {
1129  			int tso_maxlen;
1130  
1131  			if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);
1132  
1133  			tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen",
1134  			    vtnet_tso_maxlen);
1135  			if_sethwtsomax(ifp, tso_maxlen -
1136  			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
1137  			if_sethwtsomaxsegcount(ifp, sc->vtnet_tx_nsegs - 1);
1138  			if_sethwtsomaxsegsize(ifp, PAGE_SIZE);
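			/*
			 * Added arithmetic note: with the default
			 * tso_maxlen of IP_MAXPACKET (65535), if_hw_tsomax
			 * above becomes 65535 - 18 = 65517 bytes, reserving
			 * room for an Ethernet header plus a VLAN tag.
			 */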
1139  		}
1140  	}
1141  
1142  	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
1143  		if_setcapabilitiesbit(ifp, IFCAP_RXCSUM, 0);
1144  #ifdef notyet
1145  		/* BMV: Rx checksums not distinguished between IPv4 and IPv6. */
1146  		if_setcapabilitiesbit(ifp, IFCAP_RXCSUM_IPV6, 0);
1147  #endif
1148  
1149  		if (vtnet_tunable_int(sc, "fixup_needs_csum",
1150  		    vtnet_fixup_needs_csum) != 0)
1151  			sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;
1152  
1153  		/* Support either "hardware" or software LRO. */
1154  		if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
1155  	}
1156  
1157  	if (if_getcapabilities(ifp) & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) {
1158  		/*
1159  		 * VirtIO does not support VLAN tagging, but we can fake
1160  		 * it by inserting and removing the 802.1Q header during
1161  		 * transmit and receive. We are then able to do checksum
1162  		 * offloading of VLAN frames.
1163  		 */
1164  		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);
1165  	}
1166  
1167  	if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
1168  		if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
1169  	if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
1170  
1171  	/*
1172  	 * Capabilities after here are not enabled by default.
1173  	 */
1174  	if_setcapenable(ifp, if_getcapabilities(ifp));
1175  
1176  	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1177  		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
1178  
1179  		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1180  		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1181  		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1182  		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1183  	}
1184  
1185  	ether_ifattach(ifp, sc->vtnet_hwaddr);
1186  
1187  	/* Tell the upper layer(s) we support long frames. */
1188  	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
1189  
1190  	DEBUGNET_SET(ifp, vtnet);
1191  
1192  	pa.pa_version = PFIL_VERSION;
1193  	pa.pa_flags = PFIL_IN;
1194  	pa.pa_type = PFIL_TYPE_ETHERNET;
1195  	pa.pa_headname = if_name(ifp);
1196  	sc->vtnet_pfil = pfil_head_register(&pa);
1197  
1198  	return (0);
1199  }
1200  
1201  static int
1202  vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
1203  {
1204  	int framesz;
1205  
1206  	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
1207  		return (MJUMPAGESIZE);
1208  	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1209  		return (MCLBYTES);
1210  
1211  	/*
1212  	 * Try to scale the receive mbuf cluster size from the MTU. We
1213  	 * could also use the VQ size to influence the selected size,
1214  	 * but that would only matter for very small queues.
1215  	 */
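	/*
	 * Worked example, added for illustration (assuming a 4KB
	 * MJUMPAGESIZE): for a modern device at a 1500-byte MTU, framesz is
	 * 12 + 18 + 1500 (+2 for the alignment offset on strict-alignment
	 * platforms) = 1532, so MCLBYTES (2KB) clusters are chosen; at a
	 * 9000-byte MTU the total is 9032, which selects MJUM9BYTES (9KB).
	 */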
1216  	if (vtnet_modern(sc)) {
1217  		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
1218  		framesz = sizeof(struct virtio_net_hdr_v1);
1219  	} else
1220  		framesz = sizeof(struct vtnet_rx_header);
1221  	framesz += sizeof(struct ether_vlan_header) + mtu;
1222  	/*
1223  	 * Account for the offsetting we will do elsewhere so that we
1224  	 * allocate the right cluster size for this MTU.
1225  	 */
1226  	if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) {
1227  		framesz += VTNET_ETHER_ALIGN;
1228  	}
1229  
1230  	if (framesz <= MCLBYTES)
1231  		return (MCLBYTES);
1232  	else if (framesz <= MJUMPAGESIZE)
1233  		return (MJUMPAGESIZE);
1234  	else if (framesz <= MJUM9BYTES)
1235  		return (MJUM9BYTES);
1236  
1237  	/* Sane default; avoid 16KB clusters. */
1238  	return (MCLBYTES);
1239  }
1240  
1241  static int
1242  vtnet_ioctl_mtu(struct vtnet_softc *sc, u_int mtu)
1243  {
1244  	if_t ifp;
1245  	int clustersz;
1246  
1247  	ifp = sc->vtnet_ifp;
1248  	VTNET_CORE_LOCK_ASSERT(sc);
1249  
1250  	if (if_getmtu(ifp) == mtu)
1251  		return (0);
1252  	else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu)
1253  		return (EINVAL);
1254  
1255  	if_setmtu(ifp, mtu);
1256  	clustersz = vtnet_rx_cluster_size(sc, mtu);
1257  
1258  	if (clustersz != sc->vtnet_rx_clustersz &&
1259  	    if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1260  		if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1261  		vtnet_init_locked(sc, 0);
1262  	}
1263  
1264  	return (0);
1265  }
1266  
1267  static int
1268  vtnet_ioctl_ifflags(struct vtnet_softc *sc)
1269  {
1270  	if_t ifp;
1271  	int drv_running;
1272  
1273  	ifp = sc->vtnet_ifp;
1274  	drv_running = (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0;
1275  
1276  	VTNET_CORE_LOCK_ASSERT(sc);
1277  
1278  	if ((if_getflags(ifp) & IFF_UP) == 0) {
1279  		if (drv_running)
1280  			vtnet_stop(sc);
1281  		goto out;
1282  	}
1283  
1284  	if (!drv_running) {
1285  		vtnet_init_locked(sc, 0);
1286  		goto out;
1287  	}
1288  
1289  	if ((if_getflags(ifp) ^ sc->vtnet_if_flags) &
1290  	    (IFF_PROMISC | IFF_ALLMULTI)) {
1291  		if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1292  			vtnet_rx_filter(sc);
1293  		else {
1294  			/*
1295  			 * We don't support filtering out multicast, so
1296  			 * ALLMULTI is always set.
1297  			 */
1298  			if_setflagbits(ifp, IFF_ALLMULTI, 0);
1299  			if_setflagbits(ifp, IFF_PROMISC, 0);
1300  		}
1301  	}
1302  
1303  out:
1304  	sc->vtnet_if_flags = if_getflags(ifp);
1305  	return (0);
1306  }
1307  
1308  static int
1309  vtnet_ioctl_multi(struct vtnet_softc *sc)
1310  {
1311  	if_t ifp;
1312  
1313  	ifp = sc->vtnet_ifp;
1314  
1315  	VTNET_CORE_LOCK_ASSERT(sc);
1316  
1317  	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
1318  	    if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1319  		vtnet_rx_filter_mac(sc);
1320  
1321  	return (0);
1322  }
1323  
1324  static int
1325  vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr)
1326  {
1327  	if_t ifp;
1328  	int mask, reinit, update;
1329  
1330  	ifp = sc->vtnet_ifp;
1331  	mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^ if_getcapenable(ifp);
1332  	reinit = update = 0;
1333  
1334  	VTNET_CORE_LOCK_ASSERT(sc);
1335  
1336  	if (mask & IFCAP_TXCSUM)
1337  		if_togglecapenable(ifp, IFCAP_TXCSUM);
1338  	if (mask & IFCAP_TXCSUM_IPV6)
1339  		if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
1340  	if (mask & IFCAP_TSO4)
1341  		if_togglecapenable(ifp, IFCAP_TSO4);
1342  	if (mask & IFCAP_TSO6)
1343  		if_togglecapenable(ifp, IFCAP_TSO6);
1344  
1345  	if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) {
1346  		/*
1347  		 * These Rx features require the negotiated features to
1348  		 * be updated. Avoid a full reinit if possible.
1349  		 */
1350  		if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
1351  			update = 1;
1352  		else
1353  			reinit = 1;
1354  
1355  		/* BMV: Avoid needless renegotiation for just software LRO. */
1356  		if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) ==
1357  		    IFCAP_LRO && vtnet_software_lro(sc))
1358  			reinit = update = 0;
1359  
1360  		if (mask & IFCAP_RXCSUM)
1361  			if_togglecapenable(ifp, IFCAP_RXCSUM);
1362  		if (mask & IFCAP_RXCSUM_IPV6)
1363  			if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);
1364  		if (mask & IFCAP_LRO)
1365  			if_togglecapenable(ifp, IFCAP_LRO);
1366  
1367  		/*
1368  		 * VirtIO does not distinguish between IPv4 and IPv6 checksums
1369  		 * so treat them as a pair. Guest TSO (LRO) requires receive
1370  		 * checksums.
1371  		 */
1372  		if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
1373  			if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
1374  #ifdef notyet
1375  			if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
1376  #endif
1377  		} else
1378  			if_setcapenablebit(ifp, 0,
1379  			    (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO));
1380  	}
1381  
1382  	if (mask & IFCAP_VLAN_HWFILTER) {
1383  		/* These Rx features require renegotiation. */
1384  		reinit = 1;
1385  
1386  		if (mask & IFCAP_VLAN_HWFILTER)
1387  			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1388  	}
1389  
1390  	if (mask & IFCAP_VLAN_HWTSO)
1391  		if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1392  	if (mask & IFCAP_VLAN_HWTAGGING)
1393  		if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
1394  
1395  	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1396  		if (reinit) {
1397  			if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1398  			vtnet_init_locked(sc, 0);
1399  		} else if (update)
1400  			vtnet_update_rx_offloads(sc);
1401  	}
1402  
1403  	return (0);
1404  }
1405  
1406  static int
1407  vtnet_ioctl(if_t ifp, u_long cmd, caddr_t data)
1408  {
1409  	struct vtnet_softc *sc;
1410  	struct ifreq *ifr;
1411  	int error;
1412  
1413  	sc = if_getsoftc(ifp);
1414  	ifr = (struct ifreq *) data;
1415  	error = 0;
1416  
1417  	switch (cmd) {
1418  	case SIOCSIFMTU:
1419  		VTNET_CORE_LOCK(sc);
1420  		error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
1421  		VTNET_CORE_UNLOCK(sc);
1422  		break;
1423  
1424  	case SIOCSIFFLAGS:
1425  		VTNET_CORE_LOCK(sc);
1426  		error = vtnet_ioctl_ifflags(sc);
1427  		VTNET_CORE_UNLOCK(sc);
1428  		break;
1429  
1430  	case SIOCADDMULTI:
1431  	case SIOCDELMULTI:
1432  		VTNET_CORE_LOCK(sc);
1433  		error = vtnet_ioctl_multi(sc);
1434  		VTNET_CORE_UNLOCK(sc);
1435  		break;
1436  
1437  	case SIOCSIFMEDIA:
1438  	case SIOCGIFMEDIA:
1439  		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1440  		break;
1441  
1442  	case SIOCSIFCAP:
1443  		VTNET_CORE_LOCK(sc);
1444  		error = vtnet_ioctl_ifcap(sc, ifr);
1445  		VTNET_CORE_UNLOCK(sc);
1446  		VLAN_CAPABILITIES(ifp);
1447  		break;
1448  
1449  	default:
1450  		error = ether_ioctl(ifp, cmd, data);
1451  		break;
1452  	}
1453  
1454  	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1455  
1456  	return (error);
1457  }
1458  
1459  static int
1460  vtnet_rxq_populate(struct vtnet_rxq *rxq)
1461  {
1462  	struct virtqueue *vq;
1463  	int nbufs, error;
1464  
1465  #ifdef DEV_NETMAP
1466  	error = vtnet_netmap_rxq_populate(rxq);
1467  	if (error >= 0)
1468  		return (error);
1469  #endif  /* DEV_NETMAP */
1470  
1471  	vq = rxq->vtnrx_vq;
1472  	error = ENOSPC;
1473  
1474  	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1475  		error = vtnet_rxq_new_buf(rxq);
1476  		if (error)
1477  			break;
1478  	}
1479  
1480  	if (nbufs > 0) {
1481  		virtqueue_notify(vq);
1482  		/*
1483  		 * EMSGSIZE signifies the virtqueue did not have enough
1484  		 * entries available to hold the last mbuf. This is not
1485  		 * an error.
1486  		 */
1487  		if (error == EMSGSIZE)
1488  			error = 0;
1489  	}
1490  
1491  	return (error);
1492  }
1493  
1494  static void
1495  vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1496  {
1497  	struct virtqueue *vq;
1498  	struct mbuf *m;
1499  	int last;
1500  #ifdef DEV_NETMAP
1501  	struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
1502  							rxq->vtnrx_id, NR_RX);
1503  #else  /* !DEV_NETMAP */
1504  	void *kring = NULL;
1505  #endif /* !DEV_NETMAP */
1506  
1507  	vq = rxq->vtnrx_vq;
1508  	last = 0;
1509  
1510  	while ((m = virtqueue_drain(vq, &last)) != NULL) {
1511  		if (kring == NULL)
1512  			m_freem(m);
1513  	}
1514  
1515  	KASSERT(virtqueue_empty(vq),
1516  	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1517  }
1518  
1519  static struct mbuf *
1520  vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1521  {
1522  	struct mbuf *m_head, *m_tail, *m;
1523  	int i, size;
1524  
1525  	m_head = NULL;
1526  	size = sc->vtnet_rx_clustersz;
1527  
1528  	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1529  	    ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));
1530  
1531  	for (i = 0; i < nbufs; i++) {
1532  		m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
1533  		if (m == NULL) {
1534  			sc->vtnet_stats.mbuf_alloc_failed++;
1535  			m_freem(m_head);
1536  			return (NULL);
1537  		}
1538  
1539  		m->m_len = size;
1540  		/*
1541  		 * Need to offset the mbuf if the header we're going to add
1542  		 * Offset the mbuf when the header we are going to prepend
1543  		 * would leave the payload misaligned.
1544  		if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) {
1545  			m_adj(m, VTNET_ETHER_ALIGN);
1546  		}
1547  		if (m_head != NULL) {
1548  			m_tail->m_next = m;
1549  			m_tail = m;
1550  		} else
1551  			m_head = m_tail = m;
1552  	}
1553  
1554  	if (m_tailp != NULL)
1555  		*m_tailp = m_tail;
1556  
1557  	return (m_head);
1558  }
1559  
1560  /*
1561   * Slow path for when LRO without mergeable buffers is negotiated.
1562   */
1563  static int
1564  vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1565      int len0)
1566  {
1567  	struct vtnet_softc *sc;
1568  	struct mbuf *m, *m_prev, *m_new, *m_tail;
1569  	int len, clustersz, nreplace, error;
1570  
1571  	sc = rxq->vtnrx_sc;
1572  	clustersz = sc->vtnet_rx_clustersz;
1573  	/*
1574  	 * The replacement mbufs are offset when the prepended header would
1575  	 * misalign the payload, so account for that offset here.
1576  	 */
1577  	if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0)
1578  		clustersz -= VTNET_ETHER_ALIGN;
1579  
1580  	m_prev = NULL;
1581  	m_tail = NULL;
1582  	nreplace = 0;
1583  
1584  	m = m0;
1585  	len = len0;
1586  
1587  	/*
1588  	 * Since these mbuf chains are so large, avoid allocating a complete
1589  	 * replacement when the received frame did not consume the entire
1590  	 * chain. Unused mbufs are moved to the tail of the replacement chain.
1591  	 */
1592  	while (len > 0) {
1593  		if (m == NULL) {
1594  			sc->vtnet_stats.rx_frame_too_large++;
1595  			return (EMSGSIZE);
1596  		}
1597  
1598  		/*
1599  		 * Every mbuf should have the expected cluster size since that
1600  		 * is also used to allocate the replacements.
1601  		 */
1602  		KASSERT(m->m_len == clustersz,
1603  		    ("%s: mbuf size %d not expected cluster size %d", __func__,
1604  		    m->m_len, clustersz));
1605  
1606  		m->m_len = MIN(m->m_len, len);
1607  		len -= m->m_len;
1608  
1609  		m_prev = m;
1610  		m = m->m_next;
1611  		nreplace++;
1612  	}
1613  
1614  	KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
1615  	    ("%s: invalid replacement mbuf count %d max %d", __func__,
1616  	    nreplace, sc->vtnet_rx_nmbufs));
1617  
1618  	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1619  	if (m_new == NULL) {
1620  		m_prev->m_len = clustersz;
1621  		return (ENOBUFS);
1622  	}
1623  
1624  	/*
1625  	 * Move any unused mbufs from the received mbuf chain onto the
1626  	 * end of the replacement chain.
1627  	 */
1628  	if (m_prev->m_next != NULL) {
1629  		m_tail->m_next = m_prev->m_next;
1630  		m_prev->m_next = NULL;
1631  	}
1632  
1633  	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1634  	if (error) {
1635  		/*
1636  		 * The replacement is supposed to be a copy of the one
1637  		 * dequeued, so this is a very unexpected error.
1638  		 *
1639  		 * Restore the m0 chain to the original state if it was
1640  		 * modified so we can then discard it.
1641  		 */
1642  		if (m_tail->m_next != NULL) {
1643  			m_prev->m_next = m_tail->m_next;
1644  			m_tail->m_next = NULL;
1645  		}
1646  		m_prev->m_len = clustersz;
1647  		sc->vtnet_stats.rx_enq_replacement_failed++;
1648  		m_freem(m_new);
1649  	}
1650  
1651  	return (error);
1652  }
1653  
1654  static int
1655  vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1656  {
1657  	struct vtnet_softc *sc;
1658  	struct mbuf *m_new;
1659  	int error;
1660  
1661  	sc = rxq->vtnrx_sc;
1662  
1663  	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1664  		return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));
1665  
1666  	MPASS(m->m_next == NULL);
1667  	if (m->m_len < len)
1668  		return (EMSGSIZE);
1669  
1670  	m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1671  	if (m_new == NULL)
1672  		return (ENOBUFS);
1673  
1674  	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1675  	if (error) {
1676  		sc->vtnet_stats.rx_enq_replacement_failed++;
1677  		m_freem(m_new);
1678  	} else
1679  		m->m_len = len;
1680  
1681  	return (error);
1682  }
1683  
1684  static int
1685  vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1686  {
1687  	struct vtnet_softc *sc;
1688  	struct sglist *sg;
1689  	int header_inlined, error;
1690  
1691  	sc = rxq->vtnrx_sc;
1692  	sg = rxq->vtnrx_sg;
1693  
1694  	KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1695  	    ("%s: mbuf chain without LRO_NOMRG", __func__));
1696  	VTNET_RXQ_LOCK_ASSERT(rxq);
1697  
1698  	sglist_reset(sg);
1699  	header_inlined = vtnet_modern(sc) ||
1700  	    (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */
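	/*
	 * Note added for clarity (not from the original source): when the
	 * header is "inlined", the device writes the virtio-net header into
	 * the first bytes of the mbuf data, so the whole mbuf is appended
	 * as-is below; otherwise the legacy layout is used and the header
	 * and packet data are appended as separate sglist entries.
	 */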
1701  
1702  	/*
1703  	 * Note: The mbuf was already adjusted at allocation time if strict
1704  	 * alignment is required.
1705  	 */
1706  	if (header_inlined)
1707  		error = sglist_append_mbuf(sg, m);
1708  	else {
1709  		struct vtnet_rx_header *rxhdr =
1710  		    mtod(m, struct vtnet_rx_header *);
1711  		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1712  
1713  		/* Append the header and remaining mbuf data. */
1714  		error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1715  		if (error)
1716  			return (error);
1717  		error = sglist_append(sg, &rxhdr[1],
1718  		    m->m_len - sizeof(struct vtnet_rx_header));
1719  		if (error)
1720  			return (error);
1721  
1722  		if (m->m_next != NULL)
1723  			error = sglist_append_mbuf(sg, m->m_next);
1724  	}
1725  
1726  	if (error)
1727  		return (error);
1728  
1729  	return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
1730  }
1731  
1732  static int
1733  vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1734  {
1735  	struct vtnet_softc *sc;
1736  	struct mbuf *m;
1737  	int error;
1738  
1739  	sc = rxq->vtnrx_sc;
1740  
1741  	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1742  	if (m == NULL)
1743  		return (ENOBUFS);
1744  
1745  	error = vtnet_rxq_enqueue_buf(rxq, m);
1746  	if (error)
1747  		m_freem(m);
1748  
1749  	return (error);
1750  }
1751  
1752  static int
1753  vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
1754      int hoff, struct virtio_net_hdr *hdr)
1755  {
1756  	struct vtnet_softc *sc;
1757  	int error;
1758  
1759  	sc = rxq->vtnrx_sc;
1760  
1761  	/*
1762  	 * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
1763  	 * not have an analogous CSUM flag. The checksum has been validated,
1764  	 * but is incomplete (TCP/UDP pseudo header).
1765  	 *
1766  	 * The packet is likely from another VM on the same host that itself
1767  	 * performed checksum offloading so Tx/Rx is basically a memcpy and
1768  	 * the checksum has little value.
1769  	 *
1770  	 * Default to receiving the packet as-is for performance reasons, but
1771  	 * this can cause issues if the packet is to be forwarded because it
1772  	 * does not contain a valid checksum. This patch may be helpful:
1773  	 * https://reviews.freebsd.org/D6611. In the meantime, have the driver
1774  	 * compute the checksum if requested.
1775  	 *
1776  	 * BMV: Need to add a CSUM_PARTIAL flag?
1777  	 */
1778  	if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
1779  		error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
1780  		return (error);
1781  	}
1782  
1783  	/*
1784  	 * Compute the checksum in the driver so the packet will contain a
1785  	 * valid checksum. The checksum is at csum_offset from csum_start.
1786  	 */
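	/*
	 * For example (illustrative only): for a NEEDS_CSUM TCP/IPv4 frame
	 * the device typically sets csum_start to the start of the TCP
	 * header and csum_offset to offsetof(struct tcphdr, th_sum) (16),
	 * so csum_off below points at the TCP checksum field itself.
	 */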
1787  	switch (etype) {
1788  #if defined(INET) || defined(INET6)
1789  	case ETHERTYPE_IP:
1790  	case ETHERTYPE_IPV6: {
1791  		int csum_off, csum_end;
1792  		uint16_t csum;
1793  
1794  		csum_off = hdr->csum_start + hdr->csum_offset;
1795  		csum_end = csum_off + sizeof(uint16_t);
1796  
1797  		/* Assume checksum will be in the first mbuf. */
1798  		if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
1799  			return (1);
1800  
1801  		/*
1802  		 * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
1803  		 * checksum and write it at the specified offset. We could
1804  		 * try to verify the packet: csum_start should probably
1805  		 * correspond to the start of the TCP/UDP header.
1806  		 *
1807  		 * BMV: Need to properly handle UDP with zero checksum. Is
1808  		 * the IPv4 header checksum implicitly validated?
1809  		 */
1810  		csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
1811  		*(uint16_t *)(mtodo(m, csum_off)) = csum;
1812  		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1813  		m->m_pkthdr.csum_data = 0xFFFF;
1814  		break;
1815  	}
1816  #endif
1817  	default:
1818  		sc->vtnet_stats.rx_csum_bad_ethtype++;
1819  		return (1);
1820  	}
1821  
1822  	return (0);
1823  }
1824  
1825  static int
1826  vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
1827      uint16_t etype, int hoff, struct virtio_net_hdr *hdr __unused)
1828  {
1829  #if 0
1830  	struct vtnet_softc *sc;
1831  #endif
1832  	int protocol;
1833  
1834  #if 0
1835  	sc = rxq->vtnrx_sc;
1836  #endif
1837  
1838  	switch (etype) {
1839  #if defined(INET)
1840  	case ETHERTYPE_IP:
1841  		if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
1842  			protocol = IPPROTO_DONE;
1843  		else {
1844  			struct ip *ip = (struct ip *)(m->m_data + hoff);
1845  			protocol = ip->ip_p;
1846  		}
1847  		break;
1848  #endif
1849  #if defined(INET6)
1850  	case ETHERTYPE_IPV6:
1851  		if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
1852  		    || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
1853  			protocol = IPPROTO_DONE;
1854  		break;
1855  #endif
1856  	default:
1857  		protocol = IPPROTO_DONE;
1858  		break;
1859  	}
1860  
1861  	switch (protocol) {
1862  	case IPPROTO_TCP:
1863  	case IPPROTO_UDP:
1864  		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1865  		m->m_pkthdr.csum_data = 0xFFFF;
1866  		break;
1867  	default:
1868  		/*
1869  		 * FreeBSD does not support checksum offloading of this
1870  		 * protocol. Let the stack re-verify the checksum later
1871  		 * if the protocol is supported.
1872  		 */
1873  #if 0
1874  		if_printf(sc->vtnet_ifp,
1875  		    "%s: checksum offload of unsupported protocol "
1876  		    "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
1877  		    __func__, etype, protocol, hdr->csum_start,
1878  		    hdr->csum_offset);
1879  #endif
1880  		break;
1881  	}
1882  
1883  	return (0);
1884  }
1885  
1886  static int
1887  vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1888      struct virtio_net_hdr *hdr)
1889  {
1890  	const struct ether_header *eh;
1891  	int hoff;
1892  	uint16_t etype;
1893  
1894  	eh = mtod(m, const struct ether_header *);
1895  	etype = ntohs(eh->ether_type);
1896  	if (etype == ETHERTYPE_VLAN) {
1897  		/* TODO BMV: Handle QinQ. */
1898  		const struct ether_vlan_header *evh =
1899  		    mtod(m, const struct ether_vlan_header *);
1900  		etype = ntohs(evh->evl_proto);
1901  		hoff = sizeof(struct ether_vlan_header);
1902  	} else
1903  		hoff = sizeof(struct ether_header);
1904  
1905  	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1906  		return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
1907  	else /* VIRTIO_NET_HDR_F_DATA_VALID */
1908  		return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
1909  }
1910  
1911  static void
1912  vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1913  {
1914  	struct mbuf *m;
1915  
1916  	while (--nbufs > 0) {
1917  		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1918  		if (m == NULL)
1919  			break;
1920  		vtnet_rxq_discard_buf(rxq, m);
1921  	}
1922  }
1923  
1924  static void
1925  vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1926  {
1927  	int error __diagused;
1928  
1929  	/*
1930  	 * Requeue the discarded mbuf. This should always be successful
1931  	 * since it was just dequeued.
1932  	 */
1933  	error = vtnet_rxq_enqueue_buf(rxq, m);
1934  	KASSERT(error == 0,
1935  	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
1936  }
1937  
1938  static int
1939  vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1940  {
1941  	struct vtnet_softc *sc;
1942  	struct virtqueue *vq;
1943  	struct mbuf *m_tail;
1944  
1945  	sc = rxq->vtnrx_sc;
1946  	vq = rxq->vtnrx_vq;
1947  	m_tail = m_head;
1948  
1949  	while (--nbufs > 0) {
1950  		struct mbuf *m;
1951  		uint32_t len;
1952  
1953  		m = virtqueue_dequeue(vq, &len);
1954  		if (m == NULL) {
1955  			rxq->vtnrx_stats.vrxs_ierrors++;
1956  			goto fail;
1957  		}
1958  
1959  		if (vtnet_rxq_new_buf(rxq) != 0) {
1960  			rxq->vtnrx_stats.vrxs_iqdrops++;
1961  			vtnet_rxq_discard_buf(rxq, m);
1962  			if (nbufs > 1)
1963  				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1964  			goto fail;
1965  		}
1966  
1967  		if (m->m_len < len)
1968  			len = m->m_len;
1969  
1970  		m->m_len = len;
1971  		m->m_flags &= ~M_PKTHDR;
1972  
1973  		m_head->m_pkthdr.len += len;
1974  		m_tail->m_next = m;
1975  		m_tail = m;
1976  	}
1977  
1978  	return (0);
1979  
1980  fail:
1981  	sc->vtnet_stats.rx_mergeable_failed++;
1982  	m_freem(m_head);
1983  
1984  	return (1);
1985  }
1986  
1987  #if defined(INET) || defined(INET6)
1988  static int
1989  vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m)
1990  {
1991  	struct lro_ctrl *lro;
1992  
1993  	lro = &rxq->vtnrx_lro;
1994  
1995  	if (lro->lro_mbuf_max != 0) {
1996  		tcp_lro_queue_mbuf(lro, m);
1997  		return (0);
1998  	}
1999  
2000  	return (tcp_lro_rx(lro, m, 0));
2001  }
2002  #endif
2003  
2004  static void
2005  vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
2006      struct virtio_net_hdr *hdr)
2007  {
2008  	struct vtnet_softc *sc;
2009  	if_t ifp;
2010  
2011  	sc = rxq->vtnrx_sc;
2012  	ifp = sc->vtnet_ifp;
2013  
2014  	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
2015  		struct ether_header *eh = mtod(m, struct ether_header *);
2016  		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2017  			vtnet_vlan_tag_remove(m);
2018  			/*
2019  			 * With the 802.1Q header removed, update the
2020  			 * checksum starting location accordingly.
2021  			 */
2022  			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
2023  				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
2024  		}
2025  	}
2026  
2027  	m->m_pkthdr.flowid = rxq->vtnrx_id;
2028  	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2029  
2030  	if (hdr->flags &
2031  	    (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
2032  		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
2033  			rxq->vtnrx_stats.vrxs_csum++;
2034  		else
2035  			rxq->vtnrx_stats.vrxs_csum_failed++;
2036  	}
2037  
2038  	if (hdr->gso_size != 0) {
2039  		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2040  		case VIRTIO_NET_HDR_GSO_TCPV4:
2041  		case VIRTIO_NET_HDR_GSO_TCPV6:
2042  			m->m_pkthdr.lro_nsegs =
2043  			    howmany(m->m_pkthdr.len, hdr->gso_size);
2044  			rxq->vtnrx_stats.vrxs_host_lro++;
2045  			break;
2046  		}
2047  	}
2048  
2049  	rxq->vtnrx_stats.vrxs_ipackets++;
2050  	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
2051  
2052  #if defined(INET) || defined(INET6)
2053  	if (vtnet_software_lro(sc) && if_getcapenable(ifp) & IFCAP_LRO) {
2054  		if (vtnet_lro_rx(rxq, m) == 0)
2055  			return;
2056  	}
2057  #endif
2058  
2059  	if_input(ifp, m);
2060  }
2061  
2062  static int
2063  vtnet_rxq_eof(struct vtnet_rxq *rxq)
2064  {
2065  	struct virtio_net_hdr lhdr, *hdr;
2066  	struct vtnet_softc *sc;
2067  	if_t ifp;
2068  	struct virtqueue *vq;
2069  	int deq, count;
2070  
2071  	sc = rxq->vtnrx_sc;
2072  	vq = rxq->vtnrx_vq;
2073  	ifp = sc->vtnet_ifp;
2074  	deq = 0;
2075  	count = sc->vtnet_rx_process_limit;
2076  
2077  	VTNET_RXQ_LOCK_ASSERT(rxq);
2078  
2079  	CURVNET_SET(if_getvnet(ifp));
2080  	while (count-- > 0) {
2081  		struct mbuf *m;
2082  		uint32_t len, nbufs, adjsz;
2083  
2084  		m = virtqueue_dequeue(vq, &len);
2085  		if (m == NULL)
2086  			break;
2087  		deq++;
2088  
2089  		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
2090  			rxq->vtnrx_stats.vrxs_ierrors++;
2091  			vtnet_rxq_discard_buf(rxq, m);
2092  			continue;
2093  		}
2094  
2095  		if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
2096  			struct virtio_net_hdr_mrg_rxbuf *mhdr =
2097  			    mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
2098  			kmsan_mark(mhdr, sizeof(*mhdr), KMSAN_STATE_INITED);
2099  			nbufs = vtnet_htog16(sc, mhdr->num_buffers);
2100  			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2101  		} else if (vtnet_modern(sc)) {
2102  			nbufs = 1; /* num_buffers is always 1 */
2103  			adjsz = sizeof(struct virtio_net_hdr_v1);
2104  		} else {
2105  			nbufs = 1;
2106  			adjsz = sizeof(struct vtnet_rx_header);
2107  			/*
2108  			 * Account for our gap between the header and start of
2109  			 * data to keep the segments separated.
2110  			 */
2111  			len += VTNET_RX_HEADER_PAD;
2112  		}
2113  
2114  		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
2115  			rxq->vtnrx_stats.vrxs_iqdrops++;
2116  			vtnet_rxq_discard_buf(rxq, m);
2117  			if (nbufs > 1)
2118  				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
2119  			continue;
2120  		}
2121  
2122  		m->m_pkthdr.len = len;
2123  		m->m_pkthdr.rcvif = ifp;
2124  		m->m_pkthdr.csum_flags = 0;
2125  
2126  		if (nbufs > 1) {
2127  			/* Dequeue the rest of the chain. */
2128  			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
2129  				continue;
2130  		}
2131  
2132  		kmsan_mark_mbuf(m, KMSAN_STATE_INITED);
2133  
2134  		/*
2135  		 * Save an endian-swapped version of the header before it is
2136  		 * stripped. The header is always at the start of the mbuf
2137  		 * data. num_buffers was already saved (and is not needed), so
2138  		 * use the standard header.
2139  		 */
2140  		hdr = mtod(m, struct virtio_net_hdr *);
2141  		lhdr.flags = hdr->flags;
2142  		lhdr.gso_type = hdr->gso_type;
2143  		lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len);
2144  		lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size);
2145  		lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start);
2146  		lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset);
2147  		m_adj(m, adjsz);
2148  
2149  		if (PFIL_HOOKED_IN(sc->vtnet_pfil)) {
2150  			pfil_return_t pfil;
2151  
2152  			pfil = pfil_mbuf_in(sc->vtnet_pfil, &m, ifp, NULL);
2153  			switch (pfil) {
2154  			case PFIL_DROPPED:
2155  			case PFIL_CONSUMED:
2156  				continue;
2157  			default:
2158  				KASSERT(pfil == PFIL_PASS,
2159  				    ("Filter returned %d!", pfil));
2160  			}
2161  		}
2162  
2163  		vtnet_rxq_input(rxq, m, &lhdr);
2164  	}
2165  
2166  	if (deq > 0) {
2167  #if defined(INET) || defined(INET6)
2168  		if (vtnet_software_lro(sc))
2169  			tcp_lro_flush_all(&rxq->vtnrx_lro);
2170  #endif
2171  		virtqueue_notify(vq);
2172  	}
2173  	CURVNET_RESTORE();
2174  
2175  	return (count > 0 ? 0 : EAGAIN);
2176  }
2177  
2178  static void
2179  vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries)
2180  {
2181  	struct vtnet_softc *sc;
2182  	if_t ifp;
2183  	u_int more;
2184  #ifdef DEV_NETMAP
2185  	int nmirq;
2186  #endif /* DEV_NETMAP */
2187  
2188  	sc = rxq->vtnrx_sc;
2189  	ifp = sc->vtnet_ifp;
2190  
2191  	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
2192  		/*
2193  		 * Ignore this interrupt. Either this is a spurious interrupt
2194  		 * or multiqueue without per-VQ MSIX so every queue needs to
2195  		 * or multiqueue without per-VQ MSIX, so every queue needs to
2196  		 * to avoid).
2197  		 */
2198  		vtnet_rxq_disable_intr(rxq);
2199  		return;
2200  	}
2201  
2202  	VTNET_RXQ_LOCK(rxq);
2203  
2204  #ifdef DEV_NETMAP
2205  	/*
2206  	 * We call netmap_rx_irq() under lock to prevent concurrent calls.
2207  	 * This is not necessary to serialize the access to the RX vq, but
2208  	 * rather to avoid races that may happen if this interface is
2209  	 * attached to a VALE switch, which would cause received packets
2210  	 * to stall in the RX queue (nm_kr_tryget() could find the kring
2211  	 * busy when called from netmap_bwrap_intr_notify()).
2212  	 */
2213  	nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
2214  	if (nmirq != NM_IRQ_PASS) {
2215  		VTNET_RXQ_UNLOCK(rxq);
2216  		if (nmirq == NM_IRQ_RESCHED) {
2217  			taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2218  		}
2219  		return;
2220  	}
2221  #endif /* DEV_NETMAP */
2222  
2223  again:
2224  	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2225  		VTNET_RXQ_UNLOCK(rxq);
2226  		return;
2227  	}
2228  
2229  	more = vtnet_rxq_eof(rxq);
2230  	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
2231  		if (!more)
2232  			vtnet_rxq_disable_intr(rxq);
2233  		/*
2234  		 * This is an occasional condition or race (when !more),
2235  		 * so retry a few times before scheduling the taskqueue.
2236  		 */
2237  		if (tries-- > 0)
2238  			goto again;
2239  
2240  		rxq->vtnrx_stats.vrxs_rescheduled++;
2241  		VTNET_RXQ_UNLOCK(rxq);
2242  		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2243  	} else
2244  		VTNET_RXQ_UNLOCK(rxq);
2245  }
2246  
2247  static void
2248  vtnet_rx_vq_intr(void *xrxq)
2249  {
2250  	struct vtnet_rxq *rxq;
2251  
2252  	rxq = xrxq;
2253  	vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES);
2254  }
2255  
2256  static void
2257  vtnet_rxq_tq_intr(void *xrxq, int pending __unused)
2258  {
2259  	struct vtnet_rxq *rxq;
2260  
2261  	rxq = xrxq;
2262  	vtnet_rx_vq_process(rxq, 0);
2263  }
2264  
2265  static int
2266  vtnet_txq_intr_threshold(struct vtnet_txq *txq)
2267  {
2268  	struct vtnet_softc *sc;
2269  	int threshold;
2270  
2271  	sc = txq->vtntx_sc;
2272  
2273  	/*
2274  	 * The Tx interrupt is disabled until the queue free count falls
2275  	 * below our threshold. Completed frames are drained from the Tx
2276  	 * virtqueue before transmitting new frames and in the watchdog
2277  	 * callout, so the frequency of Tx interrupts is greatly reduced,
2278  	 * at the cost of not freeing mbufs as quickly as they otherwise
2279  	 * would be.
2280  	 */
2281  	threshold = virtqueue_size(txq->vtntx_vq) / 4;
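	/*
	 * For example, a 256-entry Tx virtqueue yields a threshold of 64,
	 * so per the note above the interrupt is only left enabled while
	 * 64 or fewer descriptors remain free.
	 */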
2282  
2283  	/*
2284  	 * Without indirect descriptors, leave enough room for the maximum
2285  	 * number of segments we handle.
2286  	 */
2287  	if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
2288  	    threshold < sc->vtnet_tx_nsegs)
2289  		threshold = sc->vtnet_tx_nsegs;
2290  
2291  	return (threshold);
2292  }
2293  
2294  static int
2295  vtnet_txq_below_threshold(struct vtnet_txq *txq)
2296  {
2297  	struct virtqueue *vq;
2298  
2299  	vq = txq->vtntx_vq;
2300  
2301  	return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold);
2302  }
2303  
2304  static int
2305  vtnet_txq_notify(struct vtnet_txq *txq)
2306  {
2307  	struct virtqueue *vq;
2308  
2309  	vq = txq->vtntx_vq;
2310  
2311  	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2312  	virtqueue_notify(vq);
2313  
2314  	if (vtnet_txq_enable_intr(txq) == 0)
2315  		return (0);
2316  
2317  	/*
2318  	 * Drain frames that were completed since last checked. If this
2319  	 * causes the queue to go above the threshold, the caller should
2320  	 * continue transmitting.
2321  	 */
2322  	if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
2323  		virtqueue_disable_intr(vq);
2324  		return (1);
2325  	}
2326  
2327  	return (0);
2328  }
2329  
2330  static void
2331  vtnet_txq_free_mbufs(struct vtnet_txq *txq)
2332  {
2333  	struct virtqueue *vq;
2334  	struct vtnet_tx_header *txhdr;
2335  	int last;
2336  #ifdef DEV_NETMAP
2337  	struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp),
2338  							txq->vtntx_id, NR_TX);
2339  #else  /* !DEV_NETMAP */
2340  	void *kring = NULL;
2341  #endif /* !DEV_NETMAP */
2342  
2343  	vq = txq->vtntx_vq;
2344  	last = 0;
2345  
2346  	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
2347  		if (kring == NULL) {
2348  			m_freem(txhdr->vth_mbuf);
2349  			uma_zfree(vtnet_tx_header_zone, txhdr);
2350  		}
2351  	}
2352  
2353  	KASSERT(virtqueue_empty(vq),
2354  	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
2355  }
2356  
2357  /*
2358   * BMV: This can go away once we finally have offsets in the mbuf header.
2359   */
2360  static int
2361  vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype,
2362      int *proto, int *start)
2363  {
2364  	struct vtnet_softc *sc;
2365  	struct ether_vlan_header *evh;
2366  #if defined(INET) || defined(INET6)
2367  	int offset;
2368  #endif
2369  
2370  	sc = txq->vtntx_sc;
2371  
2372  	evh = mtod(m, struct ether_vlan_header *);
2373  	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2374  		/* BMV: We should handle nested VLAN tags too. */
2375  		*etype = ntohs(evh->evl_proto);
2376  #if defined(INET) || defined(INET6)
2377  		offset = sizeof(struct ether_vlan_header);
2378  #endif
2379  	} else {
2380  		*etype = ntohs(evh->evl_encap_proto);
2381  #if defined(INET) || defined(INET6)
2382  		offset = sizeof(struct ether_header);
2383  #endif
2384  	}
2385  
2386  	switch (*etype) {
2387  #if defined(INET)
2388  	case ETHERTYPE_IP: {
2389  		struct ip *ip, iphdr;
2390  		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2391  			m_copydata(m, offset, sizeof(struct ip),
2392  			    (caddr_t) &iphdr);
2393  			ip = &iphdr;
2394  		} else
2395  			ip = (struct ip *)(m->m_data + offset);
2396  		*proto = ip->ip_p;
2397  		*start = offset + (ip->ip_hl << 2);
2398  		break;
2399  	}
2400  #endif
2401  #if defined(INET6)
2402  	case ETHERTYPE_IPV6:
2403  		*proto = -1;
2404  		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2405  		/* Assert the network stack sent us a valid packet. */
2406  		KASSERT(*start > offset,
2407  		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2408  		    *start, offset, *proto));
2409  		break;
2410  #endif
2411  	default:
2412  		sc->vtnet_stats.tx_csum_unknown_ethtype++;
2413  		return (EINVAL);
2414  	}
2415  
2416  	return (0);
2417  }
2418  
2419  static int
2420  vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2421      int offset, struct virtio_net_hdr *hdr)
2422  {
2423  	static struct timeval lastecn;
2424  	static int curecn;
2425  	struct vtnet_softc *sc;
2426  	struct tcphdr *tcp, tcphdr;
2427  
2428  	sc = txq->vtntx_sc;
2429  
2430  	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2431  		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2432  		tcp = &tcphdr;
2433  	} else
2434  		tcp = (struct tcphdr *)(m->m_data + offset);
2435  
2436  	hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2));
2437  	hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz);
2438  	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2439  	    VIRTIO_NET_HDR_GSO_TCPV6;
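	/*
	 * For example, an untagged TSO TCP/IPv4 packet with 20-byte IP and
	 * TCP headers has offset = 14 + 20 = 34, so hdr_len is 34 + 20 = 54
	 * and gso_size is the TCP MSS supplied by the stack (tso_segsz).
	 */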
2440  
2441  	if (__predict_false(tcp_get_flags(tcp) & TH_CWR)) {
2442  		/*
2443  		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In
2444  		 * FreeBSD, ECN support is not on a per-interface basis,
2445  		 * but globally via the net.inet.tcp.ecn.enable sysctl
2446  		 * knob. The default is off.
2447  		 */
2448  		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2449  			if (ppsratecheck(&lastecn, &curecn, 1))
2450  				if_printf(sc->vtnet_ifp,
2451  				    "TSO with ECN not negotiated with host\n");
2452  			return (ENOTSUP);
2453  		}
2454  		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2455  	}
2456  
2457  	txq->vtntx_stats.vtxs_tso++;
2458  
2459  	return (0);
2460  }
2461  
2462  static struct mbuf *
2463  vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2464      struct virtio_net_hdr *hdr)
2465  {
2466  	struct vtnet_softc *sc;
2467  	int flags, etype, csum_start, proto, error;
2468  
2469  	sc = txq->vtntx_sc;
2470  	flags = m->m_pkthdr.csum_flags;
2471  
2472  	error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2473  	if (error)
2474  		goto drop;
2475  
2476  	if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) {
2477  		/* Sanity check the parsed mbuf matches the offload flags. */
2478  		if (__predict_false((flags & VTNET_CSUM_OFFLOAD &&
2479  		    etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6
2480  		    && etype != ETHERTYPE_IPV6))) {
2481  			sc->vtnet_stats.tx_csum_proto_mismatch++;
2482  			goto drop;
2483  		}
2484  
2485  		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2486  		hdr->csum_start = vtnet_gtoh16(sc, csum_start);
2487  		hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
2488  		txq->vtntx_stats.vtxs_csum++;
2489  	}
2490  
2491  	if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
2492  		/*
2493  		 * Sanity check that the parsed IP protocol is TCP; VirtIO TSO
2494  		 * also requires the checksum offloading above.
2495  		 */
2496  		if (__predict_false(proto != IPPROTO_TCP)) {
2497  			sc->vtnet_stats.tx_tso_not_tcp++;
2498  			goto drop;
2499  		} else if (__predict_false((hdr->flags &
2500  		    VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) {
2501  			sc->vtnet_stats.tx_tso_without_csum++;
2502  			goto drop;
2503  		}
2504  
2505  		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2506  		if (error)
2507  			goto drop;
2508  	}
2509  
2510  	return (m);
2511  
2512  drop:
2513  	m_freem(m);
2514  	return (NULL);
2515  }
2516  
2517  static int
2518  vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2519      struct vtnet_tx_header *txhdr)
2520  {
2521  	struct vtnet_softc *sc;
2522  	struct virtqueue *vq;
2523  	struct sglist *sg;
2524  	struct mbuf *m;
2525  	int error;
2526  
2527  	sc = txq->vtntx_sc;
2528  	vq = txq->vtntx_vq;
2529  	sg = txq->vtntx_sg;
2530  	m = *m_head;
2531  
2532  	sglist_reset(sg);
2533  	error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2534  	if (error != 0 || sg->sg_nseg != 1) {
2535  		KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
2536  		    __func__, error, sg->sg_nseg));
2537  		goto fail;
2538  	}
2539  
2540  	error = sglist_append_mbuf(sg, m);
2541  	if (error) {
2542  		m = m_defrag(m, M_NOWAIT);
2543  		if (m == NULL)
2544  			goto fail;
2545  
2546  		*m_head = m;
2547  		sc->vtnet_stats.tx_defragged++;
2548  
2549  		error = sglist_append_mbuf(sg, m);
2550  		if (error)
2551  			goto fail;
2552  	}
2553  
2554  	txhdr->vth_mbuf = m;
2555  	error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2556  
2557  	return (error);
2558  
2559  fail:
2560  	sc->vtnet_stats.tx_defrag_failed++;
2561  	m_freem(*m_head);
2562  	*m_head = NULL;
2563  
2564  	return (ENOBUFS);
2565  }
2566  
2567  static int
2568  vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
2569  {
2570  	struct vtnet_tx_header *txhdr;
2571  	struct virtio_net_hdr *hdr;
2572  	struct mbuf *m;
2573  	int error;
2574  
2575  	m = *m_head;
2576  	M_ASSERTPKTHDR(m);
2577  
2578  	txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
2579  	if (txhdr == NULL) {
2580  		m_freem(m);
2581  		*m_head = NULL;
2582  		return (ENOMEM);
2583  	}
2584  
2585  	/*
2586  	 * Always use the non-mergeable header, regardless of whether mergeable
2587  	 * headers were negotiated, because num_buffers is always zero for
2588  	 * transmit. The vtnet_hdr_size determines how much header is enqueued.
2589  	 */
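	/*
	 * For example, vtnet_hdr_size is typically 12 bytes for modern (V1)
	 * or mergeable-buffer devices and 10 bytes for the legacy
	 * struct virtio_net_hdr; only that many bytes of vth_uhdr are
	 * handed to the device in vtnet_txq_enqueue_buf().
	 */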
2590  	hdr = &txhdr->vth_uhdr.hdr;
2591  
2592  	if (m->m_flags & M_VLANTAG) {
2593  		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2594  		if ((*m_head = m) == NULL) {
2595  			error = ENOBUFS;
2596  			goto fail;
2597  		}
2598  		m->m_flags &= ~M_VLANTAG;
2599  	}
2600  
2601  	if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2602  		m = vtnet_txq_offload(txq, m, hdr);
2603  		if ((*m_head = m) == NULL) {
2604  			error = ENOBUFS;
2605  			goto fail;
2606  		}
2607  	}
2608  
2609  	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2610  fail:
2611  	if (error)
2612  		uma_zfree(vtnet_tx_header_zone, txhdr);
2613  
2614  	return (error);
2615  }
2616  
2617  #ifdef VTNET_LEGACY_TX
2618  
2619  static void
2620  vtnet_start_locked(struct vtnet_txq *txq, if_t ifp)
2621  {
2622  	struct vtnet_softc *sc;
2623  	struct virtqueue *vq;
2624  	struct mbuf *m0;
2625  	int tries, enq;
2626  
2627  	sc = txq->vtntx_sc;
2628  	vq = txq->vtntx_vq;
2629  	tries = 0;
2630  
2631  	VTNET_TXQ_LOCK_ASSERT(txq);
2632  
2633  	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
2634  	    sc->vtnet_link_active == 0)
2635  		return;
2636  
2637  	vtnet_txq_eof(txq);
2638  
2639  again:
2640  	enq = 0;
2641  
2642  	while (!if_sendq_empty(ifp)) {
2643  		if (virtqueue_full(vq))
2644  			break;
2645  
2646  		m0 = if_dequeue(ifp);
2647  		if (m0 == NULL)
2648  			break;
2649  
2650  		if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
2651  			if (m0 != NULL)
2652  				if_sendq_prepend(ifp, m0);
2653  			break;
2654  		}
2655  
2656  		enq++;
2657  		ETHER_BPF_MTAP(ifp, m0);
2658  	}
2659  
2660  	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2661  		if (tries++ < VTNET_NOTIFY_RETRIES)
2662  			goto again;
2663  
2664  		txq->vtntx_stats.vtxs_rescheduled++;
2665  		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2666  	}
2667  }
2668  
2669  static void
2670  vtnet_start(if_t ifp)
2671  {
2672  	struct vtnet_softc *sc;
2673  	struct vtnet_txq *txq;
2674  
2675  	sc = if_getsoftc(ifp);
2676  	txq = &sc->vtnet_txqs[0];
2677  
2678  	VTNET_TXQ_LOCK(txq);
2679  	vtnet_start_locked(txq, ifp);
2680  	VTNET_TXQ_UNLOCK(txq);
2681  }
2682  
2683  #else /* !VTNET_LEGACY_TX */
2684  
2685  static int
2686  vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2687  {
2688  	struct vtnet_softc *sc;
2689  	struct virtqueue *vq;
2690  	struct buf_ring *br;
2691  	if_t ifp;
2692  	int enq, tries, error;
2693  
2694  	sc = txq->vtntx_sc;
2695  	vq = txq->vtntx_vq;
2696  	br = txq->vtntx_br;
2697  	ifp = sc->vtnet_ifp;
2698  	tries = 0;
2699  	error = 0;
2700  
2701  	VTNET_TXQ_LOCK_ASSERT(txq);
2702  
2703  	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
2704  	    sc->vtnet_link_active == 0) {
2705  		if (m != NULL)
2706  			error = drbr_enqueue(ifp, br, m);
2707  		return (error);
2708  	}
2709  
2710  	if (m != NULL) {
2711  		error = drbr_enqueue(ifp, br, m);
2712  		if (error)
2713  			return (error);
2714  	}
2715  
2716  	vtnet_txq_eof(txq);
2717  
2718  again:
2719  	enq = 0;
2720  
2721  	while ((m = drbr_peek(ifp, br)) != NULL) {
2722  		if (virtqueue_full(vq)) {
2723  			drbr_putback(ifp, br, m);
2724  			break;
2725  		}
2726  
2727  		if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
2728  			if (m != NULL)
2729  				drbr_putback(ifp, br, m);
2730  			else
2731  				drbr_advance(ifp, br);
2732  			break;
2733  		}
2734  		drbr_advance(ifp, br);
2735  
2736  		enq++;
2737  		ETHER_BPF_MTAP(ifp, m);
2738  	}
2739  
2740  	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2741  		if (tries++ < VTNET_NOTIFY_RETRIES)
2742  			goto again;
2743  
2744  		txq->vtntx_stats.vtxs_rescheduled++;
2745  		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2746  	}
2747  
2748  	return (0);
2749  }
2750  
2751  static int
2752  vtnet_txq_mq_start(if_t ifp, struct mbuf *m)
2753  {
2754  	struct vtnet_softc *sc;
2755  	struct vtnet_txq *txq;
2756  	int i, npairs, error;
2757  
2758  	sc = if_getsoftc(ifp);
2759  	npairs = sc->vtnet_act_vq_pairs;
2760  
2761  	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2762  		i = m->m_pkthdr.flowid % npairs;
2763  	else
2764  		i = curcpu % npairs;
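	/*
	 * For example, with 4 active queue pairs, a flow with flowid 13
	 * (or a thread on CPU 13) maps to queue index 1.
	 */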
2765  
2766  	txq = &sc->vtnet_txqs[i];
2767  
2768  	if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2769  		error = vtnet_txq_mq_start_locked(txq, m);
2770  		VTNET_TXQ_UNLOCK(txq);
2771  	} else {
2772  		error = drbr_enqueue(ifp, txq->vtntx_br, m);
2773  		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2774  	}
2775  
2776  	return (error);
2777  }
2778  
2779  static void
2780  vtnet_txq_tq_deferred(void *xtxq, int pending __unused)
2781  {
2782  	struct vtnet_softc *sc;
2783  	struct vtnet_txq *txq;
2784  
2785  	txq = xtxq;
2786  	sc = txq->vtntx_sc;
2787  
2788  	VTNET_TXQ_LOCK(txq);
2789  	if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2790  		vtnet_txq_mq_start_locked(txq, NULL);
2791  	VTNET_TXQ_UNLOCK(txq);
2792  }
2793  
2794  #endif /* VTNET_LEGACY_TX */
2795  
2796  static void
2797  vtnet_txq_start(struct vtnet_txq *txq)
2798  {
2799  	struct vtnet_softc *sc;
2800  	if_t ifp;
2801  
2802  	sc = txq->vtntx_sc;
2803  	ifp = sc->vtnet_ifp;
2804  
2805  #ifdef VTNET_LEGACY_TX
2806  	if (!if_sendq_empty(ifp))
2807  		vtnet_start_locked(txq, ifp);
2808  #else
2809  	if (!drbr_empty(ifp, txq->vtntx_br))
2810  		vtnet_txq_mq_start_locked(txq, NULL);
2811  #endif
2812  }
2813  
2814  static void
2815  vtnet_txq_tq_intr(void *xtxq, int pending __unused)
2816  {
2817  	struct vtnet_softc *sc;
2818  	struct vtnet_txq *txq;
2819  	if_t ifp;
2820  
2821  	txq = xtxq;
2822  	sc = txq->vtntx_sc;
2823  	ifp = sc->vtnet_ifp;
2824  
2825  	VTNET_TXQ_LOCK(txq);
2826  
2827  	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2828  		VTNET_TXQ_UNLOCK(txq);
2829  		return;
2830  	}
2831  
2832  	vtnet_txq_eof(txq);
2833  	vtnet_txq_start(txq);
2834  
2835  	VTNET_TXQ_UNLOCK(txq);
2836  }
2837  
2838  static int
2839  vtnet_txq_eof(struct vtnet_txq *txq)
2840  {
2841  	struct virtqueue *vq;
2842  	struct vtnet_tx_header *txhdr;
2843  	struct mbuf *m;
2844  	int deq;
2845  
2846  	vq = txq->vtntx_vq;
2847  	deq = 0;
2848  	VTNET_TXQ_LOCK_ASSERT(txq);
2849  
2850  	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2851  		m = txhdr->vth_mbuf;
2852  		deq++;
2853  
2854  		txq->vtntx_stats.vtxs_opackets++;
2855  		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2856  		if (m->m_flags & M_MCAST)
2857  			txq->vtntx_stats.vtxs_omcasts++;
2858  
2859  		m_freem(m);
2860  		uma_zfree(vtnet_tx_header_zone, txhdr);
2861  	}
2862  
2863  	if (virtqueue_empty(vq))
2864  		txq->vtntx_watchdog = 0;
2865  
2866  	return (deq);
2867  }
2868  
2869  static void
2870  vtnet_tx_vq_intr(void *xtxq)
2871  {
2872  	struct vtnet_softc *sc;
2873  	struct vtnet_txq *txq;
2874  	if_t ifp;
2875  
2876  	txq = xtxq;
2877  	sc = txq->vtntx_sc;
2878  	ifp = sc->vtnet_ifp;
2879  
2880  	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2881  		/*
2882  		 * Ignore this interrupt. Either this is a spurious interrupt
2883  		 * or multiqueue without per-VQ MSIX so every queue needs to
2884  		 * or multiqueue without per-VQ MSIX, so every queue needs to
2885  		 * to avoid).
2886  		 */
2887  		vtnet_txq_disable_intr(txq);
2888  		return;
2889  	}
2890  
2891  #ifdef DEV_NETMAP
2892  	if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
2893  		return;
2894  #endif /* DEV_NETMAP */
2895  
2896  	VTNET_TXQ_LOCK(txq);
2897  
2898  	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2899  		VTNET_TXQ_UNLOCK(txq);
2900  		return;
2901  	}
2902  
2903  	vtnet_txq_eof(txq);
2904  	vtnet_txq_start(txq);
2905  
2906  	VTNET_TXQ_UNLOCK(txq);
2907  }
2908  
2909  static void
2910  vtnet_tx_start_all(struct vtnet_softc *sc)
2911  {
2912  	struct vtnet_txq *txq;
2913  	int i;
2914  
2915  	VTNET_CORE_LOCK_ASSERT(sc);
2916  
2917  	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2918  		txq = &sc->vtnet_txqs[i];
2919  
2920  		VTNET_TXQ_LOCK(txq);
2921  		vtnet_txq_start(txq);
2922  		VTNET_TXQ_UNLOCK(txq);
2923  	}
2924  }
2925  
2926  #ifndef VTNET_LEGACY_TX
2927  static void
2928  vtnet_qflush(if_t ifp)
2929  {
2930  	struct vtnet_softc *sc;
2931  	struct vtnet_txq *txq;
2932  	struct mbuf *m;
2933  	int i;
2934  
2935  	sc = if_getsoftc(ifp);
2936  
2937  	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2938  		txq = &sc->vtnet_txqs[i];
2939  
2940  		VTNET_TXQ_LOCK(txq);
2941  		while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2942  			m_freem(m);
2943  		VTNET_TXQ_UNLOCK(txq);
2944  	}
2945  
2946  	if_qflush(ifp);
2947  }
2948  #endif
2949  
2950  static int
2951  vtnet_watchdog(struct vtnet_txq *txq)
2952  {
2953  	if_t ifp;
2954  
2955  	ifp = txq->vtntx_sc->vtnet_ifp;
2956  
2957  	VTNET_TXQ_LOCK(txq);
2958  	if (txq->vtntx_watchdog == 1) {
2959  		/*
2960  		 * Only drain completed frames if the watchdog is about to
2961  		 * expire. If any frames were drained, there may be enough
2962  		 * free descriptors now available to transmit queued frames.
2963  		 * In that case, the timer will immediately be decremented
2964  		 * below, but the timeout is generous enough that this should not
2965  		 * be a problem.
2966  		 */
2967  		if (vtnet_txq_eof(txq) != 0)
2968  			vtnet_txq_start(txq);
2969  	}
2970  
2971  	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2972  		VTNET_TXQ_UNLOCK(txq);
2973  		return (0);
2974  	}
2975  	VTNET_TXQ_UNLOCK(txq);
2976  
2977  	if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
2978  	return (1);
2979  }
2980  
2981  static void
2982  vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
2983      struct vtnet_txq_stats *txacc)
2984  {
2985  
2986  	bzero(rxacc, sizeof(struct vtnet_rxq_stats));
2987  	bzero(txacc, sizeof(struct vtnet_txq_stats));
2988  
2989  	for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2990  		struct vtnet_rxq_stats *rxst;
2991  		struct vtnet_txq_stats *txst;
2992  
2993  		rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
2994  		rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
2995  		rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
2996  		rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
2997  		rxacc->vrxs_csum += rxst->vrxs_csum;
2998  		rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
2999  		rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
3000  
3001  		txst = &sc->vtnet_txqs[i].vtntx_stats;
3002  		txacc->vtxs_opackets += txst->vtxs_opackets;
3003  		txacc->vtxs_obytes += txst->vtxs_obytes;
3004  		txacc->vtxs_csum += txst->vtxs_csum;
3005  		txacc->vtxs_tso += txst->vtxs_tso;
3006  		txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
3007  	}
3008  }
3009  
3010  static uint64_t
3011  vtnet_get_counter(if_t ifp, ift_counter cnt)
3012  {
3013  	struct vtnet_softc *sc;
3014  	struct vtnet_rxq_stats rxaccum;
3015  	struct vtnet_txq_stats txaccum;
3016  
3017  	sc = if_getsoftc(ifp);
3018  	vtnet_accum_stats(sc, &rxaccum, &txaccum);
3019  
3020  	switch (cnt) {
3021  	case IFCOUNTER_IPACKETS:
3022  		return (rxaccum.vrxs_ipackets);
3023  	case IFCOUNTER_IQDROPS:
3024  		return (rxaccum.vrxs_iqdrops);
3025  	case IFCOUNTER_IERRORS:
3026  		return (rxaccum.vrxs_ierrors);
3027  	case IFCOUNTER_OPACKETS:
3028  		return (txaccum.vtxs_opackets);
3029  #ifndef VTNET_LEGACY_TX
3030  	case IFCOUNTER_OBYTES:
3031  		return (txaccum.vtxs_obytes);
3032  	case IFCOUNTER_OMCASTS:
3033  		return (txaccum.vtxs_omcasts);
3034  #endif
3035  	default:
3036  		return (if_get_counter_default(ifp, cnt));
3037  	}
3038  }
3039  
3040  static void
3041  vtnet_tick(void *xsc)
3042  {
3043  	struct vtnet_softc *sc;
3044  	if_t ifp;
3045  	int i, timedout;
3046  
3047  	sc = xsc;
3048  	ifp = sc->vtnet_ifp;
3049  	timedout = 0;
3050  
3051  	VTNET_CORE_LOCK_ASSERT(sc);
3052  
3053  	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3054  		timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
3055  
3056  	if (timedout != 0) {
3057  		if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3058  		vtnet_init_locked(sc, 0);
3059  	} else
3060  		callout_schedule(&sc->vtnet_tick_ch, hz);
3061  }
3062  
3063  static void
3064  vtnet_start_taskqueues(struct vtnet_softc *sc)
3065  {
3066  	device_t dev;
3067  	struct vtnet_rxq *rxq;
3068  	struct vtnet_txq *txq;
3069  	int i, error;
3070  
3071  	dev = sc->vtnet_dev;
3072  
3073  	/*
3074  	 * Errors here are very difficult to recover from - we cannot
3075  	 * easily fail because, if this is during boot, we will hang
3076  	 * when freeing any successfully started taskqueues because
3077  	 * the scheduler isn't up yet.
3078  	 *
3079  	 * Most drivers just ignore the return value - it only fails
3080  	 * with ENOMEM so an error is not likely.
3081  	 */
3082  	for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
3083  		rxq = &sc->vtnet_rxqs[i];
3084  		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
3085  		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
3086  		if (error) {
3087  			device_printf(dev, "failed to start rx taskq %d\n",
3088  			    rxq->vtnrx_id);
3089  		}
3090  
3091  		txq = &sc->vtnet_txqs[i];
3092  		error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
3093  		    "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
3094  		if (error) {
3095  			device_printf(dev, "failed to start tx taskq %d\n",
3096  			    txq->vtntx_id);
3097  		}
3098  	}
3099  }
3100  
3101  static void
3102  vtnet_free_taskqueues(struct vtnet_softc *sc)
3103  {
3104  	struct vtnet_rxq *rxq;
3105  	struct vtnet_txq *txq;
3106  	int i;
3107  
3108  	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3109  		rxq = &sc->vtnet_rxqs[i];
3110  		if (rxq->vtnrx_tq != NULL) {
3111  			taskqueue_free(rxq->vtnrx_tq);
3112  			rxq->vtnrx_tq = NULL;
3113  		}
3114  
3115  		txq = &sc->vtnet_txqs[i];
3116  		if (txq->vtntx_tq != NULL) {
3117  			taskqueue_free(txq->vtntx_tq);
3118  			txq->vtntx_tq = NULL;
3119  		}
3120  	}
3121  }
3122  
3123  static void
3124  vtnet_drain_taskqueues(struct vtnet_softc *sc)
3125  {
3126  	struct vtnet_rxq *rxq;
3127  	struct vtnet_txq *txq;
3128  	int i;
3129  
3130  	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3131  		rxq = &sc->vtnet_rxqs[i];
3132  		if (rxq->vtnrx_tq != NULL)
3133  			taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
3134  
3135  		txq = &sc->vtnet_txqs[i];
3136  		if (txq->vtntx_tq != NULL) {
3137  			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
3138  #ifndef VTNET_LEGACY_TX
3139  			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
3140  #endif
3141  		}
3142  	}
3143  }
3144  
3145  static void
3146  vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
3147  {
3148  	struct vtnet_rxq *rxq;
3149  	struct vtnet_txq *txq;
3150  	int i;
3151  
3152  	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3153  		rxq = &sc->vtnet_rxqs[i];
3154  		vtnet_rxq_free_mbufs(rxq);
3155  
3156  		txq = &sc->vtnet_txqs[i];
3157  		vtnet_txq_free_mbufs(txq);
3158  	}
3159  }
3160  
3161  static void
3162  vtnet_stop_rendezvous(struct vtnet_softc *sc)
3163  {
3164  	struct vtnet_rxq *rxq;
3165  	struct vtnet_txq *txq;
3166  	int i;
3167  
3168  	VTNET_CORE_LOCK_ASSERT(sc);
3169  
3170  	/*
3171  	 * Lock and unlock the per-queue mutex so we know the stop
3172  	 * state is visible. Doing only the active queues should be
3173  	 * sufficient, but it does not cost much extra to do all the
3174  	 * queues.
3175  	 */
3176  	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3177  		rxq = &sc->vtnet_rxqs[i];
3178  		VTNET_RXQ_LOCK(rxq);
3179  		VTNET_RXQ_UNLOCK(rxq);
3180  
3181  		txq = &sc->vtnet_txqs[i];
3182  		VTNET_TXQ_LOCK(txq);
3183  		VTNET_TXQ_UNLOCK(txq);
3184  	}
3185  }
3186  
3187  static void
3188  vtnet_stop(struct vtnet_softc *sc)
3189  {
3190  	device_t dev;
3191  	if_t ifp;
3192  
3193  	dev = sc->vtnet_dev;
3194  	ifp = sc->vtnet_ifp;
3195  
3196  	VTNET_CORE_LOCK_ASSERT(sc);
3197  
3198  	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3199  	sc->vtnet_link_active = 0;
3200  	callout_stop(&sc->vtnet_tick_ch);
3201  
3202  	/* Only advisory. */
3203  	vtnet_disable_interrupts(sc);
3204  
3205  #ifdef DEV_NETMAP
3206  	/* Stop any pending txsync/rxsync and disable them. */
3207  	netmap_disable_all_rings(ifp);
3208  #endif /* DEV_NETMAP */
3209  
3210  	/*
3211  	 * Stop the host adapter. This resets it to the pre-initialized
3212  	 * state. It will not generate any interrupts until after it is
3213  	 * reinitialized.
3214  	 */
3215  	virtio_stop(dev);
3216  	vtnet_stop_rendezvous(sc);
3217  
3218  	vtnet_drain_rxtx_queues(sc);
3219  	sc->vtnet_act_vq_pairs = 1;
3220  }
3221  
3222  static int
3223  vtnet_virtio_reinit(struct vtnet_softc *sc)
3224  {
3225  	device_t dev;
3226  	if_t ifp;
3227  	uint64_t features;
3228  	int error;
3229  
3230  	dev = sc->vtnet_dev;
3231  	ifp = sc->vtnet_ifp;
3232  	features = sc->vtnet_negotiated_features;
3233  
3234  	/*
3235  	 * Re-negotiate with the host, removing any disabled receive
3236  	 * features. Transmit features are disabled only on our side
3237  	 * via if_capenable and if_hwassist.
3238  	 */
3239  
3240  	if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
3241  		features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES);
3242  
3243  	if ((if_getcapenable(ifp) & IFCAP_LRO) == 0)
3244  		features &= ~VTNET_LRO_FEATURES;
3245  
3246  	if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0)
3247  		features &= ~VIRTIO_NET_F_CTRL_VLAN;
3248  
3249  	error = virtio_reinit(dev, features);
3250  	if (error) {
3251  		device_printf(dev, "virtio reinit error %d\n", error);
3252  		return (error);
3253  	}
3254  
3255  	sc->vtnet_features = features;
3256  	virtio_reinit_complete(dev);
3257  
3258  	return (0);
3259  }
3260  
3261  static void
3262  vtnet_init_rx_filters(struct vtnet_softc *sc)
3263  {
3264  	if_t ifp;
3265  
3266  	ifp = sc->vtnet_ifp;
3267  
3268  	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
3269  		vtnet_rx_filter(sc);
3270  		vtnet_rx_filter_mac(sc);
3271  	}
3272  
3273  	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
3274  		vtnet_rx_filter_vlan(sc);
3275  }
3276  
3277  static int
3278  vtnet_init_rx_queues(struct vtnet_softc *sc)
3279  {
3280  	device_t dev;
3281  	if_t ifp;
3282  	struct vtnet_rxq *rxq;
3283  	int i, clustersz, error;
3284  
3285  	dev = sc->vtnet_dev;
3286  	ifp = sc->vtnet_ifp;
3287  
3288  	clustersz = vtnet_rx_cluster_size(sc, if_getmtu(ifp));
3289  	sc->vtnet_rx_clustersz = clustersz;
3290  
3291  	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) {
3292  		sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) +
3293  		    VTNET_MAX_RX_SIZE, clustersz);
3294  		KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
3295  		    ("%s: too many rx mbufs %d for %d segments", __func__,
3296  		    sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
3297  	} else
3298  		sc->vtnet_rx_nmbufs = 1;
3299  
3300  	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3301  		rxq = &sc->vtnet_rxqs[i];
3302  
3303  		/* Hold the lock to satisfy asserts. */
3304  		VTNET_RXQ_LOCK(rxq);
3305  		error = vtnet_rxq_populate(rxq);
3306  		VTNET_RXQ_UNLOCK(rxq);
3307  
3308  		if (error) {
3309  			device_printf(dev, "cannot populate Rx queue %d\n", i);
3310  			return (error);
3311  		}
3312  	}
3313  
3314  	return (0);
3315  }
3316  
3317  static int
3318  vtnet_init_tx_queues(struct vtnet_softc *sc)
3319  {
3320  	struct vtnet_txq *txq;
3321  	int i;
3322  
3323  	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3324  		txq = &sc->vtnet_txqs[i];
3325  		txq->vtntx_watchdog = 0;
3326  		txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq);
3327  #ifdef DEV_NETMAP
3328  		netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0);
3329  #endif /* DEV_NETMAP */
3330  	}
3331  
3332  	return (0);
3333  }
3334  
3335  static int
3336  vtnet_init_rxtx_queues(struct vtnet_softc *sc)
3337  {
3338  	int error;
3339  
3340  	error = vtnet_init_rx_queues(sc);
3341  	if (error)
3342  		return (error);
3343  
3344  	error = vtnet_init_tx_queues(sc);
3345  	if (error)
3346  		return (error);
3347  
3348  	return (0);
3349  }
3350  
3351  static void
3352  vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
3353  {
3354  	device_t dev;
3355  	int npairs;
3356  
3357  	dev = sc->vtnet_dev;
3358  
3359  	if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) {
3360  		sc->vtnet_act_vq_pairs = 1;
3361  		return;
3362  	}
3363  
3364  	npairs = sc->vtnet_req_vq_pairs;
3365  
3366  	if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3367  		device_printf(dev, "cannot set active queue pairs to %d, "
3368  		    "falling back to 1 queue pair\n", npairs);
3369  		npairs = 1;
3370  	}
3371  
3372  	sc->vtnet_act_vq_pairs = npairs;
3373  }
3374  
3375  static void
3376  vtnet_update_rx_offloads(struct vtnet_softc *sc)
3377  {
3378  	if_t ifp;
3379  	uint64_t features;
3380  	int error;
3381  
3382  	ifp = sc->vtnet_ifp;
3383  	features = sc->vtnet_features;
3384  
3385  	VTNET_CORE_LOCK_ASSERT(sc);
3386  
3387  	if (if_getcapabilities(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
3388  		if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
3389  			features |= VIRTIO_NET_F_GUEST_CSUM;
3390  		else
3391  			features &= ~VIRTIO_NET_F_GUEST_CSUM;
3392  	}
3393  
3394  	if (if_getcapabilities(ifp) & IFCAP_LRO && !vtnet_software_lro(sc)) {
3395  		if (if_getcapenable(ifp) & IFCAP_LRO)
3396  			features |= VTNET_LRO_FEATURES;
3397  		else
3398  			features &= ~VTNET_LRO_FEATURES;
3399  	}
3400  
3401  	error = vtnet_ctrl_guest_offloads(sc,
3402  	    features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 |
3403  		        VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN  |
3404  			VIRTIO_NET_F_GUEST_UFO));
3405  	if (error) {
3406  		device_printf(sc->vtnet_dev,
3407  		    "%s: cannot update Rx features\n", __func__);
3408  		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
3409  			if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3410  			vtnet_init_locked(sc, 0);
3411  		}
3412  	} else
3413  		sc->vtnet_features = features;
3414  }
3415  
3416  static int
3417  vtnet_reinit(struct vtnet_softc *sc)
3418  {
3419  	if_t ifp;
3420  	int error;
3421  
3422  	ifp = sc->vtnet_ifp;
3423  
3424  	bcopy(if_getlladdr(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3425  
3426  	error = vtnet_virtio_reinit(sc);
3427  	if (error)
3428  		return (error);
3429  
3430  	vtnet_set_macaddr(sc);
3431  	vtnet_set_active_vq_pairs(sc);
3432  
3433  	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3434  		vtnet_init_rx_filters(sc);
3435  
3436  	if_sethwassist(ifp, 0);
3437  	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
3438  		if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD, 0);
3439  	if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
3440  		if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD_IPV6, 0);
3441  	if (if_getcapenable(ifp) & IFCAP_TSO4)
3442  		if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
3443  	if (if_getcapenable(ifp) & IFCAP_TSO6)
3444  		if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
3445  
3446  	error = vtnet_init_rxtx_queues(sc);
3447  	if (error)
3448  		return (error);
3449  
3450  	return (0);
3451  }
3452  
3453  static void
3454  vtnet_init_locked(struct vtnet_softc *sc, int init_mode)
3455  {
3456  	if_t ifp;
3457  
3458  	ifp = sc->vtnet_ifp;
3459  
3460  	VTNET_CORE_LOCK_ASSERT(sc);
3461  
3462  	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
3463  		return;
3464  
3465  	vtnet_stop(sc);
3466  
3467  #ifdef DEV_NETMAP
3468  	/* Once stopped we can update the netmap flags, if necessary. */
3469  	switch (init_mode) {
3470  	case VTNET_INIT_NETMAP_ENTER:
3471  		nm_set_native_flags(NA(ifp));
3472  		break;
3473  	case VTNET_INIT_NETMAP_EXIT:
3474  		nm_clear_native_flags(NA(ifp));
3475  		break;
3476  	}
3477  #endif /* DEV_NETMAP */
3478  
3479  	if (vtnet_reinit(sc) != 0) {
3480  		vtnet_stop(sc);
3481  		return;
3482  	}
3483  
3484  	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
3485  	vtnet_update_link_status(sc);
3486  	vtnet_enable_interrupts(sc);
3487  	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3488  
3489  #ifdef DEV_NETMAP
3490  	/* Re-enable txsync/rxsync. */
3491  	netmap_enable_all_rings(ifp);
3492  #endif /* DEV_NETMAP */
3493  }
3494  
3495  static void
3496  vtnet_init(void *xsc)
3497  {
3498  	struct vtnet_softc *sc;
3499  
3500  	sc = xsc;
3501  
3502  	VTNET_CORE_LOCK(sc);
3503  	vtnet_init_locked(sc, 0);
3504  	VTNET_CORE_UNLOCK(sc);
3505  }
3506  
3507  static void
3508  vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3509  {
3510  
3511  	/*
3512  	 * The control virtqueue is only polled and therefore it should
3513  	 * already be empty.
3514  	 */
3515  	KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
3516  	    ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq));
3517  }
3518  
3519  static void
3520  vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3521      struct sglist *sg, int readable, int writable)
3522  {
3523  	struct virtqueue *vq;
3524  
3525  	vq = sc->vtnet_ctrl_vq;
3526  
3527  	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ);
3528  	VTNET_CORE_LOCK_ASSERT(sc);
3529  
3530  	if (!virtqueue_empty(vq))
3531  		return;
3532  
3533  	/*
3534  	 * Poll for the response; the command has likely already completed
3535  	 * by the time the notify returns.
3536  	 */
3537  	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0)  {
3538  		virtqueue_notify(vq);
3539  		virtqueue_poll(vq, NULL);
3540  	}
3541  }
3542  
3543  static int
3544  vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3545  {
3546  	struct sglist_seg segs[3];
3547  	struct sglist sg;
3548  	struct {
3549  		struct virtio_net_ctrl_hdr hdr __aligned(2);
3550  		uint8_t pad1;
3551  		uint8_t addr[ETHER_ADDR_LEN] __aligned(8);
3552  		uint8_t pad2;
3553  		uint8_t ack;
3554  	} s;
3555  	int error;
3556  
3557  	error = 0;
3558  	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC);
3559  
3560  	s.hdr.class = VIRTIO_NET_CTRL_MAC;
3561  	s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3562  	bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN);
3563  	s.ack = VIRTIO_NET_ERR;
3564  
3565  	sglist_init(&sg, nitems(segs), segs);
3566  	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3567  	error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN);
3568  	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3569  	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3570  
3571  	if (error == 0)
3572  		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3573  
3574  	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3575  }
3576  
3577  static int
3578  vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads)
3579  {
3580  	struct sglist_seg segs[3];
3581  	struct sglist sg;
3582  	struct {
3583  		struct virtio_net_ctrl_hdr hdr __aligned(2);
3584  		uint8_t pad1;
3585  		uint64_t offloads __aligned(8);
3586  		uint8_t pad2;
3587  		uint8_t ack;
3588  	} s;
3589  	int error;
3590  
3591  	error = 0;
3592  	MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3593  
3594  	s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
3595  	s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
3596  	s.offloads = vtnet_gtoh64(sc, offloads);
3597  	s.ack = VIRTIO_NET_ERR;
3598  
3599  	sglist_init(&sg, nitems(segs), segs);
3600  	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3601  	error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t));
3602  	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3603  	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3604  
3605  	if (error == 0)
3606  		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3607  
3608  	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3609  }
3610  
3611  static int
3612  vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3613  {
3614  	struct sglist_seg segs[3];
3615  	struct sglist sg;
3616  	struct {
3617  		struct virtio_net_ctrl_hdr hdr __aligned(2);
3618  		uint8_t pad1;
3619  		struct virtio_net_ctrl_mq mq __aligned(2);
3620  		uint8_t pad2;
3621  		uint8_t ack;
3622  	} s;
3623  	int error;
3624  
3625  	error = 0;
3626  	MPASS(sc->vtnet_flags & VTNET_FLAG_MQ);
3627  
3628  	s.hdr.class = VIRTIO_NET_CTRL_MQ;
3629  	s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3630  	s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs);
3631  	s.ack = VIRTIO_NET_ERR;
3632  
3633  	sglist_init(&sg, nitems(segs), segs);
3634  	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3635  	error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3636  	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3637  	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3638  
3639  	if (error == 0)
3640  		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3641  
3642  	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3643  }
3644  
3645  static int
3646  vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on)
3647  {
3648  	struct sglist_seg segs[3];
3649  	struct sglist sg;
3650  	struct {
3651  		struct virtio_net_ctrl_hdr hdr __aligned(2);
3652  		uint8_t pad1;
3653  		uint8_t onoff;
3654  		uint8_t pad2;
3655  		uint8_t ack;
3656  	} s;
3657  	int error;
3658  
3659  	error = 0;
3660  	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3661  
3662  	s.hdr.class = VIRTIO_NET_CTRL_RX;
3663  	s.hdr.cmd = cmd;
3664  	s.onoff = on;
3665  	s.ack = VIRTIO_NET_ERR;
3666  
3667  	sglist_init(&sg, nitems(segs), segs);
3668  	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3669  	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3670  	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3671  	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3672  
3673  	if (error == 0)
3674  		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3675  
3676  	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3677  }
3678  
3679  static int
3680  vtnet_set_promisc(struct vtnet_softc *sc, bool on)
3681  {
3682  	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3683  }
3684  
3685  static int
3686  vtnet_set_allmulti(struct vtnet_softc *sc, bool on)
3687  {
3688  	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3689  }
3690  
3691  static void
3692  vtnet_rx_filter(struct vtnet_softc *sc)
3693  {
3694  	device_t dev;
3695  	if_t ifp;
3696  
3697  	dev = sc->vtnet_dev;
3698  	ifp = sc->vtnet_ifp;
3699  
3700  	VTNET_CORE_LOCK_ASSERT(sc);
3701  
3702  	if (vtnet_set_promisc(sc, if_getflags(ifp) & IFF_PROMISC) != 0) {
3703  		device_printf(dev, "cannot %s promiscuous mode\n",
3704  		    if_getflags(ifp) & IFF_PROMISC ? "enable" : "disable");
3705  	}
3706  
3707  	if (vtnet_set_allmulti(sc, if_getflags(ifp) & IFF_ALLMULTI) != 0) {
3708  		device_printf(dev, "cannot %s all-multicast mode\n",
3709  		    if_getflags(ifp) & IFF_ALLMULTI ? "enable" : "disable");
3710  	}
3711  }
3712  
3713  static u_int
3714  vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt)
3715  {
3716  	struct vtnet_softc *sc = arg;
3717  
3718  	if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3719  		return (0);
3720  
3721  	if (ucnt < VTNET_MAX_MAC_ENTRIES)
3722  		bcopy(LLADDR(sdl),
3723  		    &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt],
3724  		    ETHER_ADDR_LEN);
3725  
3726  	return (1);
3727  }
3728  
3729  static u_int
3730  vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt)
3731  {
3732  	struct vtnet_mac_filter *filter = arg;
3733  
3734  	if (mcnt < VTNET_MAX_MAC_ENTRIES)
3735  		bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt],
3736  		    ETHER_ADDR_LEN);
3737  
3738  	return (1);
3739  }
3740  
3741  static void
3742  vtnet_rx_filter_mac(struct vtnet_softc *sc)
3743  {
3744  	struct virtio_net_ctrl_hdr hdr __aligned(2);
3745  	struct vtnet_mac_filter *filter;
3746  	struct sglist_seg segs[4];
3747  	struct sglist sg;
3748  	if_t ifp;
3749  	bool promisc, allmulti;
3750  	u_int ucnt, mcnt;
3751  	int error;
3752  	uint8_t ack;
3753  
3754  	ifp = sc->vtnet_ifp;
3755  	filter = sc->vtnet_mac_filter;
3756  	error = 0;
3757  
3758  	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3759  	VTNET_CORE_LOCK_ASSERT(sc);
3760  
3761  	/* Unicast MAC addresses: */
3762  	ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc);
3763  	promisc = (ucnt > VTNET_MAX_MAC_ENTRIES);
3764  
3765  	if (promisc) {
3766  		ucnt = 0;
3767  		if_printf(ifp, "more than %d MAC addresses assigned, "
3768  		    "falling back to promiscuous mode\n",
3769  		    VTNET_MAX_MAC_ENTRIES);
3770  	}
3771  
3772  	/* Multicast MAC addresses: */
3773  	mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter);
3774  	allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES);
3775  
3776  	if (allmulti) {
3777  		mcnt = 0;
3778  		if_printf(ifp, "more than %d multicast MAC addresses "
3779  		    "assigned, falling back to all-multicast mode\n",
3780  		    VTNET_MAX_MAC_ENTRIES);
3781  	}
3782  
3783  	if (promisc && allmulti)
3784  		goto out;
3785  
3786  	filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt);
3787  	filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt);
3788  
3789  	hdr.class = VIRTIO_NET_CTRL_MAC;
3790  	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3791  	ack = VIRTIO_NET_ERR;
3792  
3793  	sglist_init(&sg, nitems(segs), segs);
3794  	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3795  	error |= sglist_append(&sg, &filter->vmf_unicast,
3796  	    sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN);
3797  	error |= sglist_append(&sg, &filter->vmf_multicast,
3798  	    sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN);
3799  	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3800  	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3801  
3802  	if (error == 0)
3803  		vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3804  	if (ack != VIRTIO_NET_OK)
3805  		if_printf(ifp, "error setting host MAC filter table\n");
3806  
3807  out:
3808  	if (promisc != 0 && vtnet_set_promisc(sc, true) != 0)
3809  		if_printf(ifp, "cannot enable promiscuous mode\n");
3810  	if (allmulti != 0 && vtnet_set_allmulti(sc, true) != 0)
3811  		if_printf(ifp, "cannot enable all-multicast mode\n");
3812  }
3813  
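      /*
       * Add or remove a single VLAN ID in the host's VLAN filter with a
       * VIRTIO_NET_CTRL_VLAN control command.  The tag is byte-swapped as
       * needed by vtnet_gtoh16() before it is handed to the device.
       */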
3814  static int
3815  vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3816  {
3817  	struct sglist_seg segs[3];
3818  	struct sglist sg;
3819  	struct {
3820  		struct virtio_net_ctrl_hdr hdr __aligned(2);
3821  		uint8_t pad1;
3822  		uint16_t tag __aligned(2);
3823  		uint8_t pad2;
3824  		uint8_t ack;
3825  	} s;
3826  	int error;
3827  
3828  	error = 0;
3829  	MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3830  
3831  	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3832  	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3833  	s.tag = vtnet_gtoh16(sc, tag);
3834  	s.ack = VIRTIO_NET_ERR;
3835  
3836  	sglist_init(&sg, nitems(segs), segs);
3837  	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3838  	error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3839  	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3840  	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3841  
3842  	if (error == 0)
3843  		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3844  
3845  	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3846  }
3847  
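      /*
       * Replay the driver's shadow VLAN table into the host filter, for
       * example after the device has been reinitialized.  Each set bit in
       * vtnet_vlan_filter[] corresponds to one configured VLAN ID.
       */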
3848  static void
3849  vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3850  {
3851  	int i, bit;
3852  	uint32_t w;
3853  	uint16_t tag;
3854  
3855  	MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3856  	VTNET_CORE_LOCK_ASSERT(sc);
3857  
3858  	/* Enable the filter for each configured VLAN. */
3859  	for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3860  		w = sc->vtnet_vlan_filter[i];
3861  
3862  		while ((bit = ffs(w) - 1) != -1) {
3863  			w &= ~(1 << bit);
3864  			tag = sizeof(w) * CHAR_BIT * i + bit;
3865  
3866  			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3867  				device_printf(sc->vtnet_dev,
3868  				    "cannot enable VLAN %d filter\n", tag);
3869  			}
3870  		}
3871  	}
3872  }
3873  
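      /*
       * Record a VLAN (un)registration in the shadow bitmap and, when
       * hardware VLAN filtering is enabled and the interface is running,
       * push the change to the host.  The 4096 possible IDs map to word
       * index (tag >> 5) and bit (tag & 0x1F) of the 32-bit filter words.
       */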
3874  static void
3875  vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3876  {
3877  	if_t ifp;
3878  	int idx, bit;
3879  
3880  	ifp = sc->vtnet_ifp;
3881  	idx = (tag >> 5) & 0x7F;
3882  	bit = tag & 0x1F;
3883  
3884  	if (tag == 0 || tag > 4095)
3885  		return;
3886  
3887  	VTNET_CORE_LOCK(sc);
3888  
3889  	if (add)
3890  		sc->vtnet_vlan_filter[idx] |= (1 << bit);
3891  	else
3892  		sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3893  
3894  	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER &&
3895  	    if_getdrvflags(ifp) & IFF_DRV_RUNNING &&
3896  	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3897  		device_printf(sc->vtnet_dev,
3898  		    "cannot %s VLAN %d %s the host filter table\n",
3899  		    add ? "add" : "remove", tag, add ? "to" : "from");
3900  	}
3901  
3902  	VTNET_CORE_UNLOCK(sc);
3903  }
3904  
3905  static void
3906  vtnet_register_vlan(void *arg, if_t ifp, uint16_t tag)
3907  {
3908  
3909  	if (if_getsoftc(ifp) != arg)
3910  		return;
3911  
3912  	vtnet_update_vlan_filter(arg, 1, tag);
3913  }
3914  
3915  static void
3916  vtnet_unregister_vlan(void *arg, if_t ifp, uint16_t tag)
3917  {
3918  
3919  	if (if_getsoftc(ifp) != arg)
3920  		return;
3921  
3922  	vtnet_update_vlan_filter(arg, 0, tag);
3923  }
3924  
3925  static void
3926  vtnet_update_speed_duplex(struct vtnet_softc *sc)
3927  {
3928  	if_t ifp;
3929  	uint32_t speed;
3930  
3931  	ifp = sc->vtnet_ifp;
3932  
3933  	if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0)
3934  		return;
3935  
3936  	/* BMV: Ignore duplex. */
3937  	speed = virtio_read_dev_config_4(sc->vtnet_dev,
3938  	    offsetof(struct virtio_net_config, speed));
3939  	if (speed != UINT32_MAX)
3940  		if_setbaudrate(ifp, IF_Mbps(speed));
3941  }
3942  
3943  static int
3944  vtnet_is_link_up(struct vtnet_softc *sc)
3945  {
3946  	uint16_t status;
3947  
3948  	if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0)
3949  		return (1);
3950  
3951  	status = virtio_read_dev_config_2(sc->vtnet_dev,
3952  	    offsetof(struct virtio_net_config, status));
3953  
3954  	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3955  }
3956  
3957  static void
3958  vtnet_update_link_status(struct vtnet_softc *sc)
3959  {
3960  	if_t ifp;
3961  	int link;
3962  
3963  	ifp = sc->vtnet_ifp;
3964  	VTNET_CORE_LOCK_ASSERT(sc);
3965  	link = vtnet_is_link_up(sc);
3966  
3967  	/* Notify if the link status has changed. */
3968  	if (link != 0 && sc->vtnet_link_active == 0) {
3969  		vtnet_update_speed_duplex(sc);
3970  		sc->vtnet_link_active = 1;
3971  		if_link_state_change(ifp, LINK_STATE_UP);
3972  	} else if (link == 0 && sc->vtnet_link_active != 0) {
3973  		sc->vtnet_link_active = 0;
3974  		if_link_state_change(ifp, LINK_STATE_DOWN);
3975  	}
3976  }
3977  
3978  static int
3979  vtnet_ifmedia_upd(if_t ifp __unused)
3980  {
3981  	return (EOPNOTSUPP);
3982  }
3983  
3984  static void
3985  vtnet_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
3986  {
3987  	struct vtnet_softc *sc;
3988  
3989  	sc = if_getsoftc(ifp);
3990  
3991  	ifmr->ifm_status = IFM_AVALID;
3992  	ifmr->ifm_active = IFM_ETHER;
3993  
3994  	VTNET_CORE_LOCK(sc);
3995  	if (vtnet_is_link_up(sc) != 0) {
3996  		ifmr->ifm_status |= IFM_ACTIVE;
3997  		ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
3998  	} else
3999  		ifmr->ifm_active |= IFM_NONE;
4000  	VTNET_CORE_UNLOCK(sc);
4001  }
4002  
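      /*
       * Read the MAC address from the device's config space when the MAC
       * feature was negotiated, otherwise synthesize one.  The 0xB2 first
       * octet has the locally administered bit set and the multicast bit
       * clear, so the generated address is a valid unicast address.
       */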
4003  static void
4004  vtnet_get_macaddr(struct vtnet_softc *sc)
4005  {
4006  
4007  	if (sc->vtnet_flags & VTNET_FLAG_MAC) {
4008  		virtio_read_device_config_array(sc->vtnet_dev,
4009  		    offsetof(struct virtio_net_config, mac),
4010  		    &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN);
4011  	} else {
4012  		/* Generate a random locally administered unicast address. */
4013  		sc->vtnet_hwaddr[0] = 0xB2;
4014  		arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
4015  	}
4016  }
4017  
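      /*
       * Push the current MAC address to the device: preferably through the
       * control virtqueue (vtnet_ctrl_mac_cmd()), otherwise by writing the
       * config space directly, which is only allowed on legacy devices.
       */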
4018  static void
4019  vtnet_set_macaddr(struct vtnet_softc *sc)
4020  {
4021  	device_t dev;
4022  	int error;
4023  
4024  	dev = sc->vtnet_dev;
4025  
4026  	if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
4027  		error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr);
4028  		if (error)
4029  			device_printf(dev, "unable to set MAC address\n");
4030  		return;
4031  	}
4032  
4033  	/* MAC in config is read-only in modern VirtIO. */
4034  	if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) {
4035  		for (int i = 0; i < ETHER_ADDR_LEN; i++) {
4036  			virtio_write_dev_config_1(dev,
4037  			    offsetof(struct virtio_net_config, mac) + i,
4038  			    sc->vtnet_hwaddr[i]);
4039  		}
4040  	}
4041  }
4042  
4043  static void
4044  vtnet_attached_set_macaddr(struct vtnet_softc *sc)
4045  {
4046  
4047  	/* Assign MAC address if it was generated. */
4048  	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0)
4049  		vtnet_set_macaddr(sc);
4050  }
4051  
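      /*
       * Software VLAN tag extraction: record the 802.1Q tag in the mbuf
       * packet header (M_VLANTAG) and strip the 4-byte encapsulation by
       * sliding the Ethernet header forward over it.
       */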
4052  static void
4053  vtnet_vlan_tag_remove(struct mbuf *m)
4054  {
4055  	struct ether_vlan_header *evh;
4056  
4057  	evh = mtod(m, struct ether_vlan_header *);
4058  	m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
4059  	m->m_flags |= M_VLANTAG;
4060  
4061  	/* Strip the 802.1Q header. */
4062  	bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
4063  	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
4064  	m_adj(m, ETHER_VLAN_ENCAP_LEN);
4065  }
4066  
4067  static void
4068  vtnet_set_rx_process_limit(struct vtnet_softc *sc)
4069  {
4070  	int limit;
4071  
4072  	limit = vtnet_tunable_int(sc, "rx_process_limit",
4073  	    vtnet_rx_process_limit);
4074  	if (limit < 0)
4075  		limit = INT_MAX;
4076  	sc->vtnet_rx_process_limit = limit;
4077  }
4078  
4079  static void
4080  vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
4081      struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
4082  {
4083  	struct sysctl_oid *node;
4084  	struct sysctl_oid_list *list;
4085  	struct vtnet_rxq_stats *stats;
4086  	char namebuf[16];
4087  
4088  	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
4089  	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
4090  	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
4091  	list = SYSCTL_CHILDREN(node);
4092  
4093  	stats = &rxq->vtnrx_stats;
4094  
4095  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
4096  	    &stats->vrxs_ipackets, "Receive packets");
4097  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
4098  	    &stats->vrxs_ibytes, "Receive bytes");
4099  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
4100  	    &stats->vrxs_iqdrops, "Receive drops");
4101  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
4102  	    &stats->vrxs_ierrors, "Receive errors");
4103  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
4104  	    &stats->vrxs_csum, "Receive checksum offloaded");
4105  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
4106  	    &stats->vrxs_csum_failed, "Receive checksum offload failed");
4107  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD,
4108  	    &stats->vrxs_host_lro, "Receive host segmentation offloaded");
4109  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
4110  	    &stats->vrxs_rescheduled,
4111  	    "Receive interrupt handler rescheduled");
4112  }
4113  
4114  static void
4115  vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
4116      struct sysctl_oid_list *child, struct vtnet_txq *txq)
4117  {
4118  	struct sysctl_oid *node;
4119  	struct sysctl_oid_list *list;
4120  	struct vtnet_txq_stats *stats;
4121  	char namebuf[16];
4122  
4123  	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
4124  	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
4125  	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
4126  	list = SYSCTL_CHILDREN(node);
4127  
4128  	stats = &txq->vtntx_stats;
4129  
4130  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
4131  	    &stats->vtxs_opackets, "Transmit packets");
4132  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
4133  	    &stats->vtxs_obytes, "Transmit bytes");
4134  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
4135  	    &stats->vtxs_omcasts, "Transmit multicasts");
4136  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
4137  	    &stats->vtxs_csum, "Transmit checksum offloaded");
4138  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
4139  	    &stats->vtxs_tso, "Transmit TCP segmentation offloaded");
4140  	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
4141  	    &stats->vtxs_rescheduled,
4142  	    "Transmit interrupt handler rescheduled");
4143  }
4144  
4145  static void
4146  vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
4147  {
4148  	device_t dev;
4149  	struct sysctl_ctx_list *ctx;
4150  	struct sysctl_oid *tree;
4151  	struct sysctl_oid_list *child;
4152  	int i;
4153  
4154  	dev = sc->vtnet_dev;
4155  	ctx = device_get_sysctl_ctx(dev);
4156  	tree = device_get_sysctl_tree(dev);
4157  	child = SYSCTL_CHILDREN(tree);
4158  
4159  	for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
4160  		vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
4161  		vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
4162  	}
4163  }
4164  
4165  static void
4166  vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
4167      struct sysctl_oid_list *child, struct vtnet_softc *sc)
4168  {
4169  	struct vtnet_statistics *stats;
4170  	struct vtnet_rxq_stats rxaccum;
4171  	struct vtnet_txq_stats txaccum;
4172  
4173  	vtnet_accum_stats(sc, &rxaccum, &txaccum);
4174  
4175  	stats = &sc->vtnet_stats;
4176  	stats->rx_csum_offloaded = rxaccum.vrxs_csum;
4177  	stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
4178  	stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
4179  	stats->tx_csum_offloaded = txaccum.vtxs_csum;
4180  	stats->tx_tso_offloaded = txaccum.vtxs_tso;
4181  	stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
4182  
4183  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
4184  	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
4185  	    "Mbuf cluster allocation failures");
4186  
4187  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
4188  	    CTLFLAG_RD, &stats->rx_frame_too_large,
4189  	    "Received frame larger than the mbuf chain");
4190  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
4191  	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
4192  	    "Enqueuing the replacement receive mbuf failed");
4193  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
4194  	    CTLFLAG_RD, &stats->rx_mergeable_failed,
4195  	    "Mergeable buffers receive failures");
4196  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
4197  	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
4198  	    "Received checksum offloaded buffer with unsupported "
4199  	    "Ethernet type");
4200  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
4201  	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
4202  	    "Received checksum offloaded buffer with incorrect IP protocol");
4203  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
4204  	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
4205  	    "Received checksum offloaded buffer with incorrect offset");
4206  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
4207  	    CTLFLAG_RD, &stats->rx_csum_bad_proto,
4208  	    "Received checksum offloaded buffer with incorrect protocol");
4209  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
4210  	    CTLFLAG_RD, &stats->rx_csum_failed,
4211  	    "Received buffer checksum offload failed");
4212  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
4213  	    CTLFLAG_RD, &stats->rx_csum_offloaded,
4214  	    "Received buffer checksum offload succeeded");
4215  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
4216  	    CTLFLAG_RD, &stats->rx_task_rescheduled,
4217  	    "Times the receive interrupt task rescheduled itself");
4218  
4219  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
4220  	    CTLFLAG_RD, &stats->tx_csum_unknown_ethtype,
4221  	    "Aborted transmit of checksum offloaded buffer with unknown "
4222  	    "Ethernet type");
4223  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
4224  	    CTLFLAG_RD, &stats->tx_csum_proto_mismatch,
4225  	    "Aborted transmit of checksum offloaded buffer because of "
4226  	    "mismatched protocols");
4227  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
4228  	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
4229  	    "Aborted transmit of TSO buffer with non-TCP protocol");
4230  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
4231  	    CTLFLAG_RD, &stats->tx_tso_without_csum,
4232  	    "Aborted transmit of TSO buffer without TCP checksum offload");
4233  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
4234  	    CTLFLAG_RD, &stats->tx_defragged,
4235  	    "Transmit mbufs defragged");
4236  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
4237  	    CTLFLAG_RD, &stats->tx_defrag_failed,
4238  	    "Aborted transmit of buffer because defrag failed");
4239  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
4240  	    CTLFLAG_RD, &stats->tx_csum_offloaded,
4241  	    "Offloaded checksum of transmitted buffer");
4242  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
4243  	    CTLFLAG_RD, &stats->tx_tso_offloaded,
4244  	    "Segmentation offload of transmitted buffer");
4245  	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
4246  	    CTLFLAG_RD, &stats->tx_task_rescheduled,
4247  	    "Times the transmit interrupt task rescheduled itself");
4248  }
4249  
4250  static void
4251  vtnet_setup_sysctl(struct vtnet_softc *sc)
4252  {
4253  	device_t dev;
4254  	struct sysctl_ctx_list *ctx;
4255  	struct sysctl_oid *tree;
4256  	struct sysctl_oid_list *child;
4257  
4258  	dev = sc->vtnet_dev;
4259  	ctx = device_get_sysctl_ctx(dev);
4260  	tree = device_get_sysctl_tree(dev);
4261  	child = SYSCTL_CHILDREN(tree);
4262  
4263  	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
4264  	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
4265  	    "Maximum number of supported virtqueue pairs");
4266  	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs",
4267  	    CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0,
4268  	    "Number of requested virtqueue pairs");
4269  	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
4270  	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
4271  	    "Number of active virtqueue pairs");
4272  
4273  	vtnet_setup_stat_sysctl(ctx, child, sc);
4274  }
4275  
4276  static void
4277  vtnet_load_tunables(struct vtnet_softc *sc)
4278  {
4279  
4280  	sc->vtnet_lro_entry_count = vtnet_tunable_int(sc,
4281  	    "lro_entry_count", vtnet_lro_entry_count);
4282  	if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES)
4283  		sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES;
4284  
4285  	sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc,
4286  	    "lro_mbufq_depth", vtnet_lro_mbufq_depth);
4287  }
4288  
4289  static int
4290  vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
4291  {
4292  
4293  	return (virtqueue_enable_intr(rxq->vtnrx_vq));
4294  }
4295  
4296  static void
4297  vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
4298  {
4299  
4300  	virtqueue_disable_intr(rxq->vtnrx_vq);
4301  }
4302  
4303  static int
4304  vtnet_txq_enable_intr(struct vtnet_txq *txq)
4305  {
4306  	struct virtqueue *vq;
4307  
4308  	vq = txq->vtntx_vq;
4309  
4310  	if (vtnet_txq_below_threshold(txq) != 0)
4311  		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
4312  
4313  	/*
4314  	 * The free count is above our threshold. Keep the Tx interrupt
4315  	 * disabled until the queue is fuller.
4316  	 */
4317  	return (0);
4318  }
4319  
4320  static void
4321  vtnet_txq_disable_intr(struct vtnet_txq *txq)
4322  {
4323  
4324  	virtqueue_disable_intr(txq->vtntx_vq);
4325  }
4326  
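      /*
       * Re-enable Rx interrupts on every active queue pair.  If the enable
       * races with buffers already pending in the virtqueue, the queue's
       * taskqueue is kicked so those buffers are processed instead of
       * waiting for a future interrupt.
       */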
4327  static void
4328  vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
4329  {
4330  	struct vtnet_rxq *rxq;
4331  	int i;
4332  
4333  	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
4334  		rxq = &sc->vtnet_rxqs[i];
4335  		if (vtnet_rxq_enable_intr(rxq) != 0)
4336  			taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
4337  	}
4338  }
4339  
4340  static void
4341  vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
4342  {
4343  	int i;
4344  
4345  	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4346  		vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
4347  }
4348  
4349  static void
4350  vtnet_enable_interrupts(struct vtnet_softc *sc)
4351  {
4352  
4353  	vtnet_enable_rx_interrupts(sc);
4354  	vtnet_enable_tx_interrupts(sc);
4355  }
4356  
4357  static void
4358  vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
4359  {
4360  	int i;
4361  
4362  	for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4363  		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
4364  }
4365  
4366  static void
4367  vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
4368  {
4369  	int i;
4370  
4371  	for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4372  		vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
4373  }
4374  
4375  static void
4376  vtnet_disable_interrupts(struct vtnet_softc *sc)
4377  {
4378  
4379  	vtnet_disable_rx_interrupts(sc);
4380  	vtnet_disable_tx_interrupts(sc);
4381  }
4382  
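      /*
       * Fetch a per-device tunable of the form hw.vtnet.<unit>.<knob>,
       * falling back to the supplied default when it is not set.  For
       * example (value shown is illustrative only), a loader.conf(5)
       * entry such as
       *
       *	hw.vtnet.0.rx_process_limit="256"
       *
       * overrides the Rx processing limit for the first vtnet device.
       */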
4383  static int
4384  vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
4385  {
4386  	char path[64];
4387  
4388  	snprintf(path, sizeof(path),
4389  	    "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
4390  	TUNABLE_INT_FETCH(path, &def);
4391  
4392  	return (def);
4393  }
4394  
4395  #ifdef DEBUGNET
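      /*
       * debugnet(4) methods, used for kernel dumps over the network.
       * These run in a constrained, polled context, so they drive the
       * queues directly instead of relying on interrupts or taskqueues.
       */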
4396  static void
4397  vtnet_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize)
4398  {
4399  	struct vtnet_softc *sc;
4400  
4401  	sc = if_getsoftc(ifp);
4402  
4403  	VTNET_CORE_LOCK(sc);
4404  	*nrxr = sc->vtnet_req_vq_pairs;
4405  	*ncl = DEBUGNET_MAX_IN_FLIGHT;
4406  	*clsize = sc->vtnet_rx_clustersz;
4407  	VTNET_CORE_UNLOCK(sc);
4408  }
4409  
4410  static void
4411  vtnet_debugnet_event(if_t ifp, enum debugnet_ev event)
4412  {
4413  	struct vtnet_softc *sc;
4414  	static bool sw_lro_enabled = false;
4415  
4416  	/*
4417  	 * Disable software LRO, since it would require entering the network
4418  	 * epoch when calling vtnet_rxq_eof() in vtnet_debugnet_poll().
4419  	 */
4420  	sc = if_getsoftc(ifp);
4421  	switch (event) {
4422  	case DEBUGNET_START:
4423  		sw_lro_enabled = (sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0;
4424  		if (sw_lro_enabled)
4425  			sc->vtnet_flags &= ~VTNET_FLAG_SW_LRO;
4426  		break;
4427  	case DEBUGNET_END:
4428  		if (sw_lro_enabled)
4429  			sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
4430  		break;
4431  	}
4432  }
4433  
4434  static int
4435  vtnet_debugnet_transmit(if_t ifp, struct mbuf *m)
4436  {
4437  	struct vtnet_softc *sc;
4438  	struct vtnet_txq *txq;
4439  	int error;
4440  
4441  	sc = if_getsoftc(ifp);
4442  	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4443  	    IFF_DRV_RUNNING)
4444  		return (EBUSY);
4445  
4446  	txq = &sc->vtnet_txqs[0];
4447  	error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
4448  	if (error == 0)
4449  		(void)vtnet_txq_notify(txq);
4450  	return (error);
4451  }
4452  
4453  static int
4454  vtnet_debugnet_poll(if_t ifp, int count)
4455  {
4456  	struct vtnet_softc *sc;
4457  	int i;
4458  
4459  	sc = if_getsoftc(ifp);
4460  	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4461  	    IFF_DRV_RUNNING)
4462  		return (EBUSY);
4463  
4464  	(void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
4465  	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4466  		(void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
4467  	return (0);
4468  }
4469  #endif /* DEBUGNET */
4470