1 /*- 2 * Copyright (c) 2010-2012 Citrix Inc. 3 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. 4 * Copyright (c) 2012 NetApp Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 2004-2006 Kip Macy 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 
53 */ 54 55 #include <sys/cdefs.h> 56 __FBSDID("$FreeBSD$"); 57 58 #include "opt_hn.h" 59 #include "opt_inet6.h" 60 #include "opt_inet.h" 61 #include "opt_rss.h" 62 63 #include <sys/param.h> 64 #include <sys/bus.h> 65 #include <sys/kernel.h> 66 #include <sys/limits.h> 67 #include <sys/malloc.h> 68 #include <sys/mbuf.h> 69 #include <sys/module.h> 70 #include <sys/queue.h> 71 #include <sys/lock.h> 72 #include <sys/rmlock.h> 73 #include <sys/sbuf.h> 74 #include <sys/smp.h> 75 #include <sys/socket.h> 76 #include <sys/sockio.h> 77 #include <sys/sx.h> 78 #include <sys/sysctl.h> 79 #include <sys/systm.h> 80 #include <sys/taskqueue.h> 81 #include <sys/buf_ring.h> 82 #include <sys/eventhandler.h> 83 84 #include <machine/atomic.h> 85 #include <machine/in_cksum.h> 86 87 #include <net/bpf.h> 88 #include <net/ethernet.h> 89 #include <net/if.h> 90 #include <net/if_dl.h> 91 #include <net/if_media.h> 92 #include <net/if_types.h> 93 #include <net/if_var.h> 94 #include <net/rndis.h> 95 #ifdef RSS 96 #include <net/rss_config.h> 97 #endif 98 99 #include <netinet/in_systm.h> 100 #include <netinet/in.h> 101 #include <netinet/ip.h> 102 #include <netinet/ip6.h> 103 #include <netinet/tcp.h> 104 #include <netinet/tcp_lro.h> 105 #include <netinet/udp.h> 106 107 #include <dev/hyperv/include/hyperv.h> 108 #include <dev/hyperv/include/hyperv_busdma.h> 109 #include <dev/hyperv/include/vmbus.h> 110 #include <dev/hyperv/include/vmbus_xact.h> 111 112 #include <dev/hyperv/netvsc/ndis.h> 113 #include <dev/hyperv/netvsc/if_hnreg.h> 114 #include <dev/hyperv/netvsc/if_hnvar.h> 115 #include <dev/hyperv/netvsc/hn_nvs.h> 116 #include <dev/hyperv/netvsc/hn_rndis.h> 117 118 #include "vmbus_if.h" 119 120 #define HN_IFSTART_SUPPORT 121 122 #define HN_RING_CNT_DEF_MAX 8 123 124 #define HN_VFMAP_SIZE_DEF 8 125 126 #define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */ 127 128 /* YYY should get it from the underlying channel */ 129 #define HN_TX_DESC_CNT 512 130 131 #define HN_RNDIS_PKT_LEN \ 132 (sizeof(struct rndis_packet_msg) + \ 133 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ 134 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ 135 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ 136 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) 137 #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE 138 #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE 139 140 #define HN_TX_DATA_BOUNDARY PAGE_SIZE 141 #define HN_TX_DATA_MAXSIZE IP_MAXPACKET 142 #define HN_TX_DATA_SEGSIZE PAGE_SIZE 143 /* -1 for RNDIS packet message */ 144 #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) 145 146 #define HN_DIRECT_TX_SIZE_DEF 128 147 148 #define HN_EARLY_TXEOF_THRESH 8 149 150 #define HN_PKTBUF_LEN_DEF (16 * 1024) 151 152 #define HN_LROENT_CNT_DEF 128 153 154 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) 155 #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) 156 /* YYY 2*MTU is a bit rough, but should be good enough. 
*/ 157 #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) 158 159 #define HN_LRO_ACKCNT_DEF 1 160 161 #define HN_LOCK_INIT(sc) \ 162 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) 163 #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) 164 #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) 165 #define HN_LOCK(sc) \ 166 do { \ 167 while (sx_try_xlock(&(sc)->hn_lock) == 0) \ 168 DELAY(1000); \ 169 } while (0) 170 #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) 171 172 #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) 173 #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) 174 #define HN_CSUM_IP_HWASSIST(sc) \ 175 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) 176 #define HN_CSUM_IP6_HWASSIST(sc) \ 177 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) 178 179 #define HN_PKTSIZE_MIN(align) \ 180 roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ 181 HN_RNDIS_PKT_LEN, (align)) 182 #define HN_PKTSIZE(m, align) \ 183 roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) 184 185 #ifdef RSS 186 #define HN_RING_IDX2CPU(sc, idx) rss_getcpu((idx) % rss_getnumbuckets()) 187 #else 188 #define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus) 189 #endif 190 191 struct hn_txdesc { 192 #ifndef HN_USE_TXDESC_BUFRING 193 SLIST_ENTRY(hn_txdesc) link; 194 #endif 195 STAILQ_ENTRY(hn_txdesc) agg_link; 196 197 /* Aggregated txdescs, in sending order. */ 198 STAILQ_HEAD(, hn_txdesc) agg_list; 199 200 /* The oldest packet, if transmission aggregation happens. */ 201 struct mbuf *m; 202 struct hn_tx_ring *txr; 203 int refs; 204 uint32_t flags; /* HN_TXD_FLAG_ */ 205 struct hn_nvs_sendctx send_ctx; 206 uint32_t chim_index; 207 int chim_size; 208 209 bus_dmamap_t data_dmap; 210 211 bus_addr_t rndis_pkt_paddr; 212 struct rndis_packet_msg *rndis_pkt; 213 bus_dmamap_t rndis_pkt_dmap; 214 }; 215 216 #define HN_TXD_FLAG_ONLIST 0x0001 217 #define HN_TXD_FLAG_DMAMAP 0x0002 218 #define HN_TXD_FLAG_ONAGG 0x0004 219 220 struct hn_rxinfo { 221 uint32_t vlan_info; 222 uint32_t csum_info; 223 uint32_t hash_info; 224 uint32_t hash_value; 225 }; 226 227 struct hn_rxvf_setarg { 228 struct hn_rx_ring *rxr; 229 struct ifnet *vf_ifp; 230 }; 231 232 #define HN_RXINFO_VLAN 0x0001 233 #define HN_RXINFO_CSUM 0x0002 234 #define HN_RXINFO_HASHINF 0x0004 235 #define HN_RXINFO_HASHVAL 0x0008 236 #define HN_RXINFO_ALL \ 237 (HN_RXINFO_VLAN | \ 238 HN_RXINFO_CSUM | \ 239 HN_RXINFO_HASHINF | \ 240 HN_RXINFO_HASHVAL) 241 242 #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff 243 #define HN_NDIS_RXCSUM_INFO_INVALID 0 244 #define HN_NDIS_HASH_INFO_INVALID 0 245 246 static int hn_probe(device_t); 247 static int hn_attach(device_t); 248 static int hn_detach(device_t); 249 static int hn_shutdown(device_t); 250 static void hn_chan_callback(struct vmbus_channel *, 251 void *); 252 253 static void hn_init(void *); 254 static int hn_ioctl(struct ifnet *, u_long, caddr_t); 255 #ifdef HN_IFSTART_SUPPORT 256 static void hn_start(struct ifnet *); 257 #endif 258 static int hn_transmit(struct ifnet *, struct mbuf *); 259 static void hn_xmit_qflush(struct ifnet *); 260 static int hn_ifmedia_upd(struct ifnet *); 261 static void hn_ifmedia_sts(struct ifnet *, 262 struct ifmediareq *); 263 264 static void hn_ifnet_event(void *, struct ifnet *, int); 265 static void hn_ifaddr_event(void *, struct ifnet *); 266 static void hn_ifnet_attevent(void *, struct ifnet *); 267 static void hn_ifnet_detevent(void *, struct ifnet *); 268 static void hn_ifnet_lnkevent(void *, struct ifnet *, int); 269 
270 static bool hn_ismyvf(const struct hn_softc *, 271 const struct ifnet *); 272 static void hn_rxvf_change(struct hn_softc *, 273 struct ifnet *, bool); 274 static void hn_rxvf_set(struct hn_softc *, struct ifnet *); 275 static void hn_rxvf_set_task(void *, int); 276 static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *); 277 static int hn_xpnt_vf_iocsetflags(struct hn_softc *); 278 static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, 279 struct ifreq *); 280 static void hn_xpnt_vf_saveifflags(struct hn_softc *); 281 static bool hn_xpnt_vf_isready(struct hn_softc *); 282 static void hn_xpnt_vf_setready(struct hn_softc *); 283 static void hn_xpnt_vf_init_taskfunc(void *, int); 284 static void hn_xpnt_vf_init(struct hn_softc *); 285 static void hn_xpnt_vf_setenable(struct hn_softc *); 286 static void hn_xpnt_vf_setdisable(struct hn_softc *, bool); 287 288 static int hn_rndis_rxinfo(const void *, int, 289 struct hn_rxinfo *); 290 static void hn_rndis_rx_data(struct hn_rx_ring *, 291 const void *, int); 292 static void hn_rndis_rx_status(struct hn_softc *, 293 const void *, int); 294 static void hn_rndis_init_fixat(struct hn_softc *, int); 295 296 static void hn_nvs_handle_notify(struct hn_softc *, 297 const struct vmbus_chanpkt_hdr *); 298 static void hn_nvs_handle_comp(struct hn_softc *, 299 struct vmbus_channel *, 300 const struct vmbus_chanpkt_hdr *); 301 static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, 302 struct vmbus_channel *, 303 const struct vmbus_chanpkt_hdr *); 304 static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, 305 struct vmbus_channel *, uint64_t); 306 307 #if __FreeBSD_version >= 1100099 308 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); 309 static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); 310 #endif 311 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); 312 static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); 313 #if __FreeBSD_version < 1100095 314 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); 315 #else 316 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); 317 #endif 318 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 319 static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 320 static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); 321 static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); 322 static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); 323 static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); 324 static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); 325 #ifndef RSS 326 static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); 327 static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); 328 #endif 329 static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); 330 static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); 331 static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); 332 static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); 333 static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); 334 static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS); 335 static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS); 336 static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS); 337 static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS); 338 static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS); 339 static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS); 340 static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS); 341 342 static void hn_stop(struct hn_softc *, bool); 343 static void hn_init_locked(struct hn_softc *); 344 static int hn_chan_attach(struct hn_softc *, 345 struct vmbus_channel *); 346 static void hn_chan_detach(struct hn_softc *, 347 struct vmbus_channel *); 348 static int 
hn_attach_subchans(struct hn_softc *); 349 static void hn_detach_allchans(struct hn_softc *); 350 static void hn_chan_rollup(struct hn_rx_ring *, 351 struct hn_tx_ring *); 352 static void hn_set_ring_inuse(struct hn_softc *, int); 353 static int hn_synth_attach(struct hn_softc *, int); 354 static void hn_synth_detach(struct hn_softc *); 355 static int hn_synth_alloc_subchans(struct hn_softc *, 356 int *); 357 static bool hn_synth_attachable(const struct hn_softc *); 358 static void hn_suspend(struct hn_softc *); 359 static void hn_suspend_data(struct hn_softc *); 360 static void hn_suspend_mgmt(struct hn_softc *); 361 static void hn_resume(struct hn_softc *); 362 static void hn_resume_data(struct hn_softc *); 363 static void hn_resume_mgmt(struct hn_softc *); 364 static void hn_suspend_mgmt_taskfunc(void *, int); 365 static void hn_chan_drain(struct hn_softc *, 366 struct vmbus_channel *); 367 static void hn_disable_rx(struct hn_softc *); 368 static void hn_drain_rxtx(struct hn_softc *, int); 369 static void hn_polling(struct hn_softc *, u_int); 370 static void hn_chan_polling(struct vmbus_channel *, u_int); 371 static void hn_mtu_change_fixup(struct hn_softc *); 372 373 static void hn_update_link_status(struct hn_softc *); 374 static void hn_change_network(struct hn_softc *); 375 static void hn_link_taskfunc(void *, int); 376 static void hn_netchg_init_taskfunc(void *, int); 377 static void hn_netchg_status_taskfunc(void *, int); 378 static void hn_link_status(struct hn_softc *); 379 380 static int hn_create_rx_data(struct hn_softc *, int); 381 static void hn_destroy_rx_data(struct hn_softc *); 382 static int hn_check_iplen(const struct mbuf *, int); 383 static int hn_set_rxfilter(struct hn_softc *, uint32_t); 384 static int hn_rxfilter_config(struct hn_softc *); 385 #ifndef RSS 386 static int hn_rss_reconfig(struct hn_softc *); 387 #endif 388 static void hn_rss_ind_fixup(struct hn_softc *); 389 static int hn_rxpkt(struct hn_rx_ring *, const void *, 390 int, const struct hn_rxinfo *); 391 392 static int hn_tx_ring_create(struct hn_softc *, int); 393 static void hn_tx_ring_destroy(struct hn_tx_ring *); 394 static int hn_create_tx_data(struct hn_softc *, int); 395 static void hn_fixup_tx_data(struct hn_softc *); 396 static void hn_destroy_tx_data(struct hn_softc *); 397 static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); 398 static void hn_txdesc_gc(struct hn_tx_ring *, 399 struct hn_txdesc *); 400 static int hn_encap(struct ifnet *, struct hn_tx_ring *, 401 struct hn_txdesc *, struct mbuf **); 402 static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, 403 struct hn_txdesc *); 404 static void hn_set_chim_size(struct hn_softc *, int); 405 static void hn_set_tso_maxsize(struct hn_softc *, int, int); 406 static bool hn_tx_ring_pending(struct hn_tx_ring *); 407 static void hn_tx_ring_qflush(struct hn_tx_ring *); 408 static void hn_resume_tx(struct hn_softc *, int); 409 static void hn_set_txagg(struct hn_softc *); 410 static void *hn_try_txagg(struct ifnet *, 411 struct hn_tx_ring *, struct hn_txdesc *, 412 int); 413 static int hn_get_txswq_depth(const struct hn_tx_ring *); 414 static void hn_txpkt_done(struct hn_nvs_sendctx *, 415 struct hn_softc *, struct vmbus_channel *, 416 const void *, int); 417 static int hn_txpkt_sglist(struct hn_tx_ring *, 418 struct hn_txdesc *); 419 static int hn_txpkt_chim(struct hn_tx_ring *, 420 struct hn_txdesc *); 421 static int hn_xmit(struct hn_tx_ring *, int); 422 static void hn_xmit_taskfunc(void *, int); 423 static void hn_xmit_txeof(struct 
hn_tx_ring *);
static void			hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int			hn_start_locked(struct hn_tx_ring *, int);
static void			hn_start_taskfunc(void *, int);
static void			hn_start_txeof(struct hn_tx_ring *);
static void			hn_start_txeof_taskfunc(void *, int);
#endif

SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust tcp segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust tcp segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust udp datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust udp datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust ip packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust ip packet verification on host side, "
    "when csum info is missing (global setting)");

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");

#define HN_TX_TASKQ_M_INDEP	0
#define HN_TX_TASKQ_M_GLOBAL	1
#define HN_TX_TASKQ_M_EVTTQ	2

static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
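/* 0 (the default) means one TX ring per channel; out-of-range values are clamped to the channel count. */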
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");

/* VF list */
SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, CTLFLAG_RD | CTLTYPE_STRING,
    0, 0, hn_vflist_sysctl, "A", "VF list");

/* VF mapping */
SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING,
    0, 0, hn_vfmap_sysctl, "A", "VF mapping");

/* Transparent VF */
static int hn_xpnt_vf = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN,
    &hn_xpnt_vf, 0, "Transparent VF mode");

/* Accurate BPF support for Transparent VF */
static int hn_xpnt_vf_accbpf = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN,
    &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF");

/* Extra wait for transparent VF attach routine; unit: seconds.
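 * This delay gives the VF driver time to finish its attach before the datapath is switched over to it.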
*/ 560 static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; 561 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN, 562 &hn_xpnt_vf_attwait, 0, 563 "Extra wait for transparent VF attach routing; unit: seconds"); 564 565 static u_int hn_cpu_index; /* next CPU for channel */ 566 static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */ 567 568 static struct rmlock hn_vfmap_lock; 569 static int hn_vfmap_size; 570 static struct ifnet **hn_vfmap; 571 572 #ifndef RSS 573 static const uint8_t 574 hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 575 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 576 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 577 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 578 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 579 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 580 }; 581 #endif /* !RSS */ 582 583 static const struct hyperv_guid hn_guid = { 584 .hv_guid = { 585 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, 586 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e } 587 }; 588 589 static device_method_t hn_methods[] = { 590 /* Device interface */ 591 DEVMETHOD(device_probe, hn_probe), 592 DEVMETHOD(device_attach, hn_attach), 593 DEVMETHOD(device_detach, hn_detach), 594 DEVMETHOD(device_shutdown, hn_shutdown), 595 DEVMETHOD_END 596 }; 597 598 static driver_t hn_driver = { 599 "hn", 600 hn_methods, 601 sizeof(struct hn_softc) 602 }; 603 604 static devclass_t hn_devclass; 605 606 DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); 607 MODULE_VERSION(hn, 1); 608 MODULE_DEPEND(hn, vmbus, 1, 1, 1); 609 610 #if __FreeBSD_version >= 1100099 611 static void 612 hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) 613 { 614 int i; 615 616 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 617 sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; 618 } 619 #endif 620 621 static int 622 hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) 623 { 624 625 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 626 txd->chim_size == 0, ("invalid rndis sglist txd")); 627 return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, 628 &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); 629 } 630 631 static int 632 hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) 633 { 634 struct hn_nvs_rndis rndis; 635 636 KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && 637 txd->chim_size > 0, ("invalid rndis chim txd")); 638 639 rndis.nvs_type = HN_NVS_TYPE_RNDIS; 640 rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; 641 rndis.nvs_chim_idx = txd->chim_index; 642 rndis.nvs_chim_sz = txd->chim_size; 643 644 return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, 645 &rndis, sizeof(rndis), &txd->send_ctx)); 646 } 647 648 static __inline uint32_t 649 hn_chim_alloc(struct hn_softc *sc) 650 { 651 int i, bmap_cnt = sc->hn_chim_bmap_cnt; 652 u_long *bmap = sc->hn_chim_bmap; 653 uint32_t ret = HN_NVS_CHIM_IDX_INVALID; 654 655 for (i = 0; i < bmap_cnt; ++i) { 656 int idx; 657 658 idx = ffsl(~bmap[i]); 659 if (idx == 0) 660 continue; 661 662 --idx; /* ffsl is 1-based */ 663 KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, 664 ("invalid i %d and idx %d", i, idx)); 665 666 if (atomic_testandset_long(&bmap[i], idx)) 667 continue; 668 669 ret = i * LONG_BIT + idx; 670 break; 671 } 672 return (ret); 673 } 674 675 static __inline void 676 hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) 677 { 678 u_long mask; 679 uint32_t idx; 680 681 idx = chim_idx / LONG_BIT; 682 KASSERT(idx < sc->hn_chim_bmap_cnt, 683 ("invalid chimney index 0x%x", chim_idx)); 684 685 mask = 1UL << 
(chim_idx % LONG_BIT); 686 KASSERT(sc->hn_chim_bmap[idx] & mask, 687 ("index bitmap 0x%lx, chimney index %u, " 688 "bitmap idx %d, bitmask 0x%lx", 689 sc->hn_chim_bmap[idx], chim_idx, idx, mask)); 690 691 atomic_clear_long(&sc->hn_chim_bmap[idx], mask); 692 } 693 694 #if defined(INET6) || defined(INET) 695 696 #define PULLUP_HDR(m, len) \ 697 do { \ 698 if (__predict_false((m)->m_len < (len))) { \ 699 (m) = m_pullup((m), (len)); \ 700 if ((m) == NULL) \ 701 return (NULL); \ 702 } \ 703 } while (0) 704 705 /* 706 * NOTE: If this function failed, the m_head would be freed. 707 */ 708 static __inline struct mbuf * 709 hn_tso_fixup(struct mbuf *m_head) 710 { 711 struct ether_vlan_header *evl; 712 struct tcphdr *th; 713 int ehlen; 714 715 KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); 716 717 PULLUP_HDR(m_head, sizeof(*evl)); 718 evl = mtod(m_head, struct ether_vlan_header *); 719 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 720 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 721 else 722 ehlen = ETHER_HDR_LEN; 723 724 #ifdef INET 725 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 726 struct ip *ip; 727 int iphlen; 728 729 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 730 ip = mtodo(m_head, ehlen); 731 iphlen = ip->ip_hl << 2; 732 733 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 734 th = mtodo(m_head, ehlen + iphlen); 735 736 ip->ip_len = 0; 737 ip->ip_sum = 0; 738 th->th_sum = in_pseudo(ip->ip_src.s_addr, 739 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 740 } 741 #endif 742 #if defined(INET6) && defined(INET) 743 else 744 #endif 745 #ifdef INET6 746 { 747 struct ip6_hdr *ip6; 748 749 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 750 ip6 = mtodo(m_head, ehlen); 751 if (ip6->ip6_nxt != IPPROTO_TCP) { 752 m_freem(m_head); 753 return (NULL); 754 } 755 756 PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); 757 th = mtodo(m_head, ehlen + sizeof(*ip6)); 758 759 ip6->ip6_plen = 0; 760 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 761 } 762 #endif 763 return (m_head); 764 765 } 766 767 /* 768 * NOTE: If this function failed, the m_head would be freed. 
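 * On success the caller must continue with the returned mbuf pointer, since PULLUP_HDR() may replace m_head via m_pullup().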
769 */ 770 static __inline struct mbuf * 771 hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn) 772 { 773 const struct ether_vlan_header *evl; 774 const struct tcphdr *th; 775 int ehlen; 776 777 *tcpsyn = 0; 778 779 PULLUP_HDR(m_head, sizeof(*evl)); 780 evl = mtod(m_head, const struct ether_vlan_header *); 781 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 782 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 783 else 784 ehlen = ETHER_HDR_LEN; 785 786 #ifdef INET 787 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TCP) { 788 const struct ip *ip; 789 int iphlen; 790 791 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 792 ip = mtodo(m_head, ehlen); 793 iphlen = ip->ip_hl << 2; 794 795 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 796 th = mtodo(m_head, ehlen + iphlen); 797 if (th->th_flags & TH_SYN) 798 *tcpsyn = 1; 799 } 800 #endif 801 #if defined(INET6) && defined(INET) 802 else 803 #endif 804 #ifdef INET6 805 { 806 const struct ip6_hdr *ip6; 807 808 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 809 ip6 = mtodo(m_head, ehlen); 810 if (ip6->ip6_nxt != IPPROTO_TCP) 811 return (m_head); 812 813 PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); 814 th = mtodo(m_head, ehlen + sizeof(*ip6)); 815 if (th->th_flags & TH_SYN) 816 *tcpsyn = 1; 817 } 818 #endif 819 return (m_head); 820 } 821 822 #undef PULLUP_HDR 823 824 #endif /* INET6 || INET */ 825 826 static int 827 hn_set_rxfilter(struct hn_softc *sc, uint32_t filter) 828 { 829 int error = 0; 830 831 HN_LOCK_ASSERT(sc); 832 833 if (sc->hn_rx_filter != filter) { 834 error = hn_rndis_set_rxfilter(sc, filter); 835 if (!error) 836 sc->hn_rx_filter = filter; 837 } 838 return (error); 839 } 840 841 static int 842 hn_rxfilter_config(struct hn_softc *sc) 843 { 844 struct ifnet *ifp = sc->hn_ifp; 845 uint32_t filter; 846 847 HN_LOCK_ASSERT(sc); 848 849 /* 850 * If the non-transparent mode VF is activated, we don't know how 851 * its RX filter is configured, so stick the synthetic device in 852 * the promiscous mode. 853 */ 854 if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) { 855 filter = NDIS_PACKET_TYPE_PROMISCUOUS; 856 } else { 857 filter = NDIS_PACKET_TYPE_DIRECTED; 858 if (ifp->if_flags & IFF_BROADCAST) 859 filter |= NDIS_PACKET_TYPE_BROADCAST; 860 /* TODO: support multicast list */ 861 if ((ifp->if_flags & IFF_ALLMULTI) || 862 !TAILQ_EMPTY(&ifp->if_multiaddrs)) 863 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; 864 } 865 return (hn_set_rxfilter(sc, filter)); 866 } 867 868 static void 869 hn_set_txagg(struct hn_softc *sc) 870 { 871 uint32_t size, pkts; 872 int i; 873 874 /* 875 * Setup aggregation size. 876 */ 877 if (sc->hn_agg_size < 0) 878 size = UINT32_MAX; 879 else 880 size = sc->hn_agg_size; 881 882 if (sc->hn_rndis_agg_size < size) 883 size = sc->hn_rndis_agg_size; 884 885 /* NOTE: We only aggregate packets using chimney sending buffers. */ 886 if (size > (uint32_t)sc->hn_chim_szmax) 887 size = sc->hn_chim_szmax; 888 889 if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) { 890 /* Disable */ 891 size = 0; 892 pkts = 0; 893 goto done; 894 } 895 896 /* NOTE: Type of the per TX ring setting is 'int'. */ 897 if (size > INT_MAX) 898 size = INT_MAX; 899 900 /* 901 * Setup aggregation packet count. 902 */ 903 if (sc->hn_agg_pkts < 0) 904 pkts = UINT32_MAX; 905 else 906 pkts = sc->hn_agg_pkts; 907 908 if (sc->hn_rndis_agg_pkts < pkts) 909 pkts = sc->hn_rndis_agg_pkts; 910 911 if (pkts <= 1) { 912 /* Disable */ 913 size = 0; 914 pkts = 0; 915 goto done; 916 } 917 918 /* NOTE: Type of the per TX ring setting is 'short'. 
*/ 919 if (pkts > SHRT_MAX) 920 pkts = SHRT_MAX; 921 922 done: 923 /* NOTE: Type of the per TX ring setting is 'short'. */ 924 if (sc->hn_rndis_agg_align > SHRT_MAX) { 925 /* Disable */ 926 size = 0; 927 pkts = 0; 928 } 929 930 if (bootverbose) { 931 if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", 932 size, pkts, sc->hn_rndis_agg_align); 933 } 934 935 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 936 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 937 938 mtx_lock(&txr->hn_tx_lock); 939 txr->hn_agg_szmax = size; 940 txr->hn_agg_pktmax = pkts; 941 txr->hn_agg_align = sc->hn_rndis_agg_align; 942 mtx_unlock(&txr->hn_tx_lock); 943 } 944 } 945 946 static int 947 hn_get_txswq_depth(const struct hn_tx_ring *txr) 948 { 949 950 KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); 951 if (hn_tx_swq_depth < txr->hn_txdesc_cnt) 952 return txr->hn_txdesc_cnt; 953 return hn_tx_swq_depth; 954 } 955 956 #ifndef RSS 957 static int 958 hn_rss_reconfig(struct hn_softc *sc) 959 { 960 int error; 961 962 HN_LOCK_ASSERT(sc); 963 964 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 965 return (ENXIO); 966 967 /* 968 * Disable RSS first. 969 * 970 * NOTE: 971 * Direct reconfiguration by setting the UNCHG flags does 972 * _not_ work properly. 973 */ 974 if (bootverbose) 975 if_printf(sc->hn_ifp, "disable RSS\n"); 976 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); 977 if (error) { 978 if_printf(sc->hn_ifp, "RSS disable failed\n"); 979 return (error); 980 } 981 982 /* 983 * Reenable the RSS w/ the updated RSS key or indirect 984 * table. 985 */ 986 if (bootverbose) 987 if_printf(sc->hn_ifp, "reconfig RSS\n"); 988 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 989 if (error) { 990 if_printf(sc->hn_ifp, "RSS reconfig failed\n"); 991 return (error); 992 } 993 return (0); 994 } 995 #endif /* !RSS */ 996 997 static void 998 hn_rss_ind_fixup(struct hn_softc *sc) 999 { 1000 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 1001 int i, nchan; 1002 1003 nchan = sc->hn_rx_ring_inuse; 1004 KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); 1005 1006 /* 1007 * Check indirect table to make sure that all channels in it 1008 * can be used. 
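 * Out-of-range entries are remapped to the last usable channel.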
1009 */ 1010 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 1011 if (rss->rss_ind[i] >= nchan) { 1012 if_printf(sc->hn_ifp, 1013 "RSS indirect table %d fixup: %u -> %d\n", 1014 i, rss->rss_ind[i], nchan - 1); 1015 rss->rss_ind[i] = nchan - 1; 1016 } 1017 } 1018 } 1019 1020 static int 1021 hn_ifmedia_upd(struct ifnet *ifp __unused) 1022 { 1023 1024 return EOPNOTSUPP; 1025 } 1026 1027 static void 1028 hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 1029 { 1030 struct hn_softc *sc = ifp->if_softc; 1031 1032 ifmr->ifm_status = IFM_AVALID; 1033 ifmr->ifm_active = IFM_ETHER; 1034 1035 if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { 1036 ifmr->ifm_active |= IFM_NONE; 1037 return; 1038 } 1039 ifmr->ifm_status |= IFM_ACTIVE; 1040 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 1041 } 1042 1043 static void 1044 hn_rxvf_set_task(void *xarg, int pending __unused) 1045 { 1046 struct hn_rxvf_setarg *arg = xarg; 1047 1048 arg->rxr->hn_rxvf_ifp = arg->vf_ifp; 1049 } 1050 1051 static void 1052 hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp) 1053 { 1054 struct hn_rx_ring *rxr; 1055 struct hn_rxvf_setarg arg; 1056 struct task task; 1057 int i; 1058 1059 HN_LOCK_ASSERT(sc); 1060 1061 TASK_INIT(&task, 0, hn_rxvf_set_task, &arg); 1062 1063 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 1064 rxr = &sc->hn_rx_ring[i]; 1065 1066 if (i < sc->hn_rx_ring_inuse) { 1067 arg.rxr = rxr; 1068 arg.vf_ifp = vf_ifp; 1069 vmbus_chan_run_task(rxr->hn_chan, &task); 1070 } else { 1071 rxr->hn_rxvf_ifp = vf_ifp; 1072 } 1073 } 1074 } 1075 1076 static bool 1077 hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp) 1078 { 1079 const struct ifnet *hn_ifp; 1080 1081 hn_ifp = sc->hn_ifp; 1082 1083 if (ifp == hn_ifp) 1084 return (false); 1085 1086 if (ifp->if_alloctype != IFT_ETHER) 1087 return (false); 1088 1089 /* Ignore lagg/vlan interfaces */ 1090 if (strcmp(ifp->if_dname, "lagg") == 0 || 1091 strcmp(ifp->if_dname, "vlan") == 0) 1092 return (false); 1093 1094 if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0) 1095 return (false); 1096 1097 return (true); 1098 } 1099 1100 static void 1101 hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf) 1102 { 1103 struct ifnet *hn_ifp; 1104 1105 HN_LOCK(sc); 1106 1107 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1108 goto out; 1109 1110 if (!hn_ismyvf(sc, ifp)) 1111 goto out; 1112 hn_ifp = sc->hn_ifp; 1113 1114 if (rxvf) { 1115 if (sc->hn_flags & HN_FLAG_RXVF) 1116 goto out; 1117 1118 sc->hn_flags |= HN_FLAG_RXVF; 1119 hn_rxfilter_config(sc); 1120 } else { 1121 if (!(sc->hn_flags & HN_FLAG_RXVF)) 1122 goto out; 1123 1124 sc->hn_flags &= ~HN_FLAG_RXVF; 1125 if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 1126 hn_rxfilter_config(sc); 1127 else 1128 hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE); 1129 } 1130 1131 hn_nvs_set_datapath(sc, 1132 rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH); 1133 1134 hn_rxvf_set(sc, rxvf ? ifp : NULL); 1135 1136 if (rxvf) { 1137 hn_suspend_mgmt(sc); 1138 sc->hn_link_flags &= 1139 ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG); 1140 if_link_state_change(hn_ifp, LINK_STATE_DOWN); 1141 } else { 1142 hn_resume_mgmt(sc); 1143 } 1144 1145 devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname, 1146 rxvf ? "VF_UP" : "VF_DOWN", NULL); 1147 1148 if (bootverbose) { 1149 if_printf(hn_ifp, "datapath is switched %s %s\n", 1150 rxvf ? 
"to" : "from", ifp->if_xname); 1151 } 1152 out: 1153 HN_UNLOCK(sc); 1154 } 1155 1156 static void 1157 hn_ifnet_event(void *arg, struct ifnet *ifp, int event) 1158 { 1159 1160 if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN) 1161 return; 1162 hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP); 1163 } 1164 1165 static void 1166 hn_ifaddr_event(void *arg, struct ifnet *ifp) 1167 { 1168 1169 hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP); 1170 } 1171 1172 static int 1173 hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr) 1174 { 1175 struct ifnet *ifp, *vf_ifp; 1176 uint64_t tmp; 1177 int error; 1178 1179 HN_LOCK_ASSERT(sc); 1180 ifp = sc->hn_ifp; 1181 vf_ifp = sc->hn_vf_ifp; 1182 1183 /* 1184 * Fix up requested capabilities w/ supported capabilities, 1185 * since the supported capabilities could have been changed. 1186 */ 1187 ifr->ifr_reqcap &= ifp->if_capabilities; 1188 /* Pass SIOCSIFCAP to VF. */ 1189 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr); 1190 1191 /* 1192 * NOTE: 1193 * The error will be propagated to the callers, however, it 1194 * is _not_ useful here. 1195 */ 1196 1197 /* 1198 * Merge VF's enabled capabilities. 1199 */ 1200 ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities; 1201 1202 tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc); 1203 if (ifp->if_capenable & IFCAP_TXCSUM) 1204 ifp->if_hwassist |= tmp; 1205 else 1206 ifp->if_hwassist &= ~tmp; 1207 1208 tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc); 1209 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 1210 ifp->if_hwassist |= tmp; 1211 else 1212 ifp->if_hwassist &= ~tmp; 1213 1214 tmp = vf_ifp->if_hwassist & CSUM_IP_TSO; 1215 if (ifp->if_capenable & IFCAP_TSO4) 1216 ifp->if_hwassist |= tmp; 1217 else 1218 ifp->if_hwassist &= ~tmp; 1219 1220 tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO; 1221 if (ifp->if_capenable & IFCAP_TSO6) 1222 ifp->if_hwassist |= tmp; 1223 else 1224 ifp->if_hwassist &= ~tmp; 1225 1226 return (error); 1227 } 1228 1229 static int 1230 hn_xpnt_vf_iocsetflags(struct hn_softc *sc) 1231 { 1232 struct ifnet *vf_ifp; 1233 struct ifreq ifr; 1234 1235 HN_LOCK_ASSERT(sc); 1236 vf_ifp = sc->hn_vf_ifp; 1237 1238 memset(&ifr, 0, sizeof(ifr)); 1239 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1240 ifr.ifr_flags = vf_ifp->if_flags & 0xffff; 1241 ifr.ifr_flagshigh = vf_ifp->if_flags >> 16; 1242 return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr)); 1243 } 1244 1245 static void 1246 hn_xpnt_vf_saveifflags(struct hn_softc *sc) 1247 { 1248 struct ifnet *ifp = sc->hn_ifp; 1249 int allmulti = 0; 1250 1251 HN_LOCK_ASSERT(sc); 1252 1253 /* XXX vlan(4) style mcast addr maintenance */ 1254 if (!TAILQ_EMPTY(&ifp->if_multiaddrs)) 1255 allmulti = IFF_ALLMULTI; 1256 1257 /* Always set the VF's if_flags */ 1258 sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti; 1259 } 1260 1261 static void 1262 hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m) 1263 { 1264 struct rm_priotracker pt; 1265 struct ifnet *hn_ifp = NULL; 1266 struct mbuf *mn; 1267 1268 /* 1269 * XXX racy, if hn(4) ever detached. 1270 */ 1271 rm_rlock(&hn_vfmap_lock, &pt); 1272 if (vf_ifp->if_index < hn_vfmap_size) 1273 hn_ifp = hn_vfmap[vf_ifp->if_index]; 1274 rm_runlock(&hn_vfmap_lock, &pt); 1275 1276 if (hn_ifp != NULL) { 1277 for (mn = m; mn != NULL; mn = mn->m_nextpkt) { 1278 /* 1279 * Allow tapping on the VF. 1280 */ 1281 ETHER_BPF_MTAP(vf_ifp, mn); 1282 1283 /* 1284 * Update VF stats. 
1285 */ 1286 if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) { 1287 if_inc_counter(vf_ifp, IFCOUNTER_IBYTES, 1288 mn->m_pkthdr.len); 1289 } 1290 /* 1291 * XXX IFCOUNTER_IMCAST 1292 * This stat updating is kinda invasive, since it 1293 * requires two checks on the mbuf: the length check 1294 * and the ethernet header check. As of this write, 1295 * all multicast packets go directly to hn(4), which 1296 * makes imcast stat updating in the VF a try in vian. 1297 */ 1298 1299 /* 1300 * Fix up rcvif and increase hn(4)'s ipackets. 1301 */ 1302 mn->m_pkthdr.rcvif = hn_ifp; 1303 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 1304 } 1305 /* 1306 * Go through hn(4)'s if_input. 1307 */ 1308 hn_ifp->if_input(hn_ifp, m); 1309 } else { 1310 /* 1311 * In the middle of the transition; free this 1312 * mbuf chain. 1313 */ 1314 while (m != NULL) { 1315 mn = m->m_nextpkt; 1316 m->m_nextpkt = NULL; 1317 m_freem(m); 1318 m = mn; 1319 } 1320 } 1321 } 1322 1323 static void 1324 hn_mtu_change_fixup(struct hn_softc *sc) 1325 { 1326 struct ifnet *ifp; 1327 1328 HN_LOCK_ASSERT(sc); 1329 ifp = sc->hn_ifp; 1330 1331 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 1332 #if __FreeBSD_version >= 1100099 1333 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) 1334 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 1335 #endif 1336 } 1337 1338 static void 1339 hn_xpnt_vf_setready(struct hn_softc *sc) 1340 { 1341 struct ifnet *ifp, *vf_ifp; 1342 struct ifreq ifr; 1343 1344 HN_LOCK_ASSERT(sc); 1345 ifp = sc->hn_ifp; 1346 vf_ifp = sc->hn_vf_ifp; 1347 1348 /* 1349 * Mark the VF ready. 1350 */ 1351 sc->hn_vf_rdytick = 0; 1352 1353 /* 1354 * Save information for restoration. 1355 */ 1356 sc->hn_saved_caps = ifp->if_capabilities; 1357 sc->hn_saved_tsomax = ifp->if_hw_tsomax; 1358 sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount; 1359 sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize; 1360 1361 /* 1362 * Intersect supported/enabled capabilities. 1363 * 1364 * NOTE: 1365 * if_hwassist is not changed here. 1366 */ 1367 ifp->if_capabilities &= vf_ifp->if_capabilities; 1368 ifp->if_capenable &= ifp->if_capabilities; 1369 1370 /* 1371 * Fix TSO settings. 1372 */ 1373 if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax) 1374 ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax; 1375 if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount) 1376 ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount; 1377 if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize) 1378 ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize; 1379 1380 /* 1381 * Change VF's enabled capabilities. 1382 */ 1383 memset(&ifr, 0, sizeof(ifr)); 1384 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1385 ifr.ifr_reqcap = ifp->if_capenable; 1386 hn_xpnt_vf_iocsetcaps(sc, &ifr); 1387 1388 if (ifp->if_mtu != ETHERMTU) { 1389 int error; 1390 1391 /* 1392 * Change VF's MTU. 1393 */ 1394 memset(&ifr, 0, sizeof(ifr)); 1395 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1396 ifr.ifr_mtu = ifp->if_mtu; 1397 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr); 1398 if (error) { 1399 if_printf(ifp, "%s SIOCSIFMTU %u failed\n", 1400 vf_ifp->if_xname, ifp->if_mtu); 1401 if (ifp->if_mtu > ETHERMTU) { 1402 if_printf(ifp, "change MTU to %d\n", ETHERMTU); 1403 1404 /* 1405 * XXX 1406 * No need to adjust the synthetic parts' MTU; 1407 * failure of the adjustment will cause us 1408 * infinite headache. 
1409 */ 1410 ifp->if_mtu = ETHERMTU; 1411 hn_mtu_change_fixup(sc); 1412 } 1413 } 1414 } 1415 } 1416 1417 static bool 1418 hn_xpnt_vf_isready(struct hn_softc *sc) 1419 { 1420 1421 HN_LOCK_ASSERT(sc); 1422 1423 if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL) 1424 return (false); 1425 1426 if (sc->hn_vf_rdytick == 0) 1427 return (true); 1428 1429 if (sc->hn_vf_rdytick > ticks) 1430 return (false); 1431 1432 /* Mark VF as ready. */ 1433 hn_xpnt_vf_setready(sc); 1434 return (true); 1435 } 1436 1437 static void 1438 hn_xpnt_vf_setenable(struct hn_softc *sc) 1439 { 1440 int i; 1441 1442 HN_LOCK_ASSERT(sc); 1443 1444 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1445 rm_wlock(&sc->hn_vf_lock); 1446 sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED; 1447 rm_wunlock(&sc->hn_vf_lock); 1448 1449 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1450 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF; 1451 } 1452 1453 static void 1454 hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf) 1455 { 1456 int i; 1457 1458 HN_LOCK_ASSERT(sc); 1459 1460 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1461 rm_wlock(&sc->hn_vf_lock); 1462 sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED; 1463 if (clear_vf) 1464 sc->hn_vf_ifp = NULL; 1465 rm_wunlock(&sc->hn_vf_lock); 1466 1467 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1468 sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF; 1469 } 1470 1471 static void 1472 hn_xpnt_vf_init(struct hn_softc *sc) 1473 { 1474 int error; 1475 1476 HN_LOCK_ASSERT(sc); 1477 1478 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1479 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1480 1481 if (bootverbose) { 1482 if_printf(sc->hn_ifp, "try bringing up %s\n", 1483 sc->hn_vf_ifp->if_xname); 1484 } 1485 1486 /* 1487 * Bring the VF up. 1488 */ 1489 hn_xpnt_vf_saveifflags(sc); 1490 sc->hn_vf_ifp->if_flags |= IFF_UP; 1491 error = hn_xpnt_vf_iocsetflags(sc); 1492 if (error) { 1493 if_printf(sc->hn_ifp, "bringing up %s failed: %d\n", 1494 sc->hn_vf_ifp->if_xname, error); 1495 return; 1496 } 1497 1498 /* 1499 * NOTE: 1500 * Datapath setting must happen _after_ bringing the VF up. 1501 */ 1502 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 1503 1504 /* Mark transparent mode VF as enabled. */ 1505 hn_xpnt_vf_setenable(sc); 1506 } 1507 1508 static void 1509 hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused) 1510 { 1511 struct hn_softc *sc = xsc; 1512 1513 HN_LOCK(sc); 1514 1515 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1516 goto done; 1517 if (sc->hn_vf_ifp == NULL) 1518 goto done; 1519 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 1520 goto done; 1521 1522 if (sc->hn_vf_rdytick != 0) { 1523 /* Mark VF as ready. */ 1524 hn_xpnt_vf_setready(sc); 1525 } 1526 1527 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) { 1528 /* 1529 * Delayed VF initialization. 
1530 */ 1531 if (bootverbose) { 1532 if_printf(sc->hn_ifp, "delayed initialize %s\n", 1533 sc->hn_vf_ifp->if_xname); 1534 } 1535 hn_xpnt_vf_init(sc); 1536 } 1537 done: 1538 HN_UNLOCK(sc); 1539 } 1540 1541 static void 1542 hn_ifnet_attevent(void *xsc, struct ifnet *ifp) 1543 { 1544 struct hn_softc *sc = xsc; 1545 1546 HN_LOCK(sc); 1547 1548 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1549 goto done; 1550 1551 if (!hn_ismyvf(sc, ifp)) 1552 goto done; 1553 1554 if (sc->hn_vf_ifp != NULL) { 1555 if_printf(sc->hn_ifp, "%s was attached as VF\n", 1556 sc->hn_vf_ifp->if_xname); 1557 goto done; 1558 } 1559 1560 if (hn_xpnt_vf && ifp->if_start != NULL) { 1561 /* 1562 * ifnet.if_start is _not_ supported by transparent 1563 * mode VF; mainly due to the IFF_DRV_OACTIVE flag. 1564 */ 1565 if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported " 1566 "in transparent VF mode.\n", ifp->if_xname); 1567 goto done; 1568 } 1569 1570 rm_wlock(&hn_vfmap_lock); 1571 1572 if (ifp->if_index >= hn_vfmap_size) { 1573 struct ifnet **newmap; 1574 int newsize; 1575 1576 newsize = ifp->if_index + HN_VFMAP_SIZE_DEF; 1577 newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF, 1578 M_WAITOK | M_ZERO); 1579 1580 memcpy(newmap, hn_vfmap, 1581 sizeof(struct ifnet *) * hn_vfmap_size); 1582 free(hn_vfmap, M_DEVBUF); 1583 hn_vfmap = newmap; 1584 hn_vfmap_size = newsize; 1585 } 1586 KASSERT(hn_vfmap[ifp->if_index] == NULL, 1587 ("%s: ifindex %d was mapped to %s", 1588 ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname)); 1589 hn_vfmap[ifp->if_index] = sc->hn_ifp; 1590 1591 rm_wunlock(&hn_vfmap_lock); 1592 1593 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1594 rm_wlock(&sc->hn_vf_lock); 1595 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1596 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1597 sc->hn_vf_ifp = ifp; 1598 rm_wunlock(&sc->hn_vf_lock); 1599 1600 if (hn_xpnt_vf) { 1601 int wait_ticks; 1602 1603 /* 1604 * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp. 1605 * Save vf_ifp's current if_input for later restoration. 1606 */ 1607 sc->hn_vf_input = ifp->if_input; 1608 ifp->if_input = hn_xpnt_vf_input; 1609 1610 /* 1611 * Stop link status management; use the VF's. 1612 */ 1613 hn_suspend_mgmt(sc); 1614 1615 /* 1616 * Give VF sometime to complete its attach routing. 1617 */ 1618 wait_ticks = hn_xpnt_vf_attwait * hz; 1619 sc->hn_vf_rdytick = ticks + wait_ticks; 1620 1621 taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init, 1622 wait_ticks); 1623 } 1624 done: 1625 HN_UNLOCK(sc); 1626 } 1627 1628 static void 1629 hn_ifnet_detevent(void *xsc, struct ifnet *ifp) 1630 { 1631 struct hn_softc *sc = xsc; 1632 1633 HN_LOCK(sc); 1634 1635 if (sc->hn_vf_ifp == NULL) 1636 goto done; 1637 1638 if (!hn_ismyvf(sc, ifp)) 1639 goto done; 1640 1641 if (hn_xpnt_vf) { 1642 /* 1643 * Make sure that the delayed initialization is not running. 1644 * 1645 * NOTE: 1646 * - This lock _must_ be released, since the hn_vf_init task 1647 * will try holding this lock. 1648 * - It is safe to release this lock here, since the 1649 * hn_ifnet_attevent() is interlocked by the hn_vf_ifp. 1650 * 1651 * XXX racy, if hn(4) ever detached. 
1652 */ 1653 HN_UNLOCK(sc); 1654 taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init); 1655 HN_LOCK(sc); 1656 1657 KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved", 1658 sc->hn_ifp->if_xname)); 1659 ifp->if_input = sc->hn_vf_input; 1660 sc->hn_vf_input = NULL; 1661 1662 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 1663 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 1664 1665 if (sc->hn_vf_rdytick == 0) { 1666 /* 1667 * The VF was ready; restore some settings. 1668 */ 1669 sc->hn_ifp->if_capabilities = sc->hn_saved_caps; 1670 /* 1671 * NOTE: 1672 * There is _no_ need to fixup if_capenable and 1673 * if_hwassist, since the if_capabilities before 1674 * restoration was an intersection of the VF's 1675 * if_capabilites and the synthetic device's 1676 * if_capabilites. 1677 */ 1678 sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax; 1679 sc->hn_ifp->if_hw_tsomaxsegcount = 1680 sc->hn_saved_tsosegcnt; 1681 sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz; 1682 } 1683 1684 /* 1685 * Resume link status management, which was suspended 1686 * by hn_ifnet_attevent(). 1687 */ 1688 hn_resume_mgmt(sc); 1689 } 1690 1691 /* Mark transparent mode VF as disabled. */ 1692 hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */); 1693 1694 rm_wlock(&hn_vfmap_lock); 1695 1696 KASSERT(ifp->if_index < hn_vfmap_size, 1697 ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size)); 1698 if (hn_vfmap[ifp->if_index] != NULL) { 1699 KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp, 1700 ("%s: ifindex %d was mapped to %s", 1701 ifp->if_xname, ifp->if_index, 1702 hn_vfmap[ifp->if_index]->if_xname)); 1703 hn_vfmap[ifp->if_index] = NULL; 1704 } 1705 1706 rm_wunlock(&hn_vfmap_lock); 1707 done: 1708 HN_UNLOCK(sc); 1709 } 1710 1711 static void 1712 hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state) 1713 { 1714 struct hn_softc *sc = xsc; 1715 1716 if (sc->hn_vf_ifp == ifp) 1717 if_link_state_change(sc->hn_ifp, link_state); 1718 } 1719 1720 static int 1721 hn_probe(device_t dev) 1722 { 1723 1724 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) { 1725 device_set_desc(dev, "Hyper-V Network Interface"); 1726 return BUS_PROBE_DEFAULT; 1727 } 1728 return ENXIO; 1729 } 1730 1731 static int 1732 hn_attach(device_t dev) 1733 { 1734 struct hn_softc *sc = device_get_softc(dev); 1735 struct sysctl_oid_list *child; 1736 struct sysctl_ctx_list *ctx; 1737 uint8_t eaddr[ETHER_ADDR_LEN]; 1738 struct ifnet *ifp = NULL; 1739 int error, ring_cnt, tx_ring_cnt; 1740 1741 sc->hn_dev = dev; 1742 sc->hn_prichan = vmbus_get_channel(dev); 1743 HN_LOCK_INIT(sc); 1744 rm_init(&sc->hn_vf_lock, "hnvf"); 1745 if (hn_xpnt_vf && hn_xpnt_vf_accbpf) 1746 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 1747 1748 /* 1749 * Initialize these tunables once. 1750 */ 1751 sc->hn_agg_size = hn_tx_agg_size; 1752 sc->hn_agg_pkts = hn_tx_agg_pkts; 1753 1754 /* 1755 * Setup taskqueue for transmission. 1756 */ 1757 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) { 1758 int i; 1759 1760 sc->hn_tx_taskqs = 1761 malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 1762 M_DEVBUF, M_WAITOK); 1763 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 1764 sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx", 1765 M_WAITOK, taskqueue_thread_enqueue, 1766 &sc->hn_tx_taskqs[i]); 1767 taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET, 1768 "%s tx%d", device_get_nameunit(dev), i); 1769 } 1770 } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) { 1771 sc->hn_tx_taskqs = hn_tx_taskque; 1772 } 1773 1774 /* 1775 * Setup taskqueue for mangement tasks, e.g. link status. 
1776 */ 1777 sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, 1778 taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); 1779 taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", 1780 device_get_nameunit(dev)); 1781 TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); 1782 TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); 1783 TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, 1784 hn_netchg_status_taskfunc, sc); 1785 1786 if (hn_xpnt_vf) { 1787 /* 1788 * Setup taskqueue for VF tasks, e.g. delayed VF bringing up. 1789 */ 1790 sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK, 1791 taskqueue_thread_enqueue, &sc->hn_vf_taskq); 1792 taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf", 1793 device_get_nameunit(dev)); 1794 TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0, 1795 hn_xpnt_vf_init_taskfunc, sc); 1796 } 1797 1798 /* 1799 * Allocate ifnet and setup its name earlier, so that if_printf 1800 * can be used by functions, which will be called after 1801 * ether_ifattach(). 1802 */ 1803 ifp = sc->hn_ifp = if_alloc(IFT_ETHER); 1804 ifp->if_softc = sc; 1805 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 1806 1807 /* 1808 * Initialize ifmedia earlier so that it can be unconditionally 1809 * destroyed, if error happened later on. 1810 */ 1811 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 1812 1813 /* 1814 * Figure out the # of RX rings (ring_cnt) and the # of TX rings 1815 * to use (tx_ring_cnt). 1816 * 1817 * NOTE: 1818 * The # of RX rings to use is same as the # of channels to use. 1819 */ 1820 ring_cnt = hn_chan_cnt; 1821 if (ring_cnt <= 0) { 1822 /* Default */ 1823 ring_cnt = mp_ncpus; 1824 if (ring_cnt > HN_RING_CNT_DEF_MAX) 1825 ring_cnt = HN_RING_CNT_DEF_MAX; 1826 } else if (ring_cnt > mp_ncpus) { 1827 ring_cnt = mp_ncpus; 1828 } 1829 #ifdef RSS 1830 if (ring_cnt > rss_getnumbuckets()) 1831 ring_cnt = rss_getnumbuckets(); 1832 #endif 1833 1834 tx_ring_cnt = hn_tx_ring_cnt; 1835 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) 1836 tx_ring_cnt = ring_cnt; 1837 #ifdef HN_IFSTART_SUPPORT 1838 if (hn_use_if_start) { 1839 /* ifnet.if_start only needs one TX ring. */ 1840 tx_ring_cnt = 1; 1841 } 1842 #endif 1843 1844 /* 1845 * Set the leader CPU for channels. 1846 */ 1847 sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; 1848 1849 /* 1850 * Create enough TX/RX rings, even if only limited number of 1851 * channels can be allocated. 1852 */ 1853 error = hn_create_tx_data(sc, tx_ring_cnt); 1854 if (error) 1855 goto failed; 1856 error = hn_create_rx_data(sc, ring_cnt); 1857 if (error) 1858 goto failed; 1859 1860 /* 1861 * Create transaction context for NVS and RNDIS transactions. 1862 */ 1863 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), 1864 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); 1865 if (sc->hn_xact == NULL) { 1866 error = ENXIO; 1867 goto failed; 1868 } 1869 1870 /* 1871 * Install orphan handler for the revocation of this device's 1872 * primary channel. 1873 * 1874 * NOTE: 1875 * The processing order is critical here: 1876 * Install the orphan handler, _before_ testing whether this 1877 * device's primary channel has been revoked or not. 1878 */ 1879 vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); 1880 if (vmbus_chan_is_revoked(sc->hn_prichan)) { 1881 error = ENXIO; 1882 goto failed; 1883 } 1884 1885 /* 1886 * Attach the synthetic parts, i.e. NVS and RNDIS. 
1887 */ 1888 error = hn_synth_attach(sc, ETHERMTU); 1889 if (error) 1890 goto failed; 1891 1892 error = hn_rndis_get_eaddr(sc, eaddr); 1893 if (error) 1894 goto failed; 1895 1896 #if __FreeBSD_version >= 1100099 1897 if (sc->hn_rx_ring_inuse > 1) { 1898 /* 1899 * Reduce TCP segment aggregation limit for multiple 1900 * RX rings to increase ACK timeliness. 1901 */ 1902 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); 1903 } 1904 #endif 1905 1906 /* 1907 * Fixup TX stuffs after synthetic parts are attached. 1908 */ 1909 hn_fixup_tx_data(sc); 1910 1911 ctx = device_get_sysctl_ctx(dev); 1912 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 1913 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, 1914 &sc->hn_nvs_ver, 0, "NVS version"); 1915 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", 1916 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1917 hn_ndis_version_sysctl, "A", "NDIS version"); 1918 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", 1919 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1920 hn_caps_sysctl, "A", "capabilities"); 1921 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", 1922 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1923 hn_hwassist_sysctl, "A", "hwassist"); 1924 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", 1925 CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); 1926 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", 1927 CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, 1928 "max # of TSO segments"); 1929 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", 1930 CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0, 1931 "max size of TSO segment"); 1932 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", 1933 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1934 hn_rxfilter_sysctl, "A", "rxfilter"); 1935 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", 1936 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1937 hn_rss_hash_sysctl, "A", "RSS hash"); 1938 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", 1939 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); 1940 #ifndef RSS 1941 /* 1942 * Don't allow RSS key/indirect table changes, if RSS is defined. 
1943 */ 1944 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", 1945 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1946 hn_rss_key_sysctl, "IU", "RSS key"); 1947 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", 1948 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1949 hn_rss_ind_sysctl, "IU", "RSS indirect table"); 1950 #endif 1951 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", 1952 CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, 1953 "RNDIS offered packet transmission aggregation size limit"); 1954 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", 1955 CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, 1956 "RNDIS offered packet transmission aggregation count limit"); 1957 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", 1958 CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, 1959 "RNDIS packet transmission aggregation alignment"); 1960 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", 1961 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1962 hn_txagg_size_sysctl, "I", 1963 "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); 1964 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", 1965 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1966 hn_txagg_pkts_sysctl, "I", 1967 "Packet transmission aggregation packets, " 1968 "0 -- disable, -1 -- auto"); 1969 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling", 1970 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1971 hn_polling_sysctl, "I", 1972 "Polling frequency: [100,1000000], 0 disable polling"); 1973 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", 1974 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1975 hn_vf_sysctl, "A", "Virtual Function's name"); 1976 if (!hn_xpnt_vf) { 1977 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", 1978 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1979 hn_rxvf_sysctl, "A", "activated Virtual Function's name"); 1980 } else { 1981 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", 1982 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1983 hn_xpnt_vf_enabled_sysctl, "I", 1984 "Transparent VF enabled"); 1985 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", 1986 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1987 hn_xpnt_vf_accbpf_sysctl, "I", 1988 "Accurate BPF for transparent VF"); 1989 } 1990 1991 /* 1992 * Setup the ifmedia, which has been initialized earlier. 1993 */ 1994 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 1995 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 1996 /* XXX ifmedia_set really should do this for us */ 1997 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 1998 1999 /* 2000 * Setup the ifnet for this interface. 2001 */ 2002 2003 ifp->if_baudrate = IF_Gbps(10); 2004 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2005 ifp->if_ioctl = hn_ioctl; 2006 ifp->if_init = hn_init; 2007 #ifdef HN_IFSTART_SUPPORT 2008 if (hn_use_if_start) { 2009 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); 2010 2011 ifp->if_start = hn_start; 2012 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); 2013 ifp->if_snd.ifq_drv_maxlen = qdepth - 1; 2014 IFQ_SET_READY(&ifp->if_snd); 2015 } else 2016 #endif 2017 { 2018 ifp->if_transmit = hn_transmit; 2019 ifp->if_qflush = hn_xmit_qflush; 2020 } 2021 2022 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; 2023 #ifdef foo 2024 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2025 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; 2026 #endif 2027 if (sc->hn_caps & HN_CAP_VLAN) { 2028 /* XXX not sure about VLAN_MTU. 
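 * IFCAP_VLAN_HWTAGGING itself is backed by the NDIS VLAN pktinfo:
 * hn_encap() appends NDIS_PKTINFO_TYPE_VLAN on transmit and hn_rxpkt()
 * turns info->vlan_info back into an mbuf VLAN tag on receive.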
*/ 2029 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; 2030 } 2031 2032 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; 2033 if (ifp->if_hwassist & HN_CSUM_IP_MASK) 2034 ifp->if_capabilities |= IFCAP_TXCSUM; 2035 if (ifp->if_hwassist & HN_CSUM_IP6_MASK) 2036 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; 2037 if (sc->hn_caps & HN_CAP_TSO4) { 2038 ifp->if_capabilities |= IFCAP_TSO4; 2039 ifp->if_hwassist |= CSUM_IP_TSO; 2040 } 2041 if (sc->hn_caps & HN_CAP_TSO6) { 2042 ifp->if_capabilities |= IFCAP_TSO6; 2043 ifp->if_hwassist |= CSUM_IP6_TSO; 2044 } 2045 2046 /* Enable all available capabilities by default. */ 2047 ifp->if_capenable = ifp->if_capabilities; 2048 2049 /* 2050 * Disable IPv6 TSO and TXCSUM by default, they still can 2051 * be enabled through SIOCSIFCAP. 2052 */ 2053 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); 2054 ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); 2055 2056 if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { 2057 /* 2058 * Lock hn_set_tso_maxsize() to simplify its 2059 * internal logic. 2060 */ 2061 HN_LOCK(sc); 2062 hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); 2063 HN_UNLOCK(sc); 2064 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; 2065 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2066 } 2067 2068 ether_ifattach(ifp, eaddr); 2069 2070 if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { 2071 if_printf(ifp, "TSO segcnt %u segsz %u\n", 2072 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); 2073 } 2074 2075 /* Inform the upper layer about the long frame support. */ 2076 ifp->if_hdrlen = sizeof(struct ether_vlan_header); 2077 2078 /* 2079 * Kick off link status check. 2080 */ 2081 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 2082 hn_update_link_status(sc); 2083 2084 if (!hn_xpnt_vf) { 2085 sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, 2086 hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); 2087 sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, 2088 hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); 2089 } else { 2090 sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, 2091 hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); 2092 } 2093 2094 /* 2095 * NOTE: 2096 * Subscribe ether_ifattach event, instead of ifnet_arrival event, 2097 * since interface's LLADDR is needed; interface LLADDR is not 2098 * available when ifnet_arrival event is triggered. 2099 */ 2100 sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event, 2101 hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY); 2102 sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event, 2103 hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY); 2104 2105 return (0); 2106 failed: 2107 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) 2108 hn_synth_detach(sc); 2109 hn_detach(dev); 2110 return (error); 2111 } 2112 2113 static int 2114 hn_detach(device_t dev) 2115 { 2116 struct hn_softc *sc = device_get_softc(dev); 2117 struct ifnet *ifp = sc->hn_ifp, *vf_ifp; 2118 2119 if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { 2120 /* 2121 * In case that the vmbus missed the orphan handler 2122 * installation. 
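 * Orphaning the xact context here keeps transactions from waiting
 * forever on the revoked primary channel while the device is torn down.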
2123 */ 2124 vmbus_xact_ctx_orphan(sc->hn_xact); 2125 } 2126 2127 if (sc->hn_ifaddr_evthand != NULL) 2128 EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand); 2129 if (sc->hn_ifnet_evthand != NULL) 2130 EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand); 2131 if (sc->hn_ifnet_atthand != NULL) { 2132 EVENTHANDLER_DEREGISTER(ether_ifattach_event, 2133 sc->hn_ifnet_atthand); 2134 } 2135 if (sc->hn_ifnet_dethand != NULL) { 2136 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 2137 sc->hn_ifnet_dethand); 2138 } 2139 if (sc->hn_ifnet_lnkhand != NULL) 2140 EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand); 2141 2142 vf_ifp = sc->hn_vf_ifp; 2143 __compiler_membar(); 2144 if (vf_ifp != NULL) 2145 hn_ifnet_detevent(sc, vf_ifp); 2146 2147 if (device_is_attached(dev)) { 2148 HN_LOCK(sc); 2149 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2150 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2151 hn_stop(sc, true); 2152 /* 2153 * NOTE: 2154 * hn_stop() only suspends data, so managment 2155 * stuffs have to be suspended manually here. 2156 */ 2157 hn_suspend_mgmt(sc); 2158 hn_synth_detach(sc); 2159 } 2160 HN_UNLOCK(sc); 2161 ether_ifdetach(ifp); 2162 } 2163 2164 ifmedia_removeall(&sc->hn_media); 2165 hn_destroy_rx_data(sc); 2166 hn_destroy_tx_data(sc); 2167 2168 if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) { 2169 int i; 2170 2171 for (i = 0; i < hn_tx_taskq_cnt; ++i) 2172 taskqueue_free(sc->hn_tx_taskqs[i]); 2173 free(sc->hn_tx_taskqs, M_DEVBUF); 2174 } 2175 taskqueue_free(sc->hn_mgmt_taskq0); 2176 if (sc->hn_vf_taskq != NULL) 2177 taskqueue_free(sc->hn_vf_taskq); 2178 2179 if (sc->hn_xact != NULL) { 2180 /* 2181 * Uninstall the orphan handler _before_ the xact is 2182 * destructed. 2183 */ 2184 vmbus_chan_unset_orphan(sc->hn_prichan); 2185 vmbus_xact_ctx_destroy(sc->hn_xact); 2186 } 2187 2188 if_free(ifp); 2189 2190 HN_LOCK_DESTROY(sc); 2191 rm_destroy(&sc->hn_vf_lock); 2192 return (0); 2193 } 2194 2195 static int 2196 hn_shutdown(device_t dev) 2197 { 2198 2199 return (0); 2200 } 2201 2202 static void 2203 hn_link_status(struct hn_softc *sc) 2204 { 2205 uint32_t link_status; 2206 int error; 2207 2208 error = hn_rndis_get_linkstatus(sc, &link_status); 2209 if (error) { 2210 /* XXX what to do? */ 2211 return; 2212 } 2213 2214 if (link_status == NDIS_MEDIA_STATE_CONNECTED) 2215 sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; 2216 else 2217 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2218 if_link_state_change(sc->hn_ifp, 2219 (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? 2220 LINK_STATE_UP : LINK_STATE_DOWN); 2221 } 2222 2223 static void 2224 hn_link_taskfunc(void *xsc, int pending __unused) 2225 { 2226 struct hn_softc *sc = xsc; 2227 2228 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 2229 return; 2230 hn_link_status(sc); 2231 } 2232 2233 static void 2234 hn_netchg_init_taskfunc(void *xsc, int pending __unused) 2235 { 2236 struct hn_softc *sc = xsc; 2237 2238 /* Prevent any link status checks from running. */ 2239 sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; 2240 2241 /* 2242 * Fake up a [link down --> link up] state change; 5 seconds 2243 * delay is used, which closely simulates miibus reaction 2244 * upon link down event. 
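 * hn_netchg_status_taskfunc() clears HN_LINK_FLAG_NETCHG and re-queries
 * the real link state once the 5 * hz timeout below fires.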
2245 */ 2246 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2247 if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); 2248 taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, 2249 &sc->hn_netchg_status, 5 * hz); 2250 } 2251 2252 static void 2253 hn_netchg_status_taskfunc(void *xsc, int pending __unused) 2254 { 2255 struct hn_softc *sc = xsc; 2256 2257 /* Re-allow link status checks. */ 2258 sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; 2259 hn_link_status(sc); 2260 } 2261 2262 static void 2263 hn_update_link_status(struct hn_softc *sc) 2264 { 2265 2266 if (sc->hn_mgmt_taskq != NULL) 2267 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); 2268 } 2269 2270 static void 2271 hn_change_network(struct hn_softc *sc) 2272 { 2273 2274 if (sc->hn_mgmt_taskq != NULL) 2275 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); 2276 } 2277 2278 static __inline int 2279 hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, 2280 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) 2281 { 2282 struct mbuf *m = *m_head; 2283 int error; 2284 2285 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); 2286 2287 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, 2288 m, segs, nsegs, BUS_DMA_NOWAIT); 2289 if (error == EFBIG) { 2290 struct mbuf *m_new; 2291 2292 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); 2293 if (m_new == NULL) 2294 return ENOBUFS; 2295 else 2296 *m_head = m = m_new; 2297 txr->hn_tx_collapsed++; 2298 2299 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, 2300 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); 2301 } 2302 if (!error) { 2303 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, 2304 BUS_DMASYNC_PREWRITE); 2305 txd->flags |= HN_TXD_FLAG_DMAMAP; 2306 } 2307 return error; 2308 } 2309 2310 static __inline int 2311 hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) 2312 { 2313 2314 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, 2315 ("put an onlist txd %#x", txd->flags)); 2316 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2317 ("put an onagg txd %#x", txd->flags)); 2318 2319 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2320 if (atomic_fetchadd_int(&txd->refs, -1) != 1) 2321 return 0; 2322 2323 if (!STAILQ_EMPTY(&txd->agg_list)) { 2324 struct hn_txdesc *tmp_txd; 2325 2326 while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { 2327 int freed; 2328 2329 KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), 2330 ("resursive aggregation on aggregated txdesc")); 2331 KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), 2332 ("not aggregated txdesc")); 2333 KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2334 ("aggregated txdesc uses dmamap")); 2335 KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2336 ("aggregated txdesc consumes " 2337 "chimney sending buffer")); 2338 KASSERT(tmp_txd->chim_size == 0, 2339 ("aggregated txdesc has non-zero " 2340 "chimney sending size")); 2341 2342 STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); 2343 tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; 2344 freed = hn_txdesc_put(txr, tmp_txd); 2345 KASSERT(freed, ("failed to free aggregated txdesc")); 2346 } 2347 } 2348 2349 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 2350 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2351 ("chim txd uses dmamap")); 2352 hn_chim_free(txr->hn_sc, txd->chim_index); 2353 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2354 txd->chim_size = 0; 2355 } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { 2356 bus_dmamap_sync(txr->hn_tx_data_dtag, 2357 txd->data_dmap, BUS_DMASYNC_POSTWRITE); 2358 
bus_dmamap_unload(txr->hn_tx_data_dtag, 2359 txd->data_dmap); 2360 txd->flags &= ~HN_TXD_FLAG_DMAMAP; 2361 } 2362 2363 if (txd->m != NULL) { 2364 m_freem(txd->m); 2365 txd->m = NULL; 2366 } 2367 2368 txd->flags |= HN_TXD_FLAG_ONLIST; 2369 #ifndef HN_USE_TXDESC_BUFRING 2370 mtx_lock_spin(&txr->hn_txlist_spin); 2371 KASSERT(txr->hn_txdesc_avail >= 0 && 2372 txr->hn_txdesc_avail < txr->hn_txdesc_cnt, 2373 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); 2374 txr->hn_txdesc_avail++; 2375 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 2376 mtx_unlock_spin(&txr->hn_txlist_spin); 2377 #else /* HN_USE_TXDESC_BUFRING */ 2378 #ifdef HN_DEBUG 2379 atomic_add_int(&txr->hn_txdesc_avail, 1); 2380 #endif 2381 buf_ring_enqueue(txr->hn_txdesc_br, txd); 2382 #endif /* !HN_USE_TXDESC_BUFRING */ 2383 2384 return 1; 2385 } 2386 2387 static __inline struct hn_txdesc * 2388 hn_txdesc_get(struct hn_tx_ring *txr) 2389 { 2390 struct hn_txdesc *txd; 2391 2392 #ifndef HN_USE_TXDESC_BUFRING 2393 mtx_lock_spin(&txr->hn_txlist_spin); 2394 txd = SLIST_FIRST(&txr->hn_txlist); 2395 if (txd != NULL) { 2396 KASSERT(txr->hn_txdesc_avail > 0, 2397 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); 2398 txr->hn_txdesc_avail--; 2399 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 2400 } 2401 mtx_unlock_spin(&txr->hn_txlist_spin); 2402 #else 2403 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); 2404 #endif 2405 2406 if (txd != NULL) { 2407 #ifdef HN_USE_TXDESC_BUFRING 2408 #ifdef HN_DEBUG 2409 atomic_subtract_int(&txr->hn_txdesc_avail, 1); 2410 #endif 2411 #endif /* HN_USE_TXDESC_BUFRING */ 2412 KASSERT(txd->m == NULL && txd->refs == 0 && 2413 STAILQ_EMPTY(&txd->agg_list) && 2414 txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 2415 txd->chim_size == 0 && 2416 (txd->flags & HN_TXD_FLAG_ONLIST) && 2417 (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && 2418 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); 2419 txd->flags &= ~HN_TXD_FLAG_ONLIST; 2420 txd->refs = 1; 2421 } 2422 return txd; 2423 } 2424 2425 static __inline void 2426 hn_txdesc_hold(struct hn_txdesc *txd) 2427 { 2428 2429 /* 0->1 transition will never work */ 2430 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2431 atomic_add_int(&txd->refs, 1); 2432 } 2433 2434 static __inline void 2435 hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) 2436 { 2437 2438 KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2439 ("recursive aggregation on aggregating txdesc")); 2440 2441 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2442 ("already aggregated")); 2443 KASSERT(STAILQ_EMPTY(&txd->agg_list), 2444 ("recursive aggregation on to-be-aggregated txdesc")); 2445 2446 txd->flags |= HN_TXD_FLAG_ONAGG; 2447 STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); 2448 } 2449 2450 static bool 2451 hn_tx_ring_pending(struct hn_tx_ring *txr) 2452 { 2453 bool pending = false; 2454 2455 #ifndef HN_USE_TXDESC_BUFRING 2456 mtx_lock_spin(&txr->hn_txlist_spin); 2457 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) 2458 pending = true; 2459 mtx_unlock_spin(&txr->hn_txlist_spin); 2460 #else 2461 if (!buf_ring_full(txr->hn_txdesc_br)) 2462 pending = true; 2463 #endif 2464 return (pending); 2465 } 2466 2467 static __inline void 2468 hn_txeof(struct hn_tx_ring *txr) 2469 { 2470 txr->hn_has_txeof = 0; 2471 txr->hn_txeof(txr); 2472 } 2473 2474 static void 2475 hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, 2476 struct vmbus_channel *chan, const void *data __unused, int dlen __unused) 2477 { 2478 struct hn_txdesc *txd = sndc->hn_cbarg; 2479 struct 
hn_tx_ring *txr; 2480 2481 txr = txd->txr; 2482 KASSERT(txr->hn_chan == chan, 2483 ("channel mismatch, on chan%u, should be chan%u", 2484 vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); 2485 2486 txr->hn_has_txeof = 1; 2487 hn_txdesc_put(txr, txd); 2488 2489 ++txr->hn_txdone_cnt; 2490 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { 2491 txr->hn_txdone_cnt = 0; 2492 if (txr->hn_oactive) 2493 hn_txeof(txr); 2494 } 2495 } 2496 2497 static void 2498 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 2499 { 2500 #if defined(INET) || defined(INET6) 2501 tcp_lro_flush_all(&rxr->hn_lro); 2502 #endif 2503 2504 /* 2505 * NOTE: 2506 * 'txr' could be NULL, if multiple channels and 2507 * ifnet.if_start method are enabled. 2508 */ 2509 if (txr == NULL || !txr->hn_has_txeof) 2510 return; 2511 2512 txr->hn_txdone_cnt = 0; 2513 hn_txeof(txr); 2514 } 2515 2516 static __inline uint32_t 2517 hn_rndis_pktmsg_offset(uint32_t ofs) 2518 { 2519 2520 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 2521 ("invalid RNDIS packet msg offset %u", ofs)); 2522 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 2523 } 2524 2525 static __inline void * 2526 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 2527 size_t pi_dlen, uint32_t pi_type) 2528 { 2529 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 2530 struct rndis_pktinfo *pi; 2531 2532 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 2533 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 2534 2535 /* 2536 * Per-packet-info does not move; it only grows. 2537 * 2538 * NOTE: 2539 * rm_pktinfooffset in this phase counts from the beginning 2540 * of rndis_packet_msg. 2541 */ 2542 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 2543 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 2544 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 2545 pkt->rm_pktinfolen); 2546 pkt->rm_pktinfolen += pi_size; 2547 2548 pi->rm_size = pi_size; 2549 pi->rm_type = pi_type; 2550 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 2551 2552 return (pi->rm_data); 2553 } 2554 2555 static __inline int 2556 hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) 2557 { 2558 struct hn_txdesc *txd; 2559 struct mbuf *m; 2560 int error, pkts; 2561 2562 txd = txr->hn_agg_txd; 2563 KASSERT(txd != NULL, ("no aggregate txdesc")); 2564 2565 /* 2566 * Since hn_txpkt() will reset this temporary stat, save 2567 * it now, so that oerrors can be updated properly, if 2568 * hn_txpkt() ever fails. 2569 */ 2570 pkts = txr->hn_stat_pkts; 2571 2572 /* 2573 * Since txd's mbuf will _not_ be freed upon hn_txpkt() 2574 * failure, save it for later freeing, if hn_txpkt() ever 2575 * fails. 2576 */ 2577 m = txd->m; 2578 error = hn_txpkt(ifp, txr, txd); 2579 if (__predict_false(error)) { 2580 /* txd is freed, but m is not. */ 2581 m_freem(m); 2582 2583 txr->hn_flush_failed++; 2584 if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); 2585 } 2586 2587 /* Reset all aggregation states. 
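 * The next hn_try_txagg() call will then start a fresh aggregating
 * txdesc instead of appending to the one that was just flushed.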
*/ 2588 txr->hn_agg_txd = NULL; 2589 txr->hn_agg_szleft = 0; 2590 txr->hn_agg_pktleft = 0; 2591 txr->hn_agg_prevpkt = NULL; 2592 2593 return (error); 2594 } 2595 2596 static void * 2597 hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2598 int pktsize) 2599 { 2600 void *chim; 2601 2602 if (txr->hn_agg_txd != NULL) { 2603 if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { 2604 struct hn_txdesc *agg_txd = txr->hn_agg_txd; 2605 struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; 2606 int olen; 2607 2608 /* 2609 * Update the previous RNDIS packet's total length, 2610 * it can be increased due to the mandatory alignment 2611 * padding for this RNDIS packet. And update the 2612 * aggregating txdesc's chimney sending buffer size 2613 * accordingly. 2614 * 2615 * XXX 2616 * Zero-out the padding, as required by the RNDIS spec. 2617 */ 2618 olen = pkt->rm_len; 2619 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 2620 agg_txd->chim_size += pkt->rm_len - olen; 2621 2622 /* Link this txdesc to the parent. */ 2623 hn_txdesc_agg(agg_txd, txd); 2624 2625 chim = (uint8_t *)pkt + pkt->rm_len; 2626 /* Save the current packet for later fixup. */ 2627 txr->hn_agg_prevpkt = chim; 2628 2629 txr->hn_agg_pktleft--; 2630 txr->hn_agg_szleft -= pktsize; 2631 if (txr->hn_agg_szleft <= 2632 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 2633 /* 2634 * Probably can't aggregate more packets, 2635 * flush this aggregating txdesc proactively. 2636 */ 2637 txr->hn_agg_pktleft = 0; 2638 } 2639 /* Done! */ 2640 return (chim); 2641 } 2642 hn_flush_txagg(ifp, txr); 2643 } 2644 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 2645 2646 txr->hn_tx_chimney_tried++; 2647 txd->chim_index = hn_chim_alloc(txr->hn_sc); 2648 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 2649 return (NULL); 2650 txr->hn_tx_chimney++; 2651 2652 chim = txr->hn_sc->hn_chim + 2653 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 2654 2655 if (txr->hn_agg_pktmax > 1 && 2656 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 2657 txr->hn_agg_txd = txd; 2658 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 2659 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 2660 txr->hn_agg_prevpkt = chim; 2661 } 2662 return (chim); 2663 } 2664 2665 /* 2666 * NOTE: 2667 * If this function fails, then both txd and m_head0 will be freed. 
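 * On success the mbuf is either copied into the chimney sending buffer
 * or DMA-loaded into a GPA list, and it is linked to the txdesc so the
 * send-completion path (hn_txpkt_done()) releases it.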
2668 */ 2669 static int 2670 hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2671 struct mbuf **m_head0) 2672 { 2673 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 2674 int error, nsegs, i; 2675 struct mbuf *m_head = *m_head0; 2676 struct rndis_packet_msg *pkt; 2677 uint32_t *pi_data; 2678 void *chim = NULL; 2679 int pkt_hlen, pkt_size; 2680 2681 pkt = txd->rndis_pkt; 2682 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 2683 if (pkt_size < txr->hn_chim_size) { 2684 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 2685 if (chim != NULL) 2686 pkt = chim; 2687 } else { 2688 if (txr->hn_agg_txd != NULL) 2689 hn_flush_txagg(ifp, txr); 2690 } 2691 2692 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 2693 pkt->rm_len = m_head->m_pkthdr.len; 2694 pkt->rm_dataoffset = 0; 2695 pkt->rm_datalen = m_head->m_pkthdr.len; 2696 pkt->rm_oobdataoffset = 0; 2697 pkt->rm_oobdatalen = 0; 2698 pkt->rm_oobdataelements = 0; 2699 pkt->rm_pktinfooffset = sizeof(*pkt); 2700 pkt->rm_pktinfolen = 0; 2701 pkt->rm_vchandle = 0; 2702 pkt->rm_reserved = 0; 2703 2704 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 2705 /* 2706 * Set the hash value for this packet, so that the host could 2707 * dispatch the TX done event for this packet back to this TX 2708 * ring's channel. 2709 */ 2710 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2711 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 2712 *pi_data = txr->hn_tx_idx; 2713 } 2714 2715 if (m_head->m_flags & M_VLANTAG) { 2716 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2717 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 2718 *pi_data = NDIS_VLAN_INFO_MAKE( 2719 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 2720 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 2721 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 2722 } 2723 2724 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 2725 #if defined(INET6) || defined(INET) 2726 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2727 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 2728 #ifdef INET 2729 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 2730 *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0, 2731 m_head->m_pkthdr.tso_segsz); 2732 } 2733 #endif 2734 #if defined(INET6) && defined(INET) 2735 else 2736 #endif 2737 #ifdef INET6 2738 { 2739 *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0, 2740 m_head->m_pkthdr.tso_segsz); 2741 } 2742 #endif 2743 #endif /* INET6 || INET */ 2744 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 2745 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2746 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 2747 if (m_head->m_pkthdr.csum_flags & 2748 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 2749 *pi_data = NDIS_TXCSUM_INFO_IPV6; 2750 } else { 2751 *pi_data = NDIS_TXCSUM_INFO_IPV4; 2752 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 2753 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 2754 } 2755 2756 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) 2757 *pi_data |= NDIS_TXCSUM_INFO_TCPCS; 2758 else if (m_head->m_pkthdr.csum_flags & 2759 (CSUM_IP_UDP | CSUM_IP6_UDP)) 2760 *pi_data |= NDIS_TXCSUM_INFO_UDPCS; 2761 } 2762 2763 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 2764 /* Fixup RNDIS packet message total length */ 2765 pkt->rm_len += pkt_hlen; 2766 /* Convert RNDIS packet message offsets */ 2767 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); 2768 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 2769 2770 /* 2771 * Fast path: Chimney sending. 
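 * The RNDIS packet message was built directly in the chimney (send)
 * buffer and the payload is copied in right behind it, so no GPA list
 * is needed; the slow path below loads the mbuf into a GPA list instead.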
2772 */ 2773 if (chim != NULL) { 2774 struct hn_txdesc *tgt_txd = txd; 2775 2776 if (txr->hn_agg_txd != NULL) { 2777 tgt_txd = txr->hn_agg_txd; 2778 #ifdef INVARIANTS 2779 *m_head0 = NULL; 2780 #endif 2781 } 2782 2783 KASSERT(pkt == chim, 2784 ("RNDIS pkt not in chimney sending buffer")); 2785 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 2786 ("chimney sending buffer is not used")); 2787 tgt_txd->chim_size += pkt->rm_len; 2788 2789 m_copydata(m_head, 0, m_head->m_pkthdr.len, 2790 ((uint8_t *)chim) + pkt_hlen); 2791 2792 txr->hn_gpa_cnt = 0; 2793 txr->hn_sendpkt = hn_txpkt_chim; 2794 goto done; 2795 } 2796 2797 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 2798 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2799 ("chimney buffer is used")); 2800 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 2801 2802 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 2803 if (__predict_false(error)) { 2804 int freed; 2805 2806 /* 2807 * This mbuf is not linked w/ the txd yet, so free it now. 2808 */ 2809 m_freem(m_head); 2810 *m_head0 = NULL; 2811 2812 freed = hn_txdesc_put(txr, txd); 2813 KASSERT(freed != 0, 2814 ("fail to free txd upon txdma error")); 2815 2816 txr->hn_txdma_failed++; 2817 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2818 return error; 2819 } 2820 *m_head0 = m_head; 2821 2822 /* +1 RNDIS packet message */ 2823 txr->hn_gpa_cnt = nsegs + 1; 2824 2825 /* send packet with page buffer */ 2826 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 2827 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 2828 txr->hn_gpa[0].gpa_len = pkt_hlen; 2829 2830 /* 2831 * Fill the page buffers with mbuf info after the page 2832 * buffer for RNDIS packet message. 2833 */ 2834 for (i = 0; i < nsegs; ++i) { 2835 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 2836 2837 gpa->gpa_page = atop(segs[i].ds_addr); 2838 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 2839 gpa->gpa_len = segs[i].ds_len; 2840 } 2841 2842 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2843 txd->chim_size = 0; 2844 txr->hn_sendpkt = hn_txpkt_sglist; 2845 done: 2846 txd->m = m_head; 2847 2848 /* Set the completion routine */ 2849 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 2850 2851 /* Update temporary stats for later use. */ 2852 txr->hn_stat_pkts++; 2853 txr->hn_stat_size += m_head->m_pkthdr.len; 2854 if (m_head->m_flags & M_MCAST) 2855 txr->hn_stat_mcasts++; 2856 2857 return 0; 2858 } 2859 2860 /* 2861 * NOTE: 2862 * If this function fails, then txd will be freed, but the mbuf 2863 * associated w/ the txd will _not_ be freed. 2864 */ 2865 static int 2866 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 2867 { 2868 int error, send_failed = 0, has_bpf; 2869 2870 again: 2871 has_bpf = bpf_peers_present(ifp->if_bpf); 2872 if (has_bpf) { 2873 /* 2874 * Make sure that this txd and any aggregated txds are not 2875 * freed before ETHER_BPF_MTAP. 
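 * The extra reference taken by hn_txdesc_hold() below is dropped via
 * hn_txdesc_put() once the BPF taps are done.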
2876 */ 2877 hn_txdesc_hold(txd); 2878 } 2879 error = txr->hn_sendpkt(txr, txd); 2880 if (!error) { 2881 if (has_bpf) { 2882 const struct hn_txdesc *tmp_txd; 2883 2884 ETHER_BPF_MTAP(ifp, txd->m); 2885 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 2886 ETHER_BPF_MTAP(ifp, tmp_txd->m); 2887 } 2888 2889 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 2890 #ifdef HN_IFSTART_SUPPORT 2891 if (!hn_use_if_start) 2892 #endif 2893 { 2894 if_inc_counter(ifp, IFCOUNTER_OBYTES, 2895 txr->hn_stat_size); 2896 if (txr->hn_stat_mcasts != 0) { 2897 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 2898 txr->hn_stat_mcasts); 2899 } 2900 } 2901 txr->hn_pkts += txr->hn_stat_pkts; 2902 txr->hn_sends++; 2903 } 2904 if (has_bpf) 2905 hn_txdesc_put(txr, txd); 2906 2907 if (__predict_false(error)) { 2908 int freed; 2909 2910 /* 2911 * This should "really rarely" happen. 2912 * 2913 * XXX Too many RX to be acked or too many sideband 2914 * commands to run? Ask netvsc_channel_rollup() 2915 * to kick start later. 2916 */ 2917 txr->hn_has_txeof = 1; 2918 if (!send_failed) { 2919 txr->hn_send_failed++; 2920 send_failed = 1; 2921 /* 2922 * Try sending again after set hn_has_txeof; 2923 * in case that we missed the last 2924 * netvsc_channel_rollup(). 2925 */ 2926 goto again; 2927 } 2928 if_printf(ifp, "send failed\n"); 2929 2930 /* 2931 * Caller will perform further processing on the 2932 * associated mbuf, so don't free it in hn_txdesc_put(); 2933 * only unload it from the DMA map in hn_txdesc_put(), 2934 * if it was loaded. 2935 */ 2936 txd->m = NULL; 2937 freed = hn_txdesc_put(txr, txd); 2938 KASSERT(freed != 0, 2939 ("fail to free txd upon send error")); 2940 2941 txr->hn_send_failed++; 2942 } 2943 2944 /* Reset temporary stats, after this sending is done. */ 2945 txr->hn_stat_size = 0; 2946 txr->hn_stat_pkts = 0; 2947 txr->hn_stat_mcasts = 0; 2948 2949 return (error); 2950 } 2951 2952 /* 2953 * Append the specified data to the indicated mbuf chain, 2954 * Extend the mbuf chain if the new data does not fit in 2955 * existing space. 2956 * 2957 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 2958 * There should be an equivalent in the kernel mbuf code, 2959 * but there does not appear to be one yet. 2960 * 2961 * Differs from m_append() in that additional mbufs are 2962 * allocated with cluster size MJUMPAGESIZE, and filled 2963 * accordingly. 2964 * 2965 * Return 1 if able to complete the job; otherwise 0. 2966 */ 2967 static int 2968 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 2969 { 2970 struct mbuf *m, *n; 2971 int remainder, space; 2972 2973 for (m = m0; m->m_next != NULL; m = m->m_next) 2974 ; 2975 remainder = len; 2976 space = M_TRAILINGSPACE(m); 2977 if (space > 0) { 2978 /* 2979 * Copy into available space. 2980 */ 2981 if (space > remainder) 2982 space = remainder; 2983 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 2984 m->m_len += space; 2985 cp += space; 2986 remainder -= space; 2987 } 2988 while (remainder > 0) { 2989 /* 2990 * Allocate a new mbuf; could check space 2991 * and allocate a cluster instead. 
2992 */ 2993 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 2994 if (n == NULL) 2995 break; 2996 n->m_len = min(MJUMPAGESIZE, remainder); 2997 bcopy(cp, mtod(n, caddr_t), n->m_len); 2998 cp += n->m_len; 2999 remainder -= n->m_len; 3000 m->m_next = n; 3001 m = n; 3002 } 3003 if (m0->m_flags & M_PKTHDR) 3004 m0->m_pkthdr.len += len - remainder; 3005 3006 return (remainder == 0); 3007 } 3008 3009 #if defined(INET) || defined(INET6) 3010 static __inline int 3011 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 3012 { 3013 #if __FreeBSD_version >= 1100095 3014 if (hn_lro_mbufq_depth) { 3015 tcp_lro_queue_mbuf(lc, m); 3016 return 0; 3017 } 3018 #endif 3019 return tcp_lro_rx(lc, m, 0); 3020 } 3021 #endif 3022 3023 static int 3024 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 3025 const struct hn_rxinfo *info) 3026 { 3027 struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; 3028 struct mbuf *m_new; 3029 int size, do_lro = 0, do_csum = 1; 3030 int hash_type; 3031 3032 /* 3033 * If the non-transparent mode VF is active, inject this packet 3034 * into the VF. 3035 */ 3036 ifp = rxr->hn_rxvf_ifp ? rxr->hn_rxvf_ifp : hn_ifp; 3037 3038 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 3039 /* 3040 * NOTE: 3041 * See the NOTE of hn_rndis_init_fixat(). This 3042 * function can be reached, immediately after the 3043 * RNDIS is initialized but before the ifnet is 3044 * setup on the hn_attach() path; drop the unexpected 3045 * packets. 3046 */ 3047 return (0); 3048 } 3049 3050 if (__predict_false(dlen < ETHER_HDR_LEN)) { 3051 if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); 3052 return (0); 3053 } 3054 3055 if (dlen <= MHLEN) { 3056 m_new = m_gethdr(M_NOWAIT, MT_DATA); 3057 if (m_new == NULL) { 3058 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3059 return (0); 3060 } 3061 memcpy(mtod(m_new, void *), data, dlen); 3062 m_new->m_pkthdr.len = m_new->m_len = dlen; 3063 rxr->hn_small_pkts++; 3064 } else { 3065 /* 3066 * Get an mbuf with a cluster. For packets 2K or less, 3067 * get a standard 2K cluster. For anything larger, get a 3068 * 4K cluster. Any buffers larger than 4K can cause problems 3069 * if looped around to the Hyper-V TX channel, so avoid them. 3070 */ 3071 size = MCLBYTES; 3072 if (dlen > MCLBYTES) { 3073 /* 4096 */ 3074 size = MJUMPAGESIZE; 3075 } 3076 3077 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 3078 if (m_new == NULL) { 3079 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3080 return (0); 3081 } 3082 3083 hv_m_append(m_new, dlen, data); 3084 } 3085 m_new->m_pkthdr.rcvif = ifp; 3086 3087 if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) 3088 do_csum = 0; 3089 3090 /* receive side checksum offload */ 3091 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 3092 /* IP csum offload */ 3093 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 3094 m_new->m_pkthdr.csum_flags |= 3095 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3096 rxr->hn_csum_ip++; 3097 } 3098 3099 /* TCP/UDP csum offload */ 3100 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 3101 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 3102 m_new->m_pkthdr.csum_flags |= 3103 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3104 m_new->m_pkthdr.csum_data = 0xffff; 3105 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 3106 rxr->hn_csum_tcp++; 3107 else 3108 rxr->hn_csum_udp++; 3109 } 3110 3111 /* 3112 * XXX 3113 * As of this write (Oct 28th, 2016), host side will turn 3114 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 3115 * the do_lro setting here is actually _not_ accurate. 
We 3116 * depend on the RSS hash type check to reset do_lro. 3117 */ 3118 if ((info->csum_info & 3119 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 3120 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 3121 do_lro = 1; 3122 } else { 3123 const struct ether_header *eh; 3124 uint16_t etype; 3125 int hoff; 3126 3127 hoff = sizeof(*eh); 3128 /* Checked at the beginning of this function. */ 3129 KASSERT(m_new->m_len >= hoff, ("not ethernet frame")); 3130 3131 eh = mtod(m_new, struct ether_header *); 3132 etype = ntohs(eh->ether_type); 3133 if (etype == ETHERTYPE_VLAN) { 3134 const struct ether_vlan_header *evl; 3135 3136 hoff = sizeof(*evl); 3137 if (m_new->m_len < hoff) 3138 goto skip; 3139 evl = mtod(m_new, struct ether_vlan_header *); 3140 etype = ntohs(evl->evl_proto); 3141 } 3142 3143 if (etype == ETHERTYPE_IP) { 3144 int pr; 3145 3146 pr = hn_check_iplen(m_new, hoff); 3147 if (pr == IPPROTO_TCP) { 3148 if (do_csum && 3149 (rxr->hn_trust_hcsum & 3150 HN_TRUST_HCSUM_TCP)) { 3151 rxr->hn_csum_trusted++; 3152 m_new->m_pkthdr.csum_flags |= 3153 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3154 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3155 m_new->m_pkthdr.csum_data = 0xffff; 3156 } 3157 do_lro = 1; 3158 } else if (pr == IPPROTO_UDP) { 3159 if (do_csum && 3160 (rxr->hn_trust_hcsum & 3161 HN_TRUST_HCSUM_UDP)) { 3162 rxr->hn_csum_trusted++; 3163 m_new->m_pkthdr.csum_flags |= 3164 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3165 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3166 m_new->m_pkthdr.csum_data = 0xffff; 3167 } 3168 } else if (pr != IPPROTO_DONE && do_csum && 3169 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 3170 rxr->hn_csum_trusted++; 3171 m_new->m_pkthdr.csum_flags |= 3172 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3173 } 3174 } 3175 } 3176 skip: 3177 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 3178 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 3179 NDIS_VLAN_INFO_ID(info->vlan_info), 3180 NDIS_VLAN_INFO_PRI(info->vlan_info), 3181 NDIS_VLAN_INFO_CFI(info->vlan_info)); 3182 m_new->m_flags |= M_VLANTAG; 3183 } 3184 3185 /* 3186 * If VF is activated (tranparent/non-transparent mode does not 3187 * matter here). 3188 * 3189 * - Don't setup mbuf hash, if 'options RSS' is set. 3190 * 3191 * In Azure, when VF is activated, TCP SYN and SYN|ACK go 3192 * through hn(4) while the rest of segments and ACKs belonging 3193 * to the same TCP 4-tuple go through the VF. So don't setup 3194 * mbuf hash, if a VF is activated and 'options RSS' is not 3195 * enabled. hn(4) and the VF may use neither the same RSS 3196 * hash key nor the same RSS hash function, so the hash value 3197 * for packets belonging to the same flow could be different! 3198 * 3199 * - Disable LRO 3200 * 3201 * hn(4) will only receive broadcast packets, multicast packets, 3202 * TCP SYN and SYN|ACK (in Azure), LRO is useless for these 3203 * packet types. 3204 * 3205 * For non-transparent, we definitely _cannot_ enable LRO at 3206 * all, since the LRO flush will use hn(4) as the receiving 3207 * interface; i.e. hn_ifp->if_input(hn_ifp, m). 3208 */ 3209 if (hn_ifp != ifp || (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF)) { 3210 do_lro = 0; /* disable LRO. 
*/ 3211 #ifndef RSS 3212 goto skip_hash; /* skip mbuf hash setup */ 3213 #endif 3214 } 3215 3216 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 3217 rxr->hn_rss_pkts++; 3218 m_new->m_pkthdr.flowid = info->hash_value; 3219 hash_type = M_HASHTYPE_OPAQUE_HASH; 3220 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 3221 NDIS_HASH_FUNCTION_TOEPLITZ) { 3222 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK); 3223 3224 /* 3225 * NOTE: 3226 * do_lro is resetted, if the hash types are not TCP 3227 * related. See the comment in the above csum_flags 3228 * setup section. 3229 */ 3230 switch (type) { 3231 case NDIS_HASH_IPV4: 3232 hash_type = M_HASHTYPE_RSS_IPV4; 3233 do_lro = 0; 3234 break; 3235 3236 case NDIS_HASH_TCP_IPV4: 3237 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 3238 break; 3239 3240 case NDIS_HASH_IPV6: 3241 hash_type = M_HASHTYPE_RSS_IPV6; 3242 do_lro = 0; 3243 break; 3244 3245 case NDIS_HASH_IPV6_EX: 3246 hash_type = M_HASHTYPE_RSS_IPV6_EX; 3247 do_lro = 0; 3248 break; 3249 3250 case NDIS_HASH_TCP_IPV6: 3251 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 3252 break; 3253 3254 case NDIS_HASH_TCP_IPV6_EX: 3255 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 3256 break; 3257 } 3258 } 3259 } else { 3260 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 3261 hash_type = M_HASHTYPE_OPAQUE; 3262 } 3263 M_HASHTYPE_SET(m_new, hash_type); 3264 3265 #ifndef RSS 3266 skip_hash: 3267 #endif 3268 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 3269 if (hn_ifp != ifp) { 3270 const struct ether_header *eh; 3271 3272 /* 3273 * Non-transparent mode VF is activated. 3274 */ 3275 3276 /* 3277 * Allow tapping on hn(4). 3278 */ 3279 ETHER_BPF_MTAP(hn_ifp, m_new); 3280 3281 /* 3282 * Update hn(4)'s stats. 3283 */ 3284 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 3285 if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len); 3286 /* Checked at the beginning of this function. */ 3287 KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame")); 3288 eh = mtod(m_new, struct ether_header *); 3289 if (ETHER_IS_MULTICAST(eh->ether_dhost)) 3290 if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1); 3291 } 3292 rxr->hn_pkts++; 3293 3294 if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) { 3295 #if defined(INET) || defined(INET6) 3296 struct lro_ctrl *lro = &rxr->hn_lro; 3297 3298 if (lro->lro_cnt) { 3299 rxr->hn_lro_tried++; 3300 if (hn_lro_rx(lro, m_new) == 0) { 3301 /* DONE! 
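 * The mbuf is now owned by LRO and will be passed up when the ring is
 * flushed (see tcp_lro_flush_all() in hn_chan_rollup()).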
*/ 3302 return 0; 3303 } 3304 } 3305 #endif 3306 } 3307 ifp->if_input(ifp, m_new); 3308 3309 return (0); 3310 } 3311 3312 static int 3313 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 3314 { 3315 struct hn_softc *sc = ifp->if_softc; 3316 struct ifreq *ifr = (struct ifreq *)data, ifr_vf; 3317 struct ifnet *vf_ifp; 3318 int mask, error = 0; 3319 3320 switch (cmd) { 3321 case SIOCSIFMTU: 3322 if (ifr->ifr_mtu > HN_MTU_MAX) { 3323 error = EINVAL; 3324 break; 3325 } 3326 3327 HN_LOCK(sc); 3328 3329 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3330 HN_UNLOCK(sc); 3331 break; 3332 } 3333 3334 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 3335 /* Can't change MTU */ 3336 HN_UNLOCK(sc); 3337 error = EOPNOTSUPP; 3338 break; 3339 } 3340 3341 if (ifp->if_mtu == ifr->ifr_mtu) { 3342 HN_UNLOCK(sc); 3343 break; 3344 } 3345 3346 if (hn_xpnt_vf_isready(sc)) { 3347 vf_ifp = sc->hn_vf_ifp; 3348 ifr_vf = *ifr; 3349 strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname, 3350 sizeof(ifr_vf.ifr_name)); 3351 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, 3352 (caddr_t)&ifr_vf); 3353 if (error) { 3354 HN_UNLOCK(sc); 3355 if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n", 3356 vf_ifp->if_xname, ifr->ifr_mtu, error); 3357 break; 3358 } 3359 } 3360 3361 /* 3362 * Suspend this interface before the synthetic parts 3363 * are ripped. 3364 */ 3365 hn_suspend(sc); 3366 3367 /* 3368 * Detach the synthetics parts, i.e. NVS and RNDIS. 3369 */ 3370 hn_synth_detach(sc); 3371 3372 /* 3373 * Reattach the synthetic parts, i.e. NVS and RNDIS, 3374 * with the new MTU setting. 3375 */ 3376 error = hn_synth_attach(sc, ifr->ifr_mtu); 3377 if (error) { 3378 HN_UNLOCK(sc); 3379 break; 3380 } 3381 3382 /* 3383 * Commit the requested MTU, after the synthetic parts 3384 * have been successfully attached. 3385 */ 3386 ifp->if_mtu = ifr->ifr_mtu; 3387 3388 /* 3389 * Synthetic parts' reattach may change the chimney 3390 * sending size; update it. 3391 */ 3392 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 3393 hn_set_chim_size(sc, sc->hn_chim_szmax); 3394 3395 /* 3396 * Make sure that various parameters based on MTU are 3397 * still valid, after the MTU change. 3398 */ 3399 hn_mtu_change_fixup(sc); 3400 3401 /* 3402 * All done! Resume the interface now. 3403 */ 3404 hn_resume(sc); 3405 3406 if ((sc->hn_flags & HN_FLAG_RXVF) || 3407 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 3408 /* 3409 * Since we have reattached the NVS part, 3410 * change the datapath to VF again; in case 3411 * that it is lost, after the NVS was detached. 3412 */ 3413 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 3414 } 3415 3416 HN_UNLOCK(sc); 3417 break; 3418 3419 case SIOCSIFFLAGS: 3420 HN_LOCK(sc); 3421 3422 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3423 HN_UNLOCK(sc); 3424 break; 3425 } 3426 3427 if (hn_xpnt_vf_isready(sc)) 3428 hn_xpnt_vf_saveifflags(sc); 3429 3430 if (ifp->if_flags & IFF_UP) { 3431 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3432 /* 3433 * Caller meight hold mutex, e.g. 3434 * bpf; use busy-wait for the RNDIS 3435 * reply. 
3436 */ 3437 HN_NO_SLEEPING(sc); 3438 hn_rxfilter_config(sc); 3439 HN_SLEEPING_OK(sc); 3440 3441 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 3442 error = hn_xpnt_vf_iocsetflags(sc); 3443 } else { 3444 hn_init_locked(sc); 3445 } 3446 } else { 3447 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3448 hn_stop(sc, false); 3449 } 3450 sc->hn_if_flags = ifp->if_flags; 3451 3452 HN_UNLOCK(sc); 3453 break; 3454 3455 case SIOCSIFCAP: 3456 HN_LOCK(sc); 3457 3458 if (hn_xpnt_vf_isready(sc)) { 3459 ifr_vf = *ifr; 3460 strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname, 3461 sizeof(ifr_vf.ifr_name)); 3462 error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf); 3463 HN_UNLOCK(sc); 3464 break; 3465 } 3466 3467 /* 3468 * Fix up requested capabilities w/ supported capabilities, 3469 * since the supported capabilities could have been changed. 3470 */ 3471 mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ 3472 ifp->if_capenable; 3473 3474 if (mask & IFCAP_TXCSUM) { 3475 ifp->if_capenable ^= IFCAP_TXCSUM; 3476 if (ifp->if_capenable & IFCAP_TXCSUM) 3477 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 3478 else 3479 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 3480 } 3481 if (mask & IFCAP_TXCSUM_IPV6) { 3482 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 3483 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 3484 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 3485 else 3486 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 3487 } 3488 3489 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 3490 if (mask & IFCAP_RXCSUM) 3491 ifp->if_capenable ^= IFCAP_RXCSUM; 3492 #ifdef foo 3493 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 3494 if (mask & IFCAP_RXCSUM_IPV6) 3495 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 3496 #endif 3497 3498 if (mask & IFCAP_LRO) 3499 ifp->if_capenable ^= IFCAP_LRO; 3500 3501 if (mask & IFCAP_TSO4) { 3502 ifp->if_capenable ^= IFCAP_TSO4; 3503 if (ifp->if_capenable & IFCAP_TSO4) 3504 ifp->if_hwassist |= CSUM_IP_TSO; 3505 else 3506 ifp->if_hwassist &= ~CSUM_IP_TSO; 3507 } 3508 if (mask & IFCAP_TSO6) { 3509 ifp->if_capenable ^= IFCAP_TSO6; 3510 if (ifp->if_capenable & IFCAP_TSO6) 3511 ifp->if_hwassist |= CSUM_IP6_TSO; 3512 else 3513 ifp->if_hwassist &= ~CSUM_IP6_TSO; 3514 } 3515 3516 HN_UNLOCK(sc); 3517 break; 3518 3519 case SIOCADDMULTI: 3520 case SIOCDELMULTI: 3521 HN_LOCK(sc); 3522 3523 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3524 HN_UNLOCK(sc); 3525 break; 3526 } 3527 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3528 /* 3529 * Multicast uses mutex; use busy-wait for 3530 * the RNDIS reply. 3531 */ 3532 HN_NO_SLEEPING(sc); 3533 hn_rxfilter_config(sc); 3534 HN_SLEEPING_OK(sc); 3535 } 3536 3537 /* XXX vlan(4) style mcast addr maintenance */ 3538 if (hn_xpnt_vf_isready(sc)) { 3539 int old_if_flags; 3540 3541 old_if_flags = sc->hn_vf_ifp->if_flags; 3542 hn_xpnt_vf_saveifflags(sc); 3543 3544 if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && 3545 ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & 3546 IFF_ALLMULTI)) 3547 error = hn_xpnt_vf_iocsetflags(sc); 3548 } 3549 3550 HN_UNLOCK(sc); 3551 break; 3552 3553 case SIOCSIFMEDIA: 3554 case SIOCGIFMEDIA: 3555 HN_LOCK(sc); 3556 if (hn_xpnt_vf_isready(sc)) { 3557 /* 3558 * SIOCGIFMEDIA expects ifmediareq, so don't 3559 * create and pass ifr_vf to the VF here; just 3560 * replace the ifr_name. 3561 */ 3562 vf_ifp = sc->hn_vf_ifp; 3563 strlcpy(ifr->ifr_name, vf_ifp->if_xname, 3564 sizeof(ifr->ifr_name)); 3565 error = vf_ifp->if_ioctl(vf_ifp, cmd, data); 3566 /* Restore the ifr_name. 
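 * which was pointed at the VF above so that the VF driver would handle
 * this media ioctl.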
*/ 3567 strlcpy(ifr->ifr_name, ifp->if_xname, 3568 sizeof(ifr->ifr_name)); 3569 HN_UNLOCK(sc); 3570 break; 3571 } 3572 HN_UNLOCK(sc); 3573 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 3574 break; 3575 3576 default: 3577 error = ether_ioctl(ifp, cmd, data); 3578 break; 3579 } 3580 return (error); 3581 } 3582 3583 static void 3584 hn_stop(struct hn_softc *sc, bool detaching) 3585 { 3586 struct ifnet *ifp = sc->hn_ifp; 3587 int i; 3588 3589 HN_LOCK_ASSERT(sc); 3590 3591 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 3592 ("synthetic parts were not attached")); 3593 3594 /* Clear RUNNING bit ASAP. */ 3595 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 3596 3597 /* Disable polling. */ 3598 hn_polling(sc, 0); 3599 3600 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 3601 KASSERT(sc->hn_vf_ifp != NULL, 3602 ("%s: VF is not attached", ifp->if_xname)); 3603 3604 /* Mark transparent mode VF as disabled. */ 3605 hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */); 3606 3607 /* 3608 * NOTE: 3609 * Datapath setting must happen _before_ bringing 3610 * the VF down. 3611 */ 3612 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 3613 3614 /* 3615 * Bring the VF down. 3616 */ 3617 hn_xpnt_vf_saveifflags(sc); 3618 sc->hn_vf_ifp->if_flags &= ~IFF_UP; 3619 hn_xpnt_vf_iocsetflags(sc); 3620 } 3621 3622 /* Suspend data transfers. */ 3623 hn_suspend_data(sc); 3624 3625 /* Clear OACTIVE bit. */ 3626 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 3627 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 3628 sc->hn_tx_ring[i].hn_oactive = 0; 3629 3630 /* 3631 * If the non-transparent mode VF is active, make sure 3632 * that the RX filter still allows packet reception. 3633 */ 3634 if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) 3635 hn_rxfilter_config(sc); 3636 } 3637 3638 static void 3639 hn_init_locked(struct hn_softc *sc) 3640 { 3641 struct ifnet *ifp = sc->hn_ifp; 3642 int i; 3643 3644 HN_LOCK_ASSERT(sc); 3645 3646 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 3647 return; 3648 3649 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3650 return; 3651 3652 /* Configure RX filter */ 3653 hn_rxfilter_config(sc); 3654 3655 /* Clear OACTIVE bit. */ 3656 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 3657 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 3658 sc->hn_tx_ring[i].hn_oactive = 0; 3659 3660 /* Clear TX 'suspended' bit. */ 3661 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 3662 3663 if (hn_xpnt_vf_isready(sc)) { 3664 /* Initialize transparent VF. */ 3665 hn_xpnt_vf_init(sc); 3666 } 3667 3668 /* Everything is ready; unleash! */ 3669 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 3670 3671 /* Re-enable polling if requested. 
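 * hn_pollhz is set through the 'polling' sysctl; a value of zero keeps
 * polling disabled (vmbus_chan_poll_disable()).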
*/ 3672 if (sc->hn_pollhz > 0) 3673 hn_polling(sc, sc->hn_pollhz); 3674 } 3675 3676 static void 3677 hn_init(void *xsc) 3678 { 3679 struct hn_softc *sc = xsc; 3680 3681 HN_LOCK(sc); 3682 hn_init_locked(sc); 3683 HN_UNLOCK(sc); 3684 } 3685 3686 #if __FreeBSD_version >= 1100099 3687 3688 static int 3689 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 3690 { 3691 struct hn_softc *sc = arg1; 3692 unsigned int lenlim; 3693 int error; 3694 3695 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 3696 error = sysctl_handle_int(oidp, &lenlim, 0, req); 3697 if (error || req->newptr == NULL) 3698 return error; 3699 3700 HN_LOCK(sc); 3701 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 3702 lenlim > TCP_LRO_LENGTH_MAX) { 3703 HN_UNLOCK(sc); 3704 return EINVAL; 3705 } 3706 hn_set_lro_lenlim(sc, lenlim); 3707 HN_UNLOCK(sc); 3708 3709 return 0; 3710 } 3711 3712 static int 3713 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 3714 { 3715 struct hn_softc *sc = arg1; 3716 int ackcnt, error, i; 3717 3718 /* 3719 * lro_ackcnt_lim is append count limit, 3720 * +1 to turn it into aggregation limit. 3721 */ 3722 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 3723 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 3724 if (error || req->newptr == NULL) 3725 return error; 3726 3727 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 3728 return EINVAL; 3729 3730 /* 3731 * Convert aggregation limit back to append 3732 * count limit. 3733 */ 3734 --ackcnt; 3735 HN_LOCK(sc); 3736 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 3737 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 3738 HN_UNLOCK(sc); 3739 return 0; 3740 } 3741 3742 #endif 3743 3744 static int 3745 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 3746 { 3747 struct hn_softc *sc = arg1; 3748 int hcsum = arg2; 3749 int on, error, i; 3750 3751 on = 0; 3752 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 3753 on = 1; 3754 3755 error = sysctl_handle_int(oidp, &on, 0, req); 3756 if (error || req->newptr == NULL) 3757 return error; 3758 3759 HN_LOCK(sc); 3760 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3761 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 3762 3763 if (on) 3764 rxr->hn_trust_hcsum |= hcsum; 3765 else 3766 rxr->hn_trust_hcsum &= ~hcsum; 3767 } 3768 HN_UNLOCK(sc); 3769 return 0; 3770 } 3771 3772 static int 3773 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 3774 { 3775 struct hn_softc *sc = arg1; 3776 int chim_size, error; 3777 3778 chim_size = sc->hn_tx_ring[0].hn_chim_size; 3779 error = sysctl_handle_int(oidp, &chim_size, 0, req); 3780 if (error || req->newptr == NULL) 3781 return error; 3782 3783 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 3784 return EINVAL; 3785 3786 HN_LOCK(sc); 3787 hn_set_chim_size(sc, chim_size); 3788 HN_UNLOCK(sc); 3789 return 0; 3790 } 3791 3792 #if __FreeBSD_version < 1100095 3793 static int 3794 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 3795 { 3796 struct hn_softc *sc = arg1; 3797 int ofs = arg2, i, error; 3798 struct hn_rx_ring *rxr; 3799 uint64_t stat; 3800 3801 stat = 0; 3802 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3803 rxr = &sc->hn_rx_ring[i]; 3804 stat += *((int *)((uint8_t *)rxr + ofs)); 3805 } 3806 3807 error = sysctl_handle_64(oidp, &stat, 0, req); 3808 if (error || req->newptr == NULL) 3809 return error; 3810 3811 /* Zero out this stat. 
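 * This is only reached on a write (a plain read returns early above),
 * so writing any value to the sysctl clears the per-ring counters that
 * were just summed.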
*/ 3812 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3813 rxr = &sc->hn_rx_ring[i]; 3814 *((int *)((uint8_t *)rxr + ofs)) = 0; 3815 } 3816 return 0; 3817 } 3818 #else 3819 static int 3820 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 3821 { 3822 struct hn_softc *sc = arg1; 3823 int ofs = arg2, i, error; 3824 struct hn_rx_ring *rxr; 3825 uint64_t stat; 3826 3827 stat = 0; 3828 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3829 rxr = &sc->hn_rx_ring[i]; 3830 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 3831 } 3832 3833 error = sysctl_handle_64(oidp, &stat, 0, req); 3834 if (error || req->newptr == NULL) 3835 return error; 3836 3837 /* Zero out this stat. */ 3838 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3839 rxr = &sc->hn_rx_ring[i]; 3840 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 3841 } 3842 return 0; 3843 } 3844 3845 #endif 3846 3847 static int 3848 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 3849 { 3850 struct hn_softc *sc = arg1; 3851 int ofs = arg2, i, error; 3852 struct hn_rx_ring *rxr; 3853 u_long stat; 3854 3855 stat = 0; 3856 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3857 rxr = &sc->hn_rx_ring[i]; 3858 stat += *((u_long *)((uint8_t *)rxr + ofs)); 3859 } 3860 3861 error = sysctl_handle_long(oidp, &stat, 0, req); 3862 if (error || req->newptr == NULL) 3863 return error; 3864 3865 /* Zero out this stat. */ 3866 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3867 rxr = &sc->hn_rx_ring[i]; 3868 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 3869 } 3870 return 0; 3871 } 3872 3873 static int 3874 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 3875 { 3876 struct hn_softc *sc = arg1; 3877 int ofs = arg2, i, error; 3878 struct hn_tx_ring *txr; 3879 u_long stat; 3880 3881 stat = 0; 3882 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3883 txr = &sc->hn_tx_ring[i]; 3884 stat += *((u_long *)((uint8_t *)txr + ofs)); 3885 } 3886 3887 error = sysctl_handle_long(oidp, &stat, 0, req); 3888 if (error || req->newptr == NULL) 3889 return error; 3890 3891 /* Zero out this stat. 
*/ 3892 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3893 txr = &sc->hn_tx_ring[i]; 3894 *((u_long *)((uint8_t *)txr + ofs)) = 0; 3895 } 3896 return 0; 3897 } 3898 3899 static int 3900 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 3901 { 3902 struct hn_softc *sc = arg1; 3903 int ofs = arg2, i, error, conf; 3904 struct hn_tx_ring *txr; 3905 3906 txr = &sc->hn_tx_ring[0]; 3907 conf = *((int *)((uint8_t *)txr + ofs)); 3908 3909 error = sysctl_handle_int(oidp, &conf, 0, req); 3910 if (error || req->newptr == NULL) 3911 return error; 3912 3913 HN_LOCK(sc); 3914 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3915 txr = &sc->hn_tx_ring[i]; 3916 *((int *)((uint8_t *)txr + ofs)) = conf; 3917 } 3918 HN_UNLOCK(sc); 3919 3920 return 0; 3921 } 3922 3923 static int 3924 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 3925 { 3926 struct hn_softc *sc = arg1; 3927 int error, size; 3928 3929 size = sc->hn_agg_size; 3930 error = sysctl_handle_int(oidp, &size, 0, req); 3931 if (error || req->newptr == NULL) 3932 return (error); 3933 3934 HN_LOCK(sc); 3935 sc->hn_agg_size = size; 3936 hn_set_txagg(sc); 3937 HN_UNLOCK(sc); 3938 3939 return (0); 3940 } 3941 3942 static int 3943 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 3944 { 3945 struct hn_softc *sc = arg1; 3946 int error, pkts; 3947 3948 pkts = sc->hn_agg_pkts; 3949 error = sysctl_handle_int(oidp, &pkts, 0, req); 3950 if (error || req->newptr == NULL) 3951 return (error); 3952 3953 HN_LOCK(sc); 3954 sc->hn_agg_pkts = pkts; 3955 hn_set_txagg(sc); 3956 HN_UNLOCK(sc); 3957 3958 return (0); 3959 } 3960 3961 static int 3962 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 3963 { 3964 struct hn_softc *sc = arg1; 3965 int pkts; 3966 3967 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 3968 return (sysctl_handle_int(oidp, &pkts, 0, req)); 3969 } 3970 3971 static int 3972 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 3973 { 3974 struct hn_softc *sc = arg1; 3975 int align; 3976 3977 align = sc->hn_tx_ring[0].hn_agg_align; 3978 return (sysctl_handle_int(oidp, &align, 0, req)); 3979 } 3980 3981 static void 3982 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) 3983 { 3984 if (pollhz == 0) 3985 vmbus_chan_poll_disable(chan); 3986 else 3987 vmbus_chan_poll_enable(chan, pollhz); 3988 } 3989 3990 static void 3991 hn_polling(struct hn_softc *sc, u_int pollhz) 3992 { 3993 int nsubch = sc->hn_rx_ring_inuse - 1; 3994 3995 HN_LOCK_ASSERT(sc); 3996 3997 if (nsubch > 0) { 3998 struct vmbus_channel **subch; 3999 int i; 4000 4001 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 4002 for (i = 0; i < nsubch; ++i) 4003 hn_chan_polling(subch[i], pollhz); 4004 vmbus_subchan_rel(subch, nsubch); 4005 } 4006 hn_chan_polling(sc->hn_prichan, pollhz); 4007 } 4008 4009 static int 4010 hn_polling_sysctl(SYSCTL_HANDLER_ARGS) 4011 { 4012 struct hn_softc *sc = arg1; 4013 int pollhz, error; 4014 4015 pollhz = sc->hn_pollhz; 4016 error = sysctl_handle_int(oidp, &pollhz, 0, req); 4017 if (error || req->newptr == NULL) 4018 return (error); 4019 4020 if (pollhz != 0 && 4021 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) 4022 return (EINVAL); 4023 4024 HN_LOCK(sc); 4025 if (sc->hn_pollhz != pollhz) { 4026 sc->hn_pollhz = pollhz; 4027 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && 4028 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 4029 hn_polling(sc, sc->hn_pollhz); 4030 } 4031 HN_UNLOCK(sc); 4032 4033 return (0); 4034 } 4035 4036 static int 4037 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 4038 { 4039 struct hn_softc *sc = arg1; 4040 char verstr[16]; 4041 4042 snprintf(verstr, sizeof(verstr), "%u.%u", 4043 
HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 4044 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 4045 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 4046 } 4047 4048 static int 4049 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 4050 { 4051 struct hn_softc *sc = arg1; 4052 char caps_str[128]; 4053 uint32_t caps; 4054 4055 HN_LOCK(sc); 4056 caps = sc->hn_caps; 4057 HN_UNLOCK(sc); 4058 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 4059 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 4060 } 4061 4062 static int 4063 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 4064 { 4065 struct hn_softc *sc = arg1; 4066 char assist_str[128]; 4067 uint32_t hwassist; 4068 4069 HN_LOCK(sc); 4070 hwassist = sc->hn_ifp->if_hwassist; 4071 HN_UNLOCK(sc); 4072 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 4073 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 4074 } 4075 4076 static int 4077 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 4078 { 4079 struct hn_softc *sc = arg1; 4080 char filter_str[128]; 4081 uint32_t filter; 4082 4083 HN_LOCK(sc); 4084 filter = sc->hn_rx_filter; 4085 HN_UNLOCK(sc); 4086 snprintf(filter_str, sizeof(filter_str), "%b", filter, 4087 NDIS_PACKET_TYPES); 4088 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 4089 } 4090 4091 #ifndef RSS 4092 4093 static int 4094 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 4095 { 4096 struct hn_softc *sc = arg1; 4097 int error; 4098 4099 HN_LOCK(sc); 4100 4101 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4102 if (error || req->newptr == NULL) 4103 goto back; 4104 4105 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4106 if (error) 4107 goto back; 4108 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 4109 4110 if (sc->hn_rx_ring_inuse > 1) { 4111 error = hn_rss_reconfig(sc); 4112 } else { 4113 /* Not RSS capable, at least for now; just save the RSS key. */ 4114 error = 0; 4115 } 4116 back: 4117 HN_UNLOCK(sc); 4118 return (error); 4119 } 4120 4121 static int 4122 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 4123 { 4124 struct hn_softc *sc = arg1; 4125 int error; 4126 4127 HN_LOCK(sc); 4128 4129 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4130 if (error || req->newptr == NULL) 4131 goto back; 4132 4133 /* 4134 * Don't allow RSS indirect table change, if this interface is not 4135 * RSS capable currently. 
4136 */ 4137 if (sc->hn_rx_ring_inuse == 1) { 4138 error = EOPNOTSUPP; 4139 goto back; 4140 } 4141 4142 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4143 if (error) 4144 goto back; 4145 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 4146 4147 hn_rss_ind_fixup(sc); 4148 error = hn_rss_reconfig(sc); 4149 back: 4150 HN_UNLOCK(sc); 4151 return (error); 4152 } 4153 4154 #endif /* !RSS */ 4155 4156 static int 4157 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 4158 { 4159 struct hn_softc *sc = arg1; 4160 char hash_str[128]; 4161 uint32_t hash; 4162 4163 HN_LOCK(sc); 4164 hash = sc->hn_rss_hash; 4165 HN_UNLOCK(sc); 4166 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4167 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4168 } 4169 4170 static int 4171 hn_vf_sysctl(SYSCTL_HANDLER_ARGS) 4172 { 4173 struct hn_softc *sc = arg1; 4174 char vf_name[IFNAMSIZ + 1]; 4175 struct ifnet *vf_ifp; 4176 4177 HN_LOCK(sc); 4178 vf_name[0] = '\0'; 4179 vf_ifp = sc->hn_vf_ifp; 4180 if (vf_ifp != NULL) 4181 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4182 HN_UNLOCK(sc); 4183 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4184 } 4185 4186 static int 4187 hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) 4188 { 4189 struct hn_softc *sc = arg1; 4190 char vf_name[IFNAMSIZ + 1]; 4191 struct ifnet *vf_ifp; 4192 4193 HN_LOCK(sc); 4194 vf_name[0] = '\0'; 4195 vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; 4196 if (vf_ifp != NULL) 4197 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4198 HN_UNLOCK(sc); 4199 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4200 } 4201 4202 static int 4203 hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) 4204 { 4205 struct rm_priotracker pt; 4206 struct sbuf *sb; 4207 int error, i; 4208 bool first; 4209 4210 error = sysctl_wire_old_buffer(req, 0); 4211 if (error != 0) 4212 return (error); 4213 4214 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4215 if (sb == NULL) 4216 return (ENOMEM); 4217 4218 rm_rlock(&hn_vfmap_lock, &pt); 4219 4220 first = true; 4221 for (i = 0; i < hn_vfmap_size; ++i) { 4222 struct ifnet *ifp; 4223 4224 if (hn_vfmap[i] == NULL) 4225 continue; 4226 4227 ifp = ifnet_byindex(i); 4228 if (ifp != NULL) { 4229 if (first) 4230 sbuf_printf(sb, "%s", ifp->if_xname); 4231 else 4232 sbuf_printf(sb, " %s", ifp->if_xname); 4233 first = false; 4234 } 4235 } 4236 4237 rm_runlock(&hn_vfmap_lock, &pt); 4238 4239 error = sbuf_finish(sb); 4240 sbuf_delete(sb); 4241 return (error); 4242 } 4243 4244 static int 4245 hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) 4246 { 4247 struct rm_priotracker pt; 4248 struct sbuf *sb; 4249 int error, i; 4250 bool first; 4251 4252 error = sysctl_wire_old_buffer(req, 0); 4253 if (error != 0) 4254 return (error); 4255 4256 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4257 if (sb == NULL) 4258 return (ENOMEM); 4259 4260 rm_rlock(&hn_vfmap_lock, &pt); 4261 4262 first = true; 4263 for (i = 0; i < hn_vfmap_size; ++i) { 4264 struct ifnet *ifp, *hn_ifp; 4265 4266 hn_ifp = hn_vfmap[i]; 4267 if (hn_ifp == NULL) 4268 continue; 4269 4270 ifp = ifnet_byindex(i); 4271 if (ifp != NULL) { 4272 if (first) { 4273 sbuf_printf(sb, "%s:%s", ifp->if_xname, 4274 hn_ifp->if_xname); 4275 } else { 4276 sbuf_printf(sb, " %s:%s", ifp->if_xname, 4277 hn_ifp->if_xname); 4278 } 4279 first = false; 4280 } 4281 } 4282 4283 rm_runlock(&hn_vfmap_lock, &pt); 4284 4285 error = sbuf_finish(sb); 4286 sbuf_delete(sb); 4287 return (error); 4288 } 4289 4290 static int 4291 hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS) 4292 { 
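	/*
	 * Sysctl handler for the transparent VF "accelerated BPF" knob:
	 * report whether HN_XVFFLAG_ACCBPF is set and, on write, toggle
	 * the flag under HN_LOCK and the VF rwlock so that hn_transmit()
	 * sees a consistent value.
	 */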
	struct hn_softc *sc = arg1;
	int error, onoff = 0;

	if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF)
		onoff = 1;
	error = sysctl_handle_int(oidp, &onoff, 0, req);
	if (error || req->newptr == NULL)
		return (error);

	HN_LOCK(sc);
	/* NOTE: hn_vf_lock for hn_transmit() */
	rm_wlock(&sc->hn_vf_lock);
	if (onoff)
		sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
	else
		sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF;
	rm_wunlock(&sc->hn_vf_lock);
	HN_UNLOCK(sc);

	return (0);
}

static int
hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int enabled = 0;

	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
		enabled = 1;
	return (sysctl_handle_int(oidp, &enabled, 0, req));
}

static int
hn_check_iplen(const struct mbuf *m, int hoff)
{
	const struct ip *ip;
	int len, iphlen, iplen;
	const struct tcphdr *th;
	int thoff;	/* TCP data offset */

	len = hoff + sizeof(struct ip);

	/* The packet must be at least the size of an IP header. */
	if (m->m_pkthdr.len < len)
		return IPPROTO_DONE;

	/* The fixed IP header must reside completely in the first mbuf. */
	if (m->m_len < len)
		return IPPROTO_DONE;

	ip = mtodo(m, hoff);

	/* Bound check the packet's stated IP header length. */
	iphlen = ip->ip_hl << 2;
	if (iphlen < sizeof(struct ip))	/* minimum header length */
		return IPPROTO_DONE;

	/* The full IP header must reside completely in the one mbuf. */
	if (m->m_len < hoff + iphlen)
		return IPPROTO_DONE;

	iplen = ntohs(ip->ip_len);

	/*
	 * Check that the amount of data in the buffers is at least
	 * as much as the IP header would have us expect.
	 */
	if (m->m_pkthdr.len < hoff + iplen)
		return IPPROTO_DONE;

	/*
	 * Ignore IP fragments.
	 */
	if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
		return IPPROTO_DONE;

	/*
	 * The TCP/IP or UDP/IP header must be entirely contained within
	 * the first fragment of a packet.
	 */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		if (iplen < iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
		thoff = th->th_off << 2;
		if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + thoff)
			return IPPROTO_DONE;
		break;
	case IPPROTO_UDP:
		if (iplen < iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		break;
	default:
		if (iplen < iphlen)
			return IPPROTO_DONE;
		break;
	}
	return ip->ip_p;
}

static int
hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
{
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	device_t dev = sc->hn_dev;
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
	int lroent_cnt;
#endif
#endif
	int i;

	/*
	 * Create RXBUF for reception.
	 *
	 * NOTE:
	 * - It is shared by all channels.
	 * - A large enough buffer is allocated; certain versions of NVS
	 *   may further limit the usable space.
4421 */ 4422 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4423 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 4424 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4425 if (sc->hn_rxbuf == NULL) { 4426 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 4427 return (ENOMEM); 4428 } 4429 4430 sc->hn_rx_ring_cnt = ring_cnt; 4431 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 4432 4433 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 4434 M_DEVBUF, M_WAITOK | M_ZERO); 4435 4436 #if defined(INET) || defined(INET6) 4437 #if __FreeBSD_version >= 1100095 4438 lroent_cnt = hn_lro_entry_count; 4439 if (lroent_cnt < TCP_LRO_ENTRIES) 4440 lroent_cnt = TCP_LRO_ENTRIES; 4441 if (bootverbose) 4442 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 4443 #endif 4444 #endif /* INET || INET6 */ 4445 4446 ctx = device_get_sysctl_ctx(dev); 4447 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 4448 4449 /* Create dev.hn.UNIT.rx sysctl tree */ 4450 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 4451 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4452 4453 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4454 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4455 4456 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4457 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 4458 &rxr->hn_br_dma, BUS_DMA_WAITOK); 4459 if (rxr->hn_br == NULL) { 4460 device_printf(dev, "allocate bufring failed\n"); 4461 return (ENOMEM); 4462 } 4463 4464 if (hn_trust_hosttcp) 4465 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 4466 if (hn_trust_hostudp) 4467 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 4468 if (hn_trust_hostip) 4469 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 4470 rxr->hn_ifp = sc->hn_ifp; 4471 if (i < sc->hn_tx_ring_cnt) 4472 rxr->hn_txr = &sc->hn_tx_ring[i]; 4473 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 4474 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 4475 rxr->hn_rx_idx = i; 4476 rxr->hn_rxbuf = sc->hn_rxbuf; 4477 4478 /* 4479 * Initialize LRO. 
4480 */ 4481 #if defined(INET) || defined(INET6) 4482 #if __FreeBSD_version >= 1100095 4483 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 4484 hn_lro_mbufq_depth); 4485 #else 4486 tcp_lro_init(&rxr->hn_lro); 4487 rxr->hn_lro.ifp = sc->hn_ifp; 4488 #endif 4489 #if __FreeBSD_version >= 1100099 4490 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 4491 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 4492 #endif 4493 #endif /* INET || INET6 */ 4494 4495 if (sc->hn_rx_sysctl_tree != NULL) { 4496 char name[16]; 4497 4498 /* 4499 * Create per RX ring sysctl tree: 4500 * dev.hn.UNIT.rx.RINGID 4501 */ 4502 snprintf(name, sizeof(name), "%d", i); 4503 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 4504 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 4505 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4506 4507 if (rxr->hn_rx_sysctl_tree != NULL) { 4508 SYSCTL_ADD_ULONG(ctx, 4509 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 4510 OID_AUTO, "packets", CTLFLAG_RW, 4511 &rxr->hn_pkts, "# of packets received"); 4512 SYSCTL_ADD_ULONG(ctx, 4513 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 4514 OID_AUTO, "rss_pkts", CTLFLAG_RW, 4515 &rxr->hn_rss_pkts, 4516 "# of packets w/ RSS info received"); 4517 SYSCTL_ADD_INT(ctx, 4518 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 4519 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 4520 &rxr->hn_pktbuf_len, 0, 4521 "Temporary channel packet buffer length"); 4522 } 4523 } 4524 } 4525 4526 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 4527 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4528 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 4529 #if __FreeBSD_version < 1100095 4530 hn_rx_stat_int_sysctl, 4531 #else 4532 hn_rx_stat_u64_sysctl, 4533 #endif 4534 "LU", "LRO queued"); 4535 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 4536 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4537 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 4538 #if __FreeBSD_version < 1100095 4539 hn_rx_stat_int_sysctl, 4540 #else 4541 hn_rx_stat_u64_sysctl, 4542 #endif 4543 "LU", "LRO flushed"); 4544 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 4545 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4546 __offsetof(struct hn_rx_ring, hn_lro_tried), 4547 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 4548 #if __FreeBSD_version >= 1100099 4549 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 4550 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 4551 hn_lro_lenlim_sysctl, "IU", 4552 "Max # of data bytes to be aggregated by LRO"); 4553 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 4554 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 4555 hn_lro_ackcnt_sysctl, "I", 4556 "Max # of ACKs to be aggregated by LRO"); 4557 #endif 4558 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 4559 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 4560 hn_trust_hcsum_sysctl, "I", 4561 "Trust tcp segement verification on host side, " 4562 "when csum info is missing"); 4563 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 4564 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 4565 hn_trust_hcsum_sysctl, "I", 4566 "Trust udp datagram verification on host side, " 4567 "when csum info is missing"); 4568 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 4569 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 4570 hn_trust_hcsum_sysctl, "I", 4571 "Trust ip packet verification on host side, " 4572 "when csum info is missing"); 4573 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 4574 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4575 
__offsetof(struct hn_rx_ring, hn_csum_ip), 4576 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 4577 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 4578 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4579 __offsetof(struct hn_rx_ring, hn_csum_tcp), 4580 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 4581 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 4582 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4583 __offsetof(struct hn_rx_ring, hn_csum_udp), 4584 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 4585 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 4586 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4587 __offsetof(struct hn_rx_ring, hn_csum_trusted), 4588 hn_rx_stat_ulong_sysctl, "LU", 4589 "# of packets that we trust host's csum verification"); 4590 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 4591 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4592 __offsetof(struct hn_rx_ring, hn_small_pkts), 4593 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 4594 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 4595 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4596 __offsetof(struct hn_rx_ring, hn_ack_failed), 4597 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 4598 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 4599 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 4600 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 4601 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 4602 4603 return (0); 4604 } 4605 4606 static void 4607 hn_destroy_rx_data(struct hn_softc *sc) 4608 { 4609 int i; 4610 4611 if (sc->hn_rxbuf != NULL) { 4612 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) 4613 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 4614 else 4615 device_printf(sc->hn_dev, "RXBUF is referenced\n"); 4616 sc->hn_rxbuf = NULL; 4617 } 4618 4619 if (sc->hn_rx_ring_cnt == 0) 4620 return; 4621 4622 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4623 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4624 4625 if (rxr->hn_br == NULL) 4626 continue; 4627 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { 4628 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 4629 } else { 4630 device_printf(sc->hn_dev, 4631 "%dth channel bufring is referenced", i); 4632 } 4633 rxr->hn_br = NULL; 4634 4635 #if defined(INET) || defined(INET6) 4636 tcp_lro_free(&rxr->hn_lro); 4637 #endif 4638 free(rxr->hn_pktbuf, M_DEVBUF); 4639 } 4640 free(sc->hn_rx_ring, M_DEVBUF); 4641 sc->hn_rx_ring = NULL; 4642 4643 sc->hn_rx_ring_cnt = 0; 4644 sc->hn_rx_ring_inuse = 0; 4645 } 4646 4647 static int 4648 hn_tx_ring_create(struct hn_softc *sc, int id) 4649 { 4650 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 4651 device_t dev = sc->hn_dev; 4652 bus_dma_tag_t parent_dtag; 4653 int error, i; 4654 4655 txr->hn_sc = sc; 4656 txr->hn_tx_idx = id; 4657 4658 #ifndef HN_USE_TXDESC_BUFRING 4659 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 4660 #endif 4661 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 4662 4663 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 4664 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 4665 M_DEVBUF, M_WAITOK | M_ZERO); 4666 #ifndef HN_USE_TXDESC_BUFRING 4667 SLIST_INIT(&txr->hn_txlist); 4668 #else 4669 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 4670 M_WAITOK, &txr->hn_tx_lock); 4671 #endif 4672 4673 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { 4674 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( 4675 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); 4676 } else { 4677 txr->hn_tx_taskq = 
sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; 4678 } 4679 4680 #ifdef HN_IFSTART_SUPPORT 4681 if (hn_use_if_start) { 4682 txr->hn_txeof = hn_start_txeof; 4683 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 4684 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 4685 } else 4686 #endif 4687 { 4688 int br_depth; 4689 4690 txr->hn_txeof = hn_xmit_txeof; 4691 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 4692 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 4693 4694 br_depth = hn_get_txswq_depth(txr); 4695 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 4696 M_WAITOK, &txr->hn_tx_lock); 4697 } 4698 4699 txr->hn_direct_tx_size = hn_direct_tx_size; 4700 4701 /* 4702 * Always schedule transmission instead of trying to do direct 4703 * transmission. This one gives the best performance so far. 4704 */ 4705 txr->hn_sched_tx = 1; 4706 4707 parent_dtag = bus_get_dma_tag(dev); 4708 4709 /* DMA tag for RNDIS packet messages. */ 4710 error = bus_dma_tag_create(parent_dtag, /* parent */ 4711 HN_RNDIS_PKT_ALIGN, /* alignment */ 4712 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 4713 BUS_SPACE_MAXADDR, /* lowaddr */ 4714 BUS_SPACE_MAXADDR, /* highaddr */ 4715 NULL, NULL, /* filter, filterarg */ 4716 HN_RNDIS_PKT_LEN, /* maxsize */ 4717 1, /* nsegments */ 4718 HN_RNDIS_PKT_LEN, /* maxsegsize */ 4719 0, /* flags */ 4720 NULL, /* lockfunc */ 4721 NULL, /* lockfuncarg */ 4722 &txr->hn_tx_rndis_dtag); 4723 if (error) { 4724 device_printf(dev, "failed to create rndis dmatag\n"); 4725 return error; 4726 } 4727 4728 /* DMA tag for data. */ 4729 error = bus_dma_tag_create(parent_dtag, /* parent */ 4730 1, /* alignment */ 4731 HN_TX_DATA_BOUNDARY, /* boundary */ 4732 BUS_SPACE_MAXADDR, /* lowaddr */ 4733 BUS_SPACE_MAXADDR, /* highaddr */ 4734 NULL, NULL, /* filter, filterarg */ 4735 HN_TX_DATA_MAXSIZE, /* maxsize */ 4736 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 4737 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 4738 0, /* flags */ 4739 NULL, /* lockfunc */ 4740 NULL, /* lockfuncarg */ 4741 &txr->hn_tx_data_dtag); 4742 if (error) { 4743 device_printf(dev, "failed to create data dmatag\n"); 4744 return error; 4745 } 4746 4747 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 4748 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 4749 4750 txd->txr = txr; 4751 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 4752 STAILQ_INIT(&txd->agg_list); 4753 4754 /* 4755 * Allocate and load RNDIS packet message. 4756 */ 4757 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 4758 (void **)&txd->rndis_pkt, 4759 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 4760 &txd->rndis_pkt_dmap); 4761 if (error) { 4762 device_printf(dev, 4763 "failed to allocate rndis_packet_msg, %d\n", i); 4764 return error; 4765 } 4766 4767 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 4768 txd->rndis_pkt_dmap, 4769 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 4770 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 4771 BUS_DMA_NOWAIT); 4772 if (error) { 4773 device_printf(dev, 4774 "failed to load rndis_packet_msg, %d\n", i); 4775 bus_dmamem_free(txr->hn_tx_rndis_dtag, 4776 txd->rndis_pkt, txd->rndis_pkt_dmap); 4777 return error; 4778 } 4779 4780 /* DMA map for TX data. 
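 * (One map is created per TX descriptor here; presumably it is loaded
 * with the outgoing mbuf chain when the packet is encapsulated and
 * unloaded again once the host acknowledges the send.)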
*/ 4781 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 4782 &txd->data_dmap); 4783 if (error) { 4784 device_printf(dev, 4785 "failed to allocate tx data dmamap\n"); 4786 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 4787 txd->rndis_pkt_dmap); 4788 bus_dmamem_free(txr->hn_tx_rndis_dtag, 4789 txd->rndis_pkt, txd->rndis_pkt_dmap); 4790 return error; 4791 } 4792 4793 /* All set, put it to list */ 4794 txd->flags |= HN_TXD_FLAG_ONLIST; 4795 #ifndef HN_USE_TXDESC_BUFRING 4796 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 4797 #else 4798 buf_ring_enqueue(txr->hn_txdesc_br, txd); 4799 #endif 4800 } 4801 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 4802 4803 if (sc->hn_tx_sysctl_tree != NULL) { 4804 struct sysctl_oid_list *child; 4805 struct sysctl_ctx_list *ctx; 4806 char name[16]; 4807 4808 /* 4809 * Create per TX ring sysctl tree: 4810 * dev.hn.UNIT.tx.RINGID 4811 */ 4812 ctx = device_get_sysctl_ctx(dev); 4813 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 4814 4815 snprintf(name, sizeof(name), "%d", id); 4816 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 4817 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4818 4819 if (txr->hn_tx_sysctl_tree != NULL) { 4820 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 4821 4822 #ifdef HN_DEBUG 4823 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 4824 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 4825 "# of available TX descs"); 4826 #endif 4827 #ifdef HN_IFSTART_SUPPORT 4828 if (!hn_use_if_start) 4829 #endif 4830 { 4831 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 4832 CTLFLAG_RD, &txr->hn_oactive, 0, 4833 "over active"); 4834 } 4835 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 4836 CTLFLAG_RW, &txr->hn_pkts, 4837 "# of packets transmitted"); 4838 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 4839 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 4840 } 4841 } 4842 4843 return 0; 4844 } 4845 4846 static void 4847 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 4848 { 4849 struct hn_tx_ring *txr = txd->txr; 4850 4851 KASSERT(txd->m == NULL, ("still has mbuf installed")); 4852 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 4853 4854 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 4855 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 4856 txd->rndis_pkt_dmap); 4857 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 4858 } 4859 4860 static void 4861 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) 4862 { 4863 4864 KASSERT(txd->refs == 0 || txd->refs == 1, 4865 ("invalid txd refs %d", txd->refs)); 4866 4867 /* Aggregated txds will be freed by their aggregating txd. */ 4868 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { 4869 int freed; 4870 4871 freed = hn_txdesc_put(txr, txd); 4872 KASSERT(freed, ("can't free txdesc")); 4873 } 4874 } 4875 4876 static void 4877 hn_tx_ring_destroy(struct hn_tx_ring *txr) 4878 { 4879 int i; 4880 4881 if (txr->hn_txdesc == NULL) 4882 return; 4883 4884 /* 4885 * NOTE: 4886 * Because the freeing of aggregated txds will be deferred 4887 * to the aggregating txd, two passes are used here: 4888 * - The first pass GCes any pending txds. This GC is necessary, 4889 * since if the channels are revoked, hypervisor will not 4890 * deliver send-done for all pending txds. 4891 * - The second pass frees the busdma stuffs, i.e. after all txds 4892 * were freed. 
4893 */ 4894 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 4895 hn_txdesc_gc(txr, &txr->hn_txdesc[i]); 4896 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 4897 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); 4898 4899 if (txr->hn_tx_data_dtag != NULL) 4900 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 4901 if (txr->hn_tx_rndis_dtag != NULL) 4902 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 4903 4904 #ifdef HN_USE_TXDESC_BUFRING 4905 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 4906 #endif 4907 4908 free(txr->hn_txdesc, M_DEVBUF); 4909 txr->hn_txdesc = NULL; 4910 4911 if (txr->hn_mbuf_br != NULL) 4912 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 4913 4914 #ifndef HN_USE_TXDESC_BUFRING 4915 mtx_destroy(&txr->hn_txlist_spin); 4916 #endif 4917 mtx_destroy(&txr->hn_tx_lock); 4918 } 4919 4920 static int 4921 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 4922 { 4923 struct sysctl_oid_list *child; 4924 struct sysctl_ctx_list *ctx; 4925 int i; 4926 4927 /* 4928 * Create TXBUF for chimney sending. 4929 * 4930 * NOTE: It is shared by all channels. 4931 */ 4932 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 4933 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 4934 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4935 if (sc->hn_chim == NULL) { 4936 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 4937 return (ENOMEM); 4938 } 4939 4940 sc->hn_tx_ring_cnt = ring_cnt; 4941 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 4942 4943 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 4944 M_DEVBUF, M_WAITOK | M_ZERO); 4945 4946 ctx = device_get_sysctl_ctx(sc->hn_dev); 4947 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 4948 4949 /* Create dev.hn.UNIT.tx sysctl tree */ 4950 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 4951 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4952 4953 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4954 int error; 4955 4956 error = hn_tx_ring_create(sc, i); 4957 if (error) 4958 return error; 4959 } 4960 4961 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 4962 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4963 __offsetof(struct hn_tx_ring, hn_no_txdescs), 4964 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 4965 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 4966 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4967 __offsetof(struct hn_tx_ring, hn_send_failed), 4968 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 4969 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 4970 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4971 __offsetof(struct hn_tx_ring, hn_txdma_failed), 4972 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 4973 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 4974 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4975 __offsetof(struct hn_tx_ring, hn_flush_failed), 4976 hn_tx_stat_ulong_sysctl, "LU", 4977 "# of packet transmission aggregation flush failure"); 4978 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 4979 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4980 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 4981 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 4982 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 4983 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4984 __offsetof(struct hn_tx_ring, hn_tx_chimney), 4985 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 4986 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 4987 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4988 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 4989 
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 4990 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 4991 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 4992 "# of total TX descs"); 4993 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 4994 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 4995 "Chimney send packet size upper boundary"); 4996 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 4997 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 4998 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 4999 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 5000 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5001 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 5002 hn_tx_conf_int_sysctl, "I", 5003 "Size of the packet for direct transmission"); 5004 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 5005 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5006 __offsetof(struct hn_tx_ring, hn_sched_tx), 5007 hn_tx_conf_int_sysctl, "I", 5008 "Always schedule transmission " 5009 "instead of doing direct transmission"); 5010 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 5011 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 5012 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 5013 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 5014 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 5015 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 5016 "Applied packet transmission aggregation size"); 5017 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 5018 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5019 hn_txagg_pktmax_sysctl, "I", 5020 "Applied packet transmission aggregation packets"); 5021 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 5022 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5023 hn_txagg_align_sysctl, "I", 5024 "Applied packet transmission aggregation alignment"); 5025 5026 return 0; 5027 } 5028 5029 static void 5030 hn_set_chim_size(struct hn_softc *sc, int chim_size) 5031 { 5032 int i; 5033 5034 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5035 sc->hn_tx_ring[i].hn_chim_size = chim_size; 5036 } 5037 5038 static void 5039 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 5040 { 5041 struct ifnet *ifp = sc->hn_ifp; 5042 u_int hw_tsomax; 5043 int tso_minlen; 5044 5045 HN_LOCK_ASSERT(sc); 5046 5047 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 5048 return; 5049 5050 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 5051 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 5052 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 5053 5054 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 5055 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 5056 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 5057 5058 if (tso_maxlen < tso_minlen) 5059 tso_maxlen = tso_minlen; 5060 else if (tso_maxlen > IP_MAXPACKET) 5061 tso_maxlen = IP_MAXPACKET; 5062 if (tso_maxlen > sc->hn_ndis_tso_szmax) 5063 tso_maxlen = sc->hn_ndis_tso_szmax; 5064 hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 5065 5066 if (hn_xpnt_vf_isready(sc)) { 5067 if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) 5068 hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; 5069 } 5070 ifp->if_hw_tsomax = hw_tsomax; 5071 if (bootverbose) 5072 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 5073 } 5074 5075 static void 5076 hn_fixup_tx_data(struct hn_softc *sc) 5077 { 5078 uint64_t csum_assist; 5079 int i; 5080 5081 hn_set_chim_size(sc, sc->hn_chim_szmax); 5082 if (hn_tx_chimney_size > 0 && 5083 hn_tx_chimney_size < sc->hn_chim_szmax) 5084 hn_set_chim_size(sc, 
hn_tx_chimney_size); 5085 5086 csum_assist = 0; 5087 if (sc->hn_caps & HN_CAP_IPCS) 5088 csum_assist |= CSUM_IP; 5089 if (sc->hn_caps & HN_CAP_TCP4CS) 5090 csum_assist |= CSUM_IP_TCP; 5091 if (sc->hn_caps & HN_CAP_UDP4CS) 5092 csum_assist |= CSUM_IP_UDP; 5093 if (sc->hn_caps & HN_CAP_TCP6CS) 5094 csum_assist |= CSUM_IP6_TCP; 5095 if (sc->hn_caps & HN_CAP_UDP6CS) 5096 csum_assist |= CSUM_IP6_UDP; 5097 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5098 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 5099 5100 if (sc->hn_caps & HN_CAP_HASHVAL) { 5101 /* 5102 * Support HASHVAL pktinfo on TX path. 5103 */ 5104 if (bootverbose) 5105 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 5106 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5107 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 5108 } 5109 } 5110 5111 static void 5112 hn_destroy_tx_data(struct hn_softc *sc) 5113 { 5114 int i; 5115 5116 if (sc->hn_chim != NULL) { 5117 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { 5118 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 5119 } else { 5120 device_printf(sc->hn_dev, 5121 "chimney sending buffer is referenced"); 5122 } 5123 sc->hn_chim = NULL; 5124 } 5125 5126 if (sc->hn_tx_ring_cnt == 0) 5127 return; 5128 5129 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5130 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 5131 5132 free(sc->hn_tx_ring, M_DEVBUF); 5133 sc->hn_tx_ring = NULL; 5134 5135 sc->hn_tx_ring_cnt = 0; 5136 sc->hn_tx_ring_inuse = 0; 5137 } 5138 5139 #ifdef HN_IFSTART_SUPPORT 5140 5141 static void 5142 hn_start_taskfunc(void *xtxr, int pending __unused) 5143 { 5144 struct hn_tx_ring *txr = xtxr; 5145 5146 mtx_lock(&txr->hn_tx_lock); 5147 hn_start_locked(txr, 0); 5148 mtx_unlock(&txr->hn_tx_lock); 5149 } 5150 5151 static int 5152 hn_start_locked(struct hn_tx_ring *txr, int len) 5153 { 5154 struct hn_softc *sc = txr->hn_sc; 5155 struct ifnet *ifp = sc->hn_ifp; 5156 int sched = 0; 5157 5158 KASSERT(hn_use_if_start, 5159 ("hn_start_locked is called, when if_start is disabled")); 5160 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5161 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5162 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5163 5164 if (__predict_false(txr->hn_suspended)) 5165 return (0); 5166 5167 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 5168 IFF_DRV_RUNNING) 5169 return (0); 5170 5171 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 5172 struct hn_txdesc *txd; 5173 struct mbuf *m_head; 5174 int error; 5175 5176 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 5177 if (m_head == NULL) 5178 break; 5179 5180 if (len > 0 && m_head->m_pkthdr.len > len) { 5181 /* 5182 * This sending could be time consuming; let callers 5183 * dispatch this packet sending (and sending of any 5184 * following up packets) to tx taskqueue. 
5185 */ 5186 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5187 sched = 1; 5188 break; 5189 } 5190 5191 #if defined(INET6) || defined(INET) 5192 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 5193 m_head = hn_tso_fixup(m_head); 5194 if (__predict_false(m_head == NULL)) { 5195 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5196 continue; 5197 } 5198 } 5199 #endif 5200 5201 txd = hn_txdesc_get(txr); 5202 if (txd == NULL) { 5203 txr->hn_no_txdescs++; 5204 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5205 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5206 break; 5207 } 5208 5209 error = hn_encap(ifp, txr, txd, &m_head); 5210 if (error) { 5211 /* Both txd and m_head are freed */ 5212 KASSERT(txr->hn_agg_txd == NULL, 5213 ("encap failed w/ pending aggregating txdesc")); 5214 continue; 5215 } 5216 5217 if (txr->hn_agg_pktleft == 0) { 5218 if (txr->hn_agg_txd != NULL) { 5219 KASSERT(m_head == NULL, 5220 ("pending mbuf for aggregating txdesc")); 5221 error = hn_flush_txagg(ifp, txr); 5222 if (__predict_false(error)) { 5223 atomic_set_int(&ifp->if_drv_flags, 5224 IFF_DRV_OACTIVE); 5225 break; 5226 } 5227 } else { 5228 KASSERT(m_head != NULL, ("mbuf was freed")); 5229 error = hn_txpkt(ifp, txr, txd); 5230 if (__predict_false(error)) { 5231 /* txd is freed, but m_head is not */ 5232 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5233 atomic_set_int(&ifp->if_drv_flags, 5234 IFF_DRV_OACTIVE); 5235 break; 5236 } 5237 } 5238 } 5239 #ifdef INVARIANTS 5240 else { 5241 KASSERT(txr->hn_agg_txd != NULL, 5242 ("no aggregating txdesc")); 5243 KASSERT(m_head == NULL, 5244 ("pending mbuf for aggregating txdesc")); 5245 } 5246 #endif 5247 } 5248 5249 /* Flush pending aggerated transmission. */ 5250 if (txr->hn_agg_txd != NULL) 5251 hn_flush_txagg(ifp, txr); 5252 return (sched); 5253 } 5254 5255 static void 5256 hn_start(struct ifnet *ifp) 5257 { 5258 struct hn_softc *sc = ifp->if_softc; 5259 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 5260 5261 if (txr->hn_sched_tx) 5262 goto do_sched; 5263 5264 if (mtx_trylock(&txr->hn_tx_lock)) { 5265 int sched; 5266 5267 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5268 mtx_unlock(&txr->hn_tx_lock); 5269 if (!sched) 5270 return; 5271 } 5272 do_sched: 5273 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 5274 } 5275 5276 static void 5277 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 5278 { 5279 struct hn_tx_ring *txr = xtxr; 5280 5281 mtx_lock(&txr->hn_tx_lock); 5282 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 5283 hn_start_locked(txr, 0); 5284 mtx_unlock(&txr->hn_tx_lock); 5285 } 5286 5287 static void 5288 hn_start_txeof(struct hn_tx_ring *txr) 5289 { 5290 struct hn_softc *sc = txr->hn_sc; 5291 struct ifnet *ifp = sc->hn_ifp; 5292 5293 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5294 5295 if (txr->hn_sched_tx) 5296 goto do_sched; 5297 5298 if (mtx_trylock(&txr->hn_tx_lock)) { 5299 int sched; 5300 5301 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5302 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5303 mtx_unlock(&txr->hn_tx_lock); 5304 if (sched) { 5305 taskqueue_enqueue(txr->hn_tx_taskq, 5306 &txr->hn_tx_task); 5307 } 5308 } else { 5309 do_sched: 5310 /* 5311 * Release the OACTIVE earlier, with the hope, that 5312 * others could catch up. The task will clear the 5313 * flag again with the hn_tx_lock to avoid possible 5314 * races. 
5315 */ 5316 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5317 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 5318 } 5319 } 5320 5321 #endif /* HN_IFSTART_SUPPORT */ 5322 5323 static int 5324 hn_xmit(struct hn_tx_ring *txr, int len) 5325 { 5326 struct hn_softc *sc = txr->hn_sc; 5327 struct ifnet *ifp = sc->hn_ifp; 5328 struct mbuf *m_head; 5329 int sched = 0; 5330 5331 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5332 #ifdef HN_IFSTART_SUPPORT 5333 KASSERT(hn_use_if_start == 0, 5334 ("hn_xmit is called, when if_start is enabled")); 5335 #endif 5336 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5337 5338 if (__predict_false(txr->hn_suspended)) 5339 return (0); 5340 5341 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 5342 return (0); 5343 5344 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 5345 struct hn_txdesc *txd; 5346 int error; 5347 5348 if (len > 0 && m_head->m_pkthdr.len > len) { 5349 /* 5350 * This sending could be time consuming; let callers 5351 * dispatch this packet sending (and sending of any 5352 * following up packets) to tx taskqueue. 5353 */ 5354 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5355 sched = 1; 5356 break; 5357 } 5358 5359 txd = hn_txdesc_get(txr); 5360 if (txd == NULL) { 5361 txr->hn_no_txdescs++; 5362 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5363 txr->hn_oactive = 1; 5364 break; 5365 } 5366 5367 error = hn_encap(ifp, txr, txd, &m_head); 5368 if (error) { 5369 /* Both txd and m_head are freed; discard */ 5370 KASSERT(txr->hn_agg_txd == NULL, 5371 ("encap failed w/ pending aggregating txdesc")); 5372 drbr_advance(ifp, txr->hn_mbuf_br); 5373 continue; 5374 } 5375 5376 if (txr->hn_agg_pktleft == 0) { 5377 if (txr->hn_agg_txd != NULL) { 5378 KASSERT(m_head == NULL, 5379 ("pending mbuf for aggregating txdesc")); 5380 error = hn_flush_txagg(ifp, txr); 5381 if (__predict_false(error)) { 5382 txr->hn_oactive = 1; 5383 break; 5384 } 5385 } else { 5386 KASSERT(m_head != NULL, ("mbuf was freed")); 5387 error = hn_txpkt(ifp, txr, txd); 5388 if (__predict_false(error)) { 5389 /* txd is freed, but m_head is not */ 5390 drbr_putback(ifp, txr->hn_mbuf_br, 5391 m_head); 5392 txr->hn_oactive = 1; 5393 break; 5394 } 5395 } 5396 } 5397 #ifdef INVARIANTS 5398 else { 5399 KASSERT(txr->hn_agg_txd != NULL, 5400 ("no aggregating txdesc")); 5401 KASSERT(m_head == NULL, 5402 ("pending mbuf for aggregating txdesc")); 5403 } 5404 #endif 5405 5406 /* Sent */ 5407 drbr_advance(ifp, txr->hn_mbuf_br); 5408 } 5409 5410 /* Flush pending aggerated transmission. */ 5411 if (txr->hn_agg_txd != NULL) 5412 hn_flush_txagg(ifp, txr); 5413 return (sched); 5414 } 5415 5416 static int 5417 hn_transmit(struct ifnet *ifp, struct mbuf *m) 5418 { 5419 struct hn_softc *sc = ifp->if_softc; 5420 struct hn_tx_ring *txr; 5421 int error, idx = 0; 5422 5423 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 5424 struct rm_priotracker pt; 5425 5426 rm_rlock(&sc->hn_vf_lock, &pt); 5427 if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 5428 struct mbuf *m_bpf = NULL; 5429 int obytes, omcast; 5430 5431 obytes = m->m_pkthdr.len; 5432 if (m->m_flags & M_MCAST) 5433 omcast = 1; 5434 5435 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) { 5436 if (bpf_peers_present(ifp->if_bpf)) { 5437 m_bpf = m_copypacket(m, M_NOWAIT); 5438 if (m_bpf == NULL) { 5439 /* 5440 * Failed to grab a shallow 5441 * copy; tap now. 
5442 */ 5443 ETHER_BPF_MTAP(ifp, m); 5444 } 5445 } 5446 } else { 5447 ETHER_BPF_MTAP(ifp, m); 5448 } 5449 5450 error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m); 5451 rm_runlock(&sc->hn_vf_lock, &pt); 5452 5453 if (m_bpf != NULL) { 5454 if (!error) 5455 ETHER_BPF_MTAP(ifp, m_bpf); 5456 m_freem(m_bpf); 5457 } 5458 5459 if (error == ENOBUFS) { 5460 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 5461 } else if (error) { 5462 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5463 } else { 5464 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 5465 if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes); 5466 if (omcast) { 5467 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 5468 omcast); 5469 } 5470 } 5471 return (error); 5472 } 5473 rm_runlock(&sc->hn_vf_lock, &pt); 5474 } 5475 5476 #if defined(INET6) || defined(INET) 5477 /* 5478 * Perform TSO packet header fixup now, since the TSO 5479 * packet header should be cache-hot. 5480 */ 5481 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 5482 m = hn_tso_fixup(m); 5483 if (__predict_false(m == NULL)) { 5484 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5485 return EIO; 5486 } 5487 } 5488 #endif 5489 5490 /* 5491 * Select the TX ring based on flowid 5492 */ 5493 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 5494 #ifdef RSS 5495 uint32_t bid; 5496 5497 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 5498 &bid) == 0) 5499 idx = bid % sc->hn_tx_ring_inuse; 5500 else 5501 #endif 5502 { 5503 #if defined(INET6) || defined(INET) 5504 int tcpsyn = 0; 5505 5506 if (m->m_pkthdr.len < 128 && 5507 (m->m_pkthdr.csum_flags & 5508 (CSUM_IP_TCP | CSUM_IP6_TCP)) && 5509 (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 5510 m = hn_check_tcpsyn(m, &tcpsyn); 5511 if (__predict_false(m == NULL)) { 5512 if_inc_counter(ifp, 5513 IFCOUNTER_OERRORS, 1); 5514 return (EIO); 5515 } 5516 } 5517 #else 5518 const int tcpsyn = 0; 5519 #endif 5520 if (tcpsyn) 5521 idx = 0; 5522 else 5523 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 5524 } 5525 } 5526 txr = &sc->hn_tx_ring[idx]; 5527 5528 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 5529 if (error) { 5530 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 5531 return error; 5532 } 5533 5534 if (txr->hn_oactive) 5535 return 0; 5536 5537 if (txr->hn_sched_tx) 5538 goto do_sched; 5539 5540 if (mtx_trylock(&txr->hn_tx_lock)) { 5541 int sched; 5542 5543 sched = hn_xmit(txr, txr->hn_direct_tx_size); 5544 mtx_unlock(&txr->hn_tx_lock); 5545 if (!sched) 5546 return 0; 5547 } 5548 do_sched: 5549 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 5550 return 0; 5551 } 5552 5553 static void 5554 hn_tx_ring_qflush(struct hn_tx_ring *txr) 5555 { 5556 struct mbuf *m; 5557 5558 mtx_lock(&txr->hn_tx_lock); 5559 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 5560 m_freem(m); 5561 mtx_unlock(&txr->hn_tx_lock); 5562 } 5563 5564 static void 5565 hn_xmit_qflush(struct ifnet *ifp) 5566 { 5567 struct hn_softc *sc = ifp->if_softc; 5568 struct rm_priotracker pt; 5569 int i; 5570 5571 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 5572 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 5573 if_qflush(ifp); 5574 5575 rm_rlock(&sc->hn_vf_lock, &pt); 5576 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 5577 sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp); 5578 rm_runlock(&sc->hn_vf_lock, &pt); 5579 } 5580 5581 static void 5582 hn_xmit_txeof(struct hn_tx_ring *txr) 5583 { 5584 5585 if (txr->hn_sched_tx) 5586 goto do_sched; 5587 5588 if (mtx_trylock(&txr->hn_tx_lock)) { 5589 int sched; 5590 5591 txr->hn_oactive = 0; 5592 sched = hn_xmit(txr, txr->hn_direct_tx_size); 5593 mtx_unlock(&txr->hn_tx_lock); 5594 if (sched) 
{ 5595 taskqueue_enqueue(txr->hn_tx_taskq, 5596 &txr->hn_tx_task); 5597 } 5598 } else { 5599 do_sched: 5600 /* 5601 * Release the oactive earlier, with the hope, that 5602 * others could catch up. The task will clear the 5603 * oactive again with the hn_tx_lock to avoid possible 5604 * races. 5605 */ 5606 txr->hn_oactive = 0; 5607 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 5608 } 5609 } 5610 5611 static void 5612 hn_xmit_taskfunc(void *xtxr, int pending __unused) 5613 { 5614 struct hn_tx_ring *txr = xtxr; 5615 5616 mtx_lock(&txr->hn_tx_lock); 5617 hn_xmit(txr, 0); 5618 mtx_unlock(&txr->hn_tx_lock); 5619 } 5620 5621 static void 5622 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) 5623 { 5624 struct hn_tx_ring *txr = xtxr; 5625 5626 mtx_lock(&txr->hn_tx_lock); 5627 txr->hn_oactive = 0; 5628 hn_xmit(txr, 0); 5629 mtx_unlock(&txr->hn_tx_lock); 5630 } 5631 5632 static int 5633 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) 5634 { 5635 struct vmbus_chan_br cbr; 5636 struct hn_rx_ring *rxr; 5637 struct hn_tx_ring *txr = NULL; 5638 int idx, error; 5639 5640 idx = vmbus_chan_subidx(chan); 5641 5642 /* 5643 * Link this channel to RX/TX ring. 5644 */ 5645 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 5646 ("invalid channel index %d, should > 0 && < %d", 5647 idx, sc->hn_rx_ring_inuse)); 5648 rxr = &sc->hn_rx_ring[idx]; 5649 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, 5650 ("RX ring %d already attached", idx)); 5651 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; 5652 rxr->hn_chan = chan; 5653 5654 if (bootverbose) { 5655 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", 5656 idx, vmbus_chan_id(chan)); 5657 } 5658 5659 if (idx < sc->hn_tx_ring_inuse) { 5660 txr = &sc->hn_tx_ring[idx]; 5661 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, 5662 ("TX ring %d already attached", idx)); 5663 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; 5664 5665 txr->hn_chan = chan; 5666 if (bootverbose) { 5667 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", 5668 idx, vmbus_chan_id(chan)); 5669 } 5670 } 5671 5672 /* Bind this channel to a proper CPU. */ 5673 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx)); 5674 5675 /* 5676 * Open this channel 5677 */ 5678 cbr.cbr = rxr->hn_br; 5679 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; 5680 cbr.cbr_txsz = HN_TXBR_SIZE; 5681 cbr.cbr_rxsz = HN_RXBR_SIZE; 5682 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); 5683 if (error) { 5684 if (error == EISCONN) { 5685 if_printf(sc->hn_ifp, "bufring is connected after " 5686 "chan%u open failure\n", vmbus_chan_id(chan)); 5687 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 5688 } else { 5689 if_printf(sc->hn_ifp, "open chan%u failed: %d\n", 5690 vmbus_chan_id(chan), error); 5691 } 5692 } 5693 return (error); 5694 } 5695 5696 static void 5697 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) 5698 { 5699 struct hn_rx_ring *rxr; 5700 int idx, error; 5701 5702 idx = vmbus_chan_subidx(chan); 5703 5704 /* 5705 * Link this channel to RX/TX ring. 
	 */
	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
	    ("invalid channel index %d, should be >= 0 && < %d",
	     idx, sc->hn_rx_ring_inuse));
	rxr = &sc->hn_rx_ring[idx];
	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
	    ("RX ring %d is not attached", idx));
	rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;

	if (idx < sc->hn_tx_ring_inuse) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];

		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
		    ("TX ring %d is not attached", idx));
		txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
	}

	/*
	 * Close this channel.
	 *
	 * NOTE:
	 * Channel closing does _not_ destroy the target channel.
	 */
	error = vmbus_chan_close_direct(chan);
	if (error == EISCONN) {
		if_printf(sc->hn_ifp, "chan%u bufring is connected "
		    "after being closed\n", vmbus_chan_id(chan));
		rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
	} else if (error) {
		if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
		    vmbus_chan_id(chan), error);
	}
}

static int
hn_attach_subchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i, error = 0;

	KASSERT(subchan_cnt > 0, ("no sub-channels"));

	/* Attach the sub-channels. */
	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
	for (i = 0; i < subchan_cnt; ++i) {
		int error1;

		error1 = hn_chan_attach(sc, subchans[i]);
		if (error1) {
			error = error1;
			/* Move on; all channels will be detached later. */
		}
	}
	vmbus_subchan_rel(subchans, subchan_cnt);

	if (error) {
		if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
	} else {
		if (bootverbose) {
			if_printf(sc->hn_ifp, "%d sub-channels attached\n",
			    subchan_cnt);
		}
	}
	return (error);
}

static void
hn_detach_allchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i;

	if (subchan_cnt == 0)
		goto back;

	/* Detach the sub-channels. */
	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
	for (i = 0; i < subchan_cnt; ++i)
		hn_chan_detach(sc, subchans[i]);
	vmbus_subchan_rel(subchans, subchan_cnt);

back:
	/*
	 * Detach the primary channel, _after_ all sub-channels
	 * are detached.
	 */
	hn_chan_detach(sc, sc->hn_prichan);

	/* Wait for sub-channels to be destroyed, if any. */
	vmbus_subchan_drain(sc->hn_prichan);

#ifdef INVARIANTS
	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
		    HN_RX_FLAG_ATTACHED) == 0,
		    ("%dth RX ring is still attached", i));
	}
	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
		KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
		    HN_TX_FLAG_ATTACHED) == 0,
		    ("%dth TX ring is still attached", i));
	}
#endif
}

static int
hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
{
	struct vmbus_channel **subchans;
	int nchan, rxr_cnt, error;

	nchan = *nsubch + 1;
	if (nchan == 1) {
		/*
		 * Multiple RX/TX rings are not requested.
		 */
		*nsubch = 0;
		return (0);
	}

	/*
	 * Query RSS capabilities, e.g. # of RX rings, and # of indirect
	 * table entries.
5831 */ 5832 error = hn_rndis_query_rsscaps(sc, &rxr_cnt); 5833 if (error) { 5834 /* No RSS; this is benign. */ 5835 *nsubch = 0; 5836 return (0); 5837 } 5838 if (bootverbose) { 5839 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", 5840 rxr_cnt, nchan); 5841 } 5842 5843 if (nchan > rxr_cnt) 5844 nchan = rxr_cnt; 5845 if (nchan == 1) { 5846 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); 5847 *nsubch = 0; 5848 return (0); 5849 } 5850 5851 /* 5852 * Allocate sub-channels from NVS. 5853 */ 5854 *nsubch = nchan - 1; 5855 error = hn_nvs_alloc_subchans(sc, nsubch); 5856 if (error || *nsubch == 0) { 5857 /* Failed to allocate sub-channels. */ 5858 *nsubch = 0; 5859 return (0); 5860 } 5861 5862 /* 5863 * Wait for all sub-channels to become ready before moving on. 5864 */ 5865 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); 5866 vmbus_subchan_rel(subchans, *nsubch); 5867 return (0); 5868 } 5869 5870 static bool 5871 hn_synth_attachable(const struct hn_softc *sc) 5872 { 5873 int i; 5874 5875 if (sc->hn_flags & HN_FLAG_ERRORS) 5876 return (false); 5877 5878 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 5879 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 5880 5881 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) 5882 return (false); 5883 } 5884 return (true); 5885 } 5886 5887 /* 5888 * Make sure that the RX filter is zero after the successful 5889 * RNDIS initialization. 5890 * 5891 * NOTE: 5892 * Under certain conditions on certain versions of Hyper-V, 5893 * the RNDIS rxfilter is _not_ zero on the hypervisor side 5894 * after the successful RNDIS initialization, which breaks 5895 * the assumption of any following code (well, it breaks the 5896 * RNDIS API contract actually). Clear the RNDIS rxfilter 5897 * explicitly, drain packets sneaking through, and drain the 5898 * interrupt taskqueues scheduled due to the stealth packets. 5899 */ 5900 static void 5901 hn_rndis_init_fixat(struct hn_softc *sc, int nchan) 5902 { 5903 5904 hn_disable_rx(sc); 5905 hn_drain_rxtx(sc, nchan); 5906 } 5907 5908 static int 5909 hn_synth_attach(struct hn_softc *sc, int mtu) 5910 { 5911 #define ATTACHED_NVS 0x0002 5912 #define ATTACHED_RNDIS 0x0004 5913 5914 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 5915 int error, nsubch, nchan = 1, i, rndis_inited; 5916 uint32_t old_caps, attached = 0; 5917 5918 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, 5919 ("synthetic parts were attached")); 5920 5921 if (!hn_synth_attachable(sc)) 5922 return (ENXIO); 5923 5924 /* Save capabilities for later verification. */ 5925 old_caps = sc->hn_caps; 5926 sc->hn_caps = 0; 5927 5928 /* Clear RSS stuffs. */ 5929 sc->hn_rss_ind_size = 0; 5930 sc->hn_rss_hash = 0; 5931 5932 /* 5933 * Attach the primary channel _before_ attaching NVS and RNDIS. 5934 */ 5935 error = hn_chan_attach(sc, sc->hn_prichan); 5936 if (error) 5937 goto failed; 5938 5939 /* 5940 * Attach NVS. 5941 */ 5942 error = hn_nvs_attach(sc, mtu); 5943 if (error) 5944 goto failed; 5945 attached |= ATTACHED_NVS; 5946 5947 /* 5948 * Attach RNDIS _after_ NVS is attached. 5949 */ 5950 error = hn_rndis_attach(sc, mtu, &rndis_inited); 5951 if (rndis_inited) 5952 attached |= ATTACHED_RNDIS; 5953 if (error) 5954 goto failed; 5955 5956 /* 5957 * Make sure capabilities are not changed. 
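	 * (When the device is already attached, the ifnet configuration was
	 * presumably derived from the previously negotiated capabilities,
	 * which is why a mismatch at this point is treated as fatal below.)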
5958 */ 5959 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { 5960 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", 5961 old_caps, sc->hn_caps); 5962 error = ENXIO; 5963 goto failed; 5964 } 5965 5966 /* 5967 * Allocate sub-channels for multi-TX/RX rings. 5968 * 5969 * NOTE: 5970 * The # of RX rings that can be used is equivalent to the # of 5971 * channels to be requested. 5972 */ 5973 nsubch = sc->hn_rx_ring_cnt - 1; 5974 error = hn_synth_alloc_subchans(sc, &nsubch); 5975 if (error) 5976 goto failed; 5977 /* NOTE: _Full_ synthetic parts detach is required now. */ 5978 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; 5979 5980 /* 5981 * Set the # of TX/RX rings that could be used according to 5982 * the # of channels that NVS offered. 5983 */ 5984 nchan = nsubch + 1; 5985 hn_set_ring_inuse(sc, nchan); 5986 if (nchan == 1) { 5987 /* Only the primary channel can be used; done */ 5988 goto back; 5989 } 5990 5991 /* 5992 * Attach the sub-channels. 5993 * 5994 * NOTE: hn_set_ring_inuse() _must_ have been called. 5995 */ 5996 error = hn_attach_subchans(sc); 5997 if (error) 5998 goto failed; 5999 6000 /* 6001 * Configure RSS key and indirect table _after_ all sub-channels 6002 * are attached. 6003 */ 6004 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { 6005 /* 6006 * RSS key is not set yet; set it to the default RSS key. 6007 */ 6008 if (bootverbose) 6009 if_printf(sc->hn_ifp, "setup default RSS key\n"); 6010 #ifdef RSS 6011 rss_getkey(rss->rss_key); 6012 #else 6013 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); 6014 #endif 6015 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 6016 } 6017 6018 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { 6019 /* 6020 * RSS indirect table is not set yet; set it up in round- 6021 * robin fashion. 6022 */ 6023 if (bootverbose) { 6024 if_printf(sc->hn_ifp, "setup default RSS indirect " 6025 "table\n"); 6026 } 6027 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 6028 uint32_t subidx; 6029 6030 #ifdef RSS 6031 subidx = rss_get_indirection_to_bucket(i); 6032 #else 6033 subidx = i; 6034 #endif 6035 rss->rss_ind[i] = subidx % nchan; 6036 } 6037 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 6038 } else { 6039 /* 6040 * # of usable channels may be changed, so we have to 6041 * make sure that all entries in RSS indirect table 6042 * are valid. 6043 * 6044 * NOTE: hn_set_ring_inuse() _must_ have been called. 6045 */ 6046 hn_rss_ind_fixup(sc); 6047 } 6048 6049 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 6050 if (error) 6051 goto failed; 6052 back: 6053 /* 6054 * Fixup transmission aggregation setup. 6055 */ 6056 hn_set_txagg(sc); 6057 hn_rndis_init_fixat(sc, nchan); 6058 return (0); 6059 6060 failed: 6061 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 6062 hn_rndis_init_fixat(sc, nchan); 6063 hn_synth_detach(sc); 6064 } else { 6065 if (attached & ATTACHED_RNDIS) { 6066 hn_rndis_init_fixat(sc, nchan); 6067 hn_rndis_detach(sc); 6068 } 6069 if (attached & ATTACHED_NVS) 6070 hn_nvs_detach(sc); 6071 hn_chan_detach(sc, sc->hn_prichan); 6072 /* Restore old capabilities. */ 6073 sc->hn_caps = old_caps; 6074 } 6075 return (error); 6076 6077 #undef ATTACHED_RNDIS 6078 #undef ATTACHED_NVS 6079 } 6080 6081 /* 6082 * NOTE: 6083 * The interface must have been suspended though hn_suspend(), before 6084 * this function get called. 6085 */ 6086 static void 6087 hn_synth_detach(struct hn_softc *sc) 6088 { 6089 6090 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 6091 ("synthetic parts were not attached")); 6092 6093 /* Detach the RNDIS first. 
	 */
	hn_rndis_detach(sc);

	/* Detach NVS. */
	hn_nvs_detach(sc);

	/* Detach all of the channels. */
	hn_detach_allchans(sc);

	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}

static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{
	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

#ifdef RSS
	if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) {
		if_printf(sc->hn_ifp, "# of RX rings (%d) does not match "
		    "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse,
		    rss_getnumbuckets());
	}
#endif

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}

static void
hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
{

	/*
	 * NOTE:
	 * The TX bufring will not be drained by the hypervisor,
	 * if the primary channel is revoked.
	 */
	while (!vmbus_chan_rx_empty(chan) ||
	    (!vmbus_chan_is_revoked(sc->hn_prichan) &&
	     !vmbus_chan_tx_empty(chan)))
		pause("waitch", 1);
	vmbus_chan_intr_drain(chan);
}

static void
hn_disable_rx(struct hn_softc *sc)
{

	/*
	 * Disable RX by clearing RX filter forcefully.
	 */
	sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
	hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */

	/*
	 * Give RNDIS enough time to flush all pending data packets.
	 */
	pause("waitrx", (200 * hz) / 1000);
}

/*
 * NOTE:
 * RX/TX _must_ have been suspended/disabled, before this function
 * is called.
 */
static void
hn_drain_rxtx(struct hn_softc *sc, int nchan)
{
	struct vmbus_channel **subch = NULL;
	int nsubch;

	/*
	 * Drain RX/TX bufrings and interrupts.
	 */
	nsubch = nchan - 1;
	if (nsubch > 0)
		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);

	if (subch != NULL) {
		int i;

		for (i = 0; i < nsubch; ++i)
			hn_chan_drain(sc, subch[i]);
	}
	hn_chan_drain(sc, sc->hn_prichan);

	if (subch != NULL)
		vmbus_subchan_rel(subch, nsubch);
}

static void
hn_suspend_data(struct hn_softc *sc)
{
	struct hn_tx_ring *txr;
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Suspend TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 1;
		mtx_unlock(&txr->hn_tx_lock);
		/* No one is able to send more packets now. */

		/*
		 * Wait for all pending sends to finish.
		 *
		 * NOTE:
		 * We will _not_ receive all pending send-dones, if the
		 * primary channel is revoked.
		 */
		while (hn_tx_ring_pending(txr) &&
		    !vmbus_chan_is_revoked(sc->hn_prichan))
			pause("hnwtx", 1 /* 1 tick */);
	}

	/*
	 * Disable RX.
	 */
	hn_disable_rx(sc);

	/*
	 * Drain RX/TX.
	 */
	hn_drain_rxtx(sc, sc->hn_rx_ring_inuse);

	/*
	 * Drain any pending TX tasks.
	 *
	 * NOTE:
	 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX
	 * tasks will have to be drained _after_ the above hn_drain_rxtx().
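	 * (Draining the channels processes any remaining completions,
	 * and those completion handlers can enqueue work on the TX
	 * taskqueues.)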
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{

	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}

static void
hn_suspend_mgmt(struct hn_softc *sc)
{
	struct task task;

	HN_LOCK_ASSERT(sc);

	/*
	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
	 * through hn_mgmt_taskq.
	 */
	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
	vmbus_chan_run_task(sc->hn_prichan, &task);

	/*
	 * Make sure that all pending management tasks are completed.
	 */
	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
	taskqueue_drain_all(sc->hn_mgmt_taskq0);
}

static void
hn_suspend(struct hn_softc *sc)
{

	/* Disable polling. */
	hn_polling(sc, 0);

	/*
	 * If the non-transparent mode VF is activated, the synthetic
	 * device is receiving packets, so the data path of the
	 * synthetic device must be suspended.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_RXVF))
		hn_suspend_data(sc);
	hn_suspend_mgmt(sc);
}

static void
hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
{
	int i;

	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
	    ("invalid TX ring count %d", tx_ring_cnt));

	for (i = 0; i < tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 0;
		mtx_unlock(&txr->hn_tx_lock);
	}
}

static void
hn_resume_data(struct hn_softc *sc)
{
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Re-enable RX.
	 */
	hn_rxfilter_config(sc);

	/*
	 * Make sure to clear suspend status on "all" TX rings,
	 * since hn_tx_ring_inuse can be changed after
	 * hn_suspend_data().
	 */
	hn_resume_tx(sc, sc->hn_tx_ring_cnt);

#ifdef HN_IFSTART_SUPPORT
	if (!hn_use_if_start)
#endif
	{
		/*
		 * Flush unused drbrs, since hn_tx_ring_inuse may be
		 * reduced.
		 */
		for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
			hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
	}

	/*
	 * Kick start TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		/*
		 * Use txeof task, so that any pending oactive can be
		 * cleared properly.
		 */
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_resume_mgmt(struct hn_softc *sc)
{

	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;

	/*
	 * Kick off network change detection, if it was pending.
	 * If no network change was pending, start link status
	 * checks, which are more lightweight than network change
	 * detection.
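	 *
	 * HN_LINK_FLAG_NETCHG is typically set by hn_change_network()
	 * when RNDIS reported RNDIS_STATUS_NETWORK_CHANGE (see
	 * hn_rndis_rx_status()) while the management tasks were
	 * suspended.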
	 */
	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		hn_change_network(sc);
	else
		hn_update_link_status(sc);
}

static void
hn_resume(struct hn_softc *sc)
{

	/*
	 * If the non-transparent mode VF is activated, the synthetic
	 * device has to receive packets, so the data path of the
	 * synthetic device must be resumed.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_RXVF))
		hn_resume_data(sc);

	/*
	 * Don't resume link status change if VF is attached/activated.
	 * - In the non-transparent VF mode, the synthetic device marks
	 *   link down until the VF is deactivated; i.e. the VF is down.
	 * - In transparent VF mode, the VF's media status is used until
	 *   the VF is detached.
	 */
	if ((sc->hn_flags & HN_FLAG_RXVF) == 0 &&
	    !(hn_xpnt_vf && sc->hn_vf_ifp != NULL))
		hn_resume_mgmt(sc);

	/*
	 * Re-enable polling if this interface is running and
	 * polling is requested.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
		hn_polling(sc, sc->hn_pollhz);
}

static void
hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
{
	const struct rndis_status_msg *msg;
	int ofs;

	if (dlen < sizeof(*msg)) {
		if_printf(sc->hn_ifp, "invalid RNDIS status\n");
		return;
	}
	msg = data;

	switch (msg->rm_status) {
	case RNDIS_STATUS_MEDIA_CONNECT:
	case RNDIS_STATUS_MEDIA_DISCONNECT:
		hn_update_link_status(sc);
		break;

	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
	case RNDIS_STATUS_LINK_SPEED_CHANGE:
		/* Not really useful; ignore. */
		break;

	case RNDIS_STATUS_NETWORK_CHANGE:
		ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
		if (dlen < ofs + msg->rm_stbuflen ||
		    msg->rm_stbuflen < sizeof(uint32_t)) {
			if_printf(sc->hn_ifp, "network changed\n");
		} else {
			uint32_t change;

			memcpy(&change, ((const uint8_t *)msg) + ofs,
			    sizeof(change));
			if_printf(sc->hn_ifp, "network changed, change %u\n",
			    change);
		}
		hn_change_network(sc);
		break;

	default:
		if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
		    msg->rm_status);
		break;
	}
}

static int
hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (__predict_false(info_dlen < sizeof(*pi)))
			return (EINVAL);
		if (__predict_false(info_dlen < pi->rm_size))
			return (EINVAL);
		info_dlen -= pi->rm_size;

		if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return (EINVAL);
		if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
			return (EINVAL);
		dlen = pi->rm_size - pi->rm_pktinfooffset;
		data = pi->rm_data;

		switch (pi->rm_type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
				return (EINVAL);
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
				return (EINVAL);
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHVAL:
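			/* RSS hash value computed by the host for this packet. */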
			if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
				return (EINVAL);
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHINF:
			if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
				return (EINVAL);
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL) {
			/* All found; done */
			break;
		}
next:
		pi = (const struct rndis_pktinfo *)
		    ((const uint8_t *)pi + pi->rm_size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if ((mask & HN_RXINFO_HASHVAL) == 0)
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return (0);
}

static __inline bool
hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
{

	if (off < check_off) {
		if (__predict_true(off + len <= check_off))
			return (false);
	} else if (off > check_off) {
		if (__predict_true(check_off + check_len <= off))
			return (false);
	}
	return (true);
}

static void
hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_packet_msg *pkt;
	struct hn_rxinfo info;
	int data_off, pktinfo_off, data_len, pktinfo_len;

	/*
	 * Check length.
	 */
	if (__predict_false(dlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
		return;
	}
	pkt = data;

	if (__predict_false(dlen < pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
		return;
	}
	if (__predict_false(pkt->rm_len <
	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
		    "msglen %u, data %u, oob %u, pktinfo %u\n",
		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
		    pkt->rm_pktinfolen);
		return;
	}
	if (__predict_false(pkt->rm_datalen == 0)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
		return;
	}

	/*
	 * Check offsets.
	 */
#define IS_OFFSET_INVALID(ofs)			\
	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))

	/* XXX Hyper-V does not meet data offset alignment requirement */
	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data offset %u\n", pkt->rm_dataoffset);
		return;
	}
	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "oob offset %u\n", pkt->rm_oobdataoffset);
		return;
	}
	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
		return;
	}

#undef IS_OFFSET_INVALID

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
	data_len = pkt->rm_datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
	pktinfo_len = pkt->rm_pktinfolen;

	/*
	 * Check OOB coverage.
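	 *
	 * OOB data is not used by this driver; it is only sanity checked
	 * (hence the "got oobdata" message below) and then ignored.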
	 */
	if (__predict_false(pkt->rm_oobdatalen != 0)) {
		int oob_off, oob_len;

		if_printf(rxr->hn_ifp, "got oobdata\n");
		oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
		oob_len = pkt->rm_oobdatalen;

		if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overflow, msglen %u, oob abs %d len %d\n",
			    pkt->rm_len, oob_off, oob_len);
			return;
		}

		/*
		 * Check against data.
		 */
		if (hn_rndis_check_overlap(oob_off, oob_len,
		    data_off, data_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps data, oob abs %d len %d, "
			    "data abs %d len %d\n",
			    oob_off, oob_len, data_off, data_len);
			return;
		}

		/*
		 * Check against pktinfo.
		 */
		if (pktinfo_len != 0 &&
		    hn_rndis_check_overlap(oob_off, oob_len,
		    pktinfo_off, pktinfo_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps pktinfo, oob abs %d len %d, "
			    "pktinfo abs %d len %d\n",
			    oob_off, oob_len, pktinfo_off, pktinfo_len);
			return;
		}
	}

	/*
	 * Check per-packet-info coverage and find useful per-packet-info.
	 */
	info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
	info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
	info.hash_info = HN_NDIS_HASH_INFO_INVALID;
	if (__predict_true(pktinfo_len != 0)) {
		bool overlap;
		int error;

		if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overflow, msglen %u, "
			    "pktinfo abs %d len %d\n",
			    pkt->rm_len, pktinfo_off, pktinfo_len);
			return;
		}

		/*
		 * Check packet info coverage.
		 */
		overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
		    data_off, data_len);
		if (__predict_false(overlap)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overlap data, pktinfo abs %d len %d, "
			    "data abs %d len %d\n",
			    pktinfo_off, pktinfo_len, data_off, data_len);
			return;
		}

		/*
		 * Find useful per-packet-info.
		 */
		error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
		    pktinfo_len, &info);
		if (__predict_false(error)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
			    "pktinfo\n");
			return;
		}
	}

	if (__predict_false(data_off + data_len > pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data overflow, msglen %u, data abs %d len %d\n",
		    pkt->rm_len, data_off, data_len);
		return;
	}
	hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
}

static __inline void
hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_msghdr *hdr;

	if (__predict_false(dlen < sizeof(*hdr))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
		return;
	}
	hdr = data;

	if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
		/* Hot data path. */
		hn_rndis_rx_data(rxr, data, dlen);
		/* Done! */
		return;
	}

	if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
		hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
	else
		hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
}

static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{
	const struct hn_nvs_hdr *hdr;

	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
		if_printf(sc->hn_ifp, "invalid nvs notify\n");
		return;
	}
	hdr = VMBUS_CHANPKT_CONST_DATA(pkt);

	if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
		/* Useless; ignore */
		return;
	}
	if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
}

static void
hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkt)
{
	struct hn_nvs_sendctx *sndc;

	sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
	sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
	    VMBUS_CHANPKT_DATALEN(pkt));
	/*
	 * NOTE:
	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
	 * its callback.
	 */
}

static void
hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkthdr)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr;
	int count, i, hlen;

	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
		return;
	}
	nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);

	/* Make sure that this is a RNDIS message. */
	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
		    nvs_hdr->nvs_type);
		return;
	}

	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
	if (__predict_false(hlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
		return;
	}
	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;

	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
		    pkt->cp_rxbuf_id);
		return;
	}

	count = pkt->cp_rxbuf_cnt;
	if (__predict_false(hlen <
	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
		return;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		int ofs, len;

		ofs = pkt->cp_rxbuf[i].rb_ofs;
		len = pkt->cp_rxbuf[i].rb_len;
		if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
			    "ofs %d, len %d\n", i, ofs, len);
			continue;
		}
		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
	}

	/*
	 * Ack the consumed RXBUF associated w/ this channel packet,
	 * so that this RXBUF can be recycled by the hypervisor.
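	 *
	 * Until the ack is sent, the host keeps this part of the receive
	 * buffer busy; see hn_nvs_ack_rxbuf() for the retry/leak handling.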
	 */
	hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
}

static void
hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    uint64_t tid)
{
	struct hn_nvs_rndis_ack ack;
	int retries, error;

	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
	ack.nvs_status = HN_NVS_STATUS_OK;

	retries = 0;
again:
	error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
	    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
	if (__predict_false(error == EAGAIN)) {
		/*
		 * NOTE:
		 * This should _not_ happen in the real world, since the
		 * consumption of the TX bufring from the TX path is
		 * controlled.
		 */
		if (rxr->hn_ack_failed == 0)
			if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
		rxr->hn_ack_failed++;
		retries++;
		if (retries < 10) {
			DELAY(100);
			goto again;
		}
		/* RXBUF leaks! */
		if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
	}
}

static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
	struct hn_rx_ring *rxr = xrxr;
	struct hn_softc *sc = rxr->hn_ifp->if_softc;

	for (;;) {
		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
		int error, pktlen;

		pktlen = rxr->hn_pktbuf_len;
		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
		if (__predict_false(error == ENOBUFS)) {
			void *nbuf;
			int nlen;

			/*
			 * Expand channel packet buffer.
			 *
			 * XXX
			 * Use M_WAITOK here, since allocation failure
			 * is fatal.
			 */
			nlen = rxr->hn_pktbuf_len * 2;
			while (nlen < pktlen)
				nlen *= 2;
			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);

			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
			    rxr->hn_pktbuf_len, nlen);

			free(rxr->hn_pktbuf, M_DEVBUF);
			rxr->hn_pktbuf = nbuf;
			rxr->hn_pktbuf_len = nlen;
			/* Retry! */
			continue;
		} else if (__predict_false(error == EAGAIN)) {
			/* No more channel packets; done! */
			break;
		}
		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			hn_nvs_handle_comp(sc, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(rxr, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(sc, pkt);
			break;

		default:
			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
			    pkt->cph_type);
			break;
		}
	}
	hn_chan_rollup(rxr, rxr->hn_txr);
}

static void
hn_sysinit(void *arg __unused)
{
	int i;

#ifdef HN_IFSTART_SUPPORT
	/*
	 * Don't use ifnet.if_start if transparent VF mode is requested;
	 * mainly due to the IFF_DRV_OACTIVE flag.
	 */
	if (hn_xpnt_vf && hn_use_if_start) {
		hn_use_if_start = 0;
		printf("hn: transparent VF mode, if_transmit will be used, "
		    "instead of if_start\n");
	}
#endif
	if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) {
		printf("hn: invalid transparent VF attach routing "
		    "wait timeout %d, reset to %d\n",
		    hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN);
		hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
	}

	/*
	 * Initialize VF map.
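	 *
	 * The map is indexed by a VF ifnet's if_index and points back to
	 * the corresponding hn(4) ifnet; it starts at HN_VFMAP_SIZE_DEF
	 * entries and is expected to be grown if a VF with a larger
	 * if_index shows up.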
	 */
	rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE);
	hn_vfmap_size = HN_VFMAP_SIZE_DEF;
	hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF,
	    M_WAITOK | M_ZERO);

	/*
	 * Fix the # of TX taskqueues.
	 */
	if (hn_tx_taskq_cnt <= 0)
		hn_tx_taskq_cnt = 1;
	else if (hn_tx_taskq_cnt > mp_ncpus)
		hn_tx_taskq_cnt = mp_ncpus;

	/*
	 * Fix the TX taskqueue mode.
	 */
	switch (hn_tx_taskq_mode) {
	case HN_TX_TASKQ_M_INDEP:
	case HN_TX_TASKQ_M_GLOBAL:
	case HN_TX_TASKQ_M_EVTTQ:
		break;
	default:
		hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
		break;
	}

	if (vm_guest != VM_GUEST_HV)
		return;

	if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
		return;

	hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
	    M_DEVBUF, M_WAITOK);
	for (i = 0; i < hn_tx_taskq_cnt; ++i) {
		hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &hn_tx_taskque[i]);
		taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
		    "hn tx%d", i);
	}
}
SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL);

static void
hn_sysuninit(void *arg __unused)
{

	if (hn_tx_taskque != NULL) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(hn_tx_taskque[i]);
		free(hn_tx_taskque, M_DEVBUF);
	}

	if (hn_vfmap != NULL)
		free(hn_vfmap, M_DEVBUF);
	rm_destroy(&hn_vfmap_lock);
}
SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL);