1 /*- 2 * Copyright (c) 2010-2012 Citrix Inc. 3 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. 4 * Copyright (c) 2012 NetApp Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 2004-2006 Kip Macy 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 
53 */ 54 55 #include <sys/cdefs.h> 56 __FBSDID("$FreeBSD$"); 57 58 #include "opt_hn.h" 59 #include "opt_inet6.h" 60 #include "opt_inet.h" 61 #include "opt_rss.h" 62 63 #include <sys/param.h> 64 #include <sys/systm.h> 65 #include <sys/bus.h> 66 #include <sys/counter.h> 67 #include <sys/kernel.h> 68 #include <sys/limits.h> 69 #include <sys/malloc.h> 70 #include <sys/mbuf.h> 71 #include <sys/module.h> 72 #include <sys/queue.h> 73 #include <sys/lock.h> 74 #include <sys/rmlock.h> 75 #include <sys/sbuf.h> 76 #include <sys/smp.h> 77 #include <sys/socket.h> 78 #include <sys/sockio.h> 79 #include <sys/sx.h> 80 #include <sys/sysctl.h> 81 #include <sys/taskqueue.h> 82 #include <sys/buf_ring.h> 83 #include <sys/eventhandler.h> 84 85 #include <machine/atomic.h> 86 #include <machine/in_cksum.h> 87 88 #include <net/bpf.h> 89 #include <net/ethernet.h> 90 #include <net/if.h> 91 #include <net/if_dl.h> 92 #include <net/if_media.h> 93 #include <net/if_types.h> 94 #include <net/if_var.h> 95 #include <net/rndis.h> 96 #ifdef RSS 97 #include <net/rss_config.h> 98 #endif 99 100 #include <netinet/in_systm.h> 101 #include <netinet/in.h> 102 #include <netinet/ip.h> 103 #include <netinet/ip6.h> 104 #include <netinet/tcp.h> 105 #include <netinet/tcp_lro.h> 106 #include <netinet/udp.h> 107 108 #include <dev/hyperv/include/hyperv.h> 109 #include <dev/hyperv/include/hyperv_busdma.h> 110 #include <dev/hyperv/include/vmbus.h> 111 #include <dev/hyperv/include/vmbus_xact.h> 112 113 #include <dev/hyperv/netvsc/ndis.h> 114 #include <dev/hyperv/netvsc/if_hnreg.h> 115 #include <dev/hyperv/netvsc/if_hnvar.h> 116 #include <dev/hyperv/netvsc/hn_nvs.h> 117 #include <dev/hyperv/netvsc/hn_rndis.h> 118 119 #include "vmbus_if.h" 120 121 #define HN_IFSTART_SUPPORT 122 123 #define HN_RING_CNT_DEF_MAX 8 124 125 #define HN_VFMAP_SIZE_DEF 8 126 127 #define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */ 128 129 /* YYY should get it from the underlying channel */ 130 #define HN_TX_DESC_CNT 512 131 132 #define HN_RNDIS_PKT_LEN \ 133 (sizeof(struct rndis_packet_msg) + \ 134 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ 135 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ 136 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ 137 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) 138 #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE 139 #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE 140 141 #define HN_TX_DATA_BOUNDARY PAGE_SIZE 142 #define HN_TX_DATA_MAXSIZE IP_MAXPACKET 143 #define HN_TX_DATA_SEGSIZE PAGE_SIZE 144 /* -1 for RNDIS packet message */ 145 #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) 146 147 #define HN_DIRECT_TX_SIZE_DEF 128 148 149 #define HN_EARLY_TXEOF_THRESH 8 150 151 #define HN_PKTBUF_LEN_DEF (16 * 1024) 152 153 #define HN_LROENT_CNT_DEF 128 154 155 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) 156 #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) 157 /* YYY 2*MTU is a bit rough, but should be good enough. 
*/ 158 #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) 159 160 #define HN_LRO_ACKCNT_DEF 1 161 162 #define HN_LOCK_INIT(sc) \ 163 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) 164 #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) 165 #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) 166 #define HN_LOCK(sc) \ 167 do { \ 168 while (sx_try_xlock(&(sc)->hn_lock) == 0) \ 169 DELAY(1000); \ 170 } while (0) 171 #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) 172 173 #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) 174 #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) 175 #define HN_CSUM_IP_HWASSIST(sc) \ 176 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) 177 #define HN_CSUM_IP6_HWASSIST(sc) \ 178 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) 179 180 #define HN_PKTSIZE_MIN(align) \ 181 roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ 182 HN_RNDIS_PKT_LEN, (align)) 183 #define HN_PKTSIZE(m, align) \ 184 roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) 185 186 #ifdef RSS 187 #define HN_RING_IDX2CPU(sc, idx) rss_getcpu((idx) % rss_getnumbuckets()) 188 #else 189 #define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus) 190 #endif 191 192 struct hn_txdesc { 193 #ifndef HN_USE_TXDESC_BUFRING 194 SLIST_ENTRY(hn_txdesc) link; 195 #endif 196 STAILQ_ENTRY(hn_txdesc) agg_link; 197 198 /* Aggregated txdescs, in sending order. */ 199 STAILQ_HEAD(, hn_txdesc) agg_list; 200 201 /* The oldest packet, if transmission aggregation happens. */ 202 struct mbuf *m; 203 struct hn_tx_ring *txr; 204 int refs; 205 uint32_t flags; /* HN_TXD_FLAG_ */ 206 struct hn_nvs_sendctx send_ctx; 207 uint32_t chim_index; 208 int chim_size; 209 210 bus_dmamap_t data_dmap; 211 212 bus_addr_t rndis_pkt_paddr; 213 struct rndis_packet_msg *rndis_pkt; 214 bus_dmamap_t rndis_pkt_dmap; 215 }; 216 217 #define HN_TXD_FLAG_ONLIST 0x0001 218 #define HN_TXD_FLAG_DMAMAP 0x0002 219 #define HN_TXD_FLAG_ONAGG 0x0004 220 221 struct hn_rxinfo { 222 uint32_t vlan_info; 223 uint32_t csum_info; 224 uint32_t hash_info; 225 uint32_t hash_value; 226 }; 227 228 struct hn_rxvf_setarg { 229 struct hn_rx_ring *rxr; 230 struct ifnet *vf_ifp; 231 }; 232 233 #define HN_RXINFO_VLAN 0x0001 234 #define HN_RXINFO_CSUM 0x0002 235 #define HN_RXINFO_HASHINF 0x0004 236 #define HN_RXINFO_HASHVAL 0x0008 237 #define HN_RXINFO_ALL \ 238 (HN_RXINFO_VLAN | \ 239 HN_RXINFO_CSUM | \ 240 HN_RXINFO_HASHINF | \ 241 HN_RXINFO_HASHVAL) 242 243 #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff 244 #define HN_NDIS_RXCSUM_INFO_INVALID 0 245 #define HN_NDIS_HASH_INFO_INVALID 0 246 247 static int hn_probe(device_t); 248 static int hn_attach(device_t); 249 static int hn_detach(device_t); 250 static int hn_shutdown(device_t); 251 static void hn_chan_callback(struct vmbus_channel *, 252 void *); 253 254 static void hn_init(void *); 255 static int hn_ioctl(struct ifnet *, u_long, caddr_t); 256 #ifdef HN_IFSTART_SUPPORT 257 static void hn_start(struct ifnet *); 258 #endif 259 static int hn_transmit(struct ifnet *, struct mbuf *); 260 static void hn_xmit_qflush(struct ifnet *); 261 static int hn_ifmedia_upd(struct ifnet *); 262 static void hn_ifmedia_sts(struct ifnet *, 263 struct ifmediareq *); 264 265 static void hn_ifnet_event(void *, struct ifnet *, int); 266 static void hn_ifaddr_event(void *, struct ifnet *); 267 static void hn_ifnet_attevent(void *, struct ifnet *); 268 static void hn_ifnet_detevent(void *, struct ifnet *); 269 static void hn_ifnet_lnkevent(void *, struct ifnet *, int); 270 
271 static bool hn_ismyvf(const struct hn_softc *, 272 const struct ifnet *); 273 static void hn_rxvf_change(struct hn_softc *, 274 struct ifnet *, bool); 275 static void hn_rxvf_set(struct hn_softc *, struct ifnet *); 276 static void hn_rxvf_set_task(void *, int); 277 static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *); 278 static int hn_xpnt_vf_iocsetflags(struct hn_softc *); 279 static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, 280 struct ifreq *); 281 static void hn_xpnt_vf_saveifflags(struct hn_softc *); 282 static bool hn_xpnt_vf_isready(struct hn_softc *); 283 static void hn_xpnt_vf_setready(struct hn_softc *); 284 static void hn_xpnt_vf_init_taskfunc(void *, int); 285 static void hn_xpnt_vf_init(struct hn_softc *); 286 static void hn_xpnt_vf_setenable(struct hn_softc *); 287 static void hn_xpnt_vf_setdisable(struct hn_softc *, bool); 288 static void hn_vf_rss_fixup(struct hn_softc *, bool); 289 static void hn_vf_rss_restore(struct hn_softc *); 290 291 static int hn_rndis_rxinfo(const void *, int, 292 struct hn_rxinfo *); 293 static void hn_rndis_rx_data(struct hn_rx_ring *, 294 const void *, int); 295 static void hn_rndis_rx_status(struct hn_softc *, 296 const void *, int); 297 static void hn_rndis_init_fixat(struct hn_softc *, int); 298 299 static void hn_nvs_handle_notify(struct hn_softc *, 300 const struct vmbus_chanpkt_hdr *); 301 static void hn_nvs_handle_comp(struct hn_softc *, 302 struct vmbus_channel *, 303 const struct vmbus_chanpkt_hdr *); 304 static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, 305 struct vmbus_channel *, 306 const struct vmbus_chanpkt_hdr *); 307 static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, 308 struct vmbus_channel *, uint64_t); 309 310 #if __FreeBSD_version >= 1100099 311 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); 312 static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); 313 #endif 314 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); 315 static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); 316 #if __FreeBSD_version < 1100095 317 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); 318 #else 319 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); 320 #endif 321 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 322 static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 323 static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); 324 static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); 325 static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); 326 static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); 327 static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); 328 #ifndef RSS 329 static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); 330 static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); 331 #endif 332 static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); 333 static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS); 334 static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS); 335 static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); 336 static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); 337 static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); 338 static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); 339 static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS); 340 static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS); 341 static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS); 342 static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS); 343 static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS); 344 static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS); 345 static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS); 346 347 static void hn_stop(struct hn_softc *, 
bool); 348 static void hn_init_locked(struct hn_softc *); 349 static int hn_chan_attach(struct hn_softc *, 350 struct vmbus_channel *); 351 static void hn_chan_detach(struct hn_softc *, 352 struct vmbus_channel *); 353 static int hn_attach_subchans(struct hn_softc *); 354 static void hn_detach_allchans(struct hn_softc *); 355 static void hn_chan_rollup(struct hn_rx_ring *, 356 struct hn_tx_ring *); 357 static void hn_set_ring_inuse(struct hn_softc *, int); 358 static int hn_synth_attach(struct hn_softc *, int); 359 static void hn_synth_detach(struct hn_softc *); 360 static int hn_synth_alloc_subchans(struct hn_softc *, 361 int *); 362 static bool hn_synth_attachable(const struct hn_softc *); 363 static void hn_suspend(struct hn_softc *); 364 static void hn_suspend_data(struct hn_softc *); 365 static void hn_suspend_mgmt(struct hn_softc *); 366 static void hn_resume(struct hn_softc *); 367 static void hn_resume_data(struct hn_softc *); 368 static void hn_resume_mgmt(struct hn_softc *); 369 static void hn_suspend_mgmt_taskfunc(void *, int); 370 static void hn_chan_drain(struct hn_softc *, 371 struct vmbus_channel *); 372 static void hn_disable_rx(struct hn_softc *); 373 static void hn_drain_rxtx(struct hn_softc *, int); 374 static void hn_polling(struct hn_softc *, u_int); 375 static void hn_chan_polling(struct vmbus_channel *, u_int); 376 static void hn_mtu_change_fixup(struct hn_softc *); 377 378 static void hn_update_link_status(struct hn_softc *); 379 static void hn_change_network(struct hn_softc *); 380 static void hn_link_taskfunc(void *, int); 381 static void hn_netchg_init_taskfunc(void *, int); 382 static void hn_netchg_status_taskfunc(void *, int); 383 static void hn_link_status(struct hn_softc *); 384 385 static int hn_create_rx_data(struct hn_softc *, int); 386 static void hn_destroy_rx_data(struct hn_softc *); 387 static int hn_check_iplen(const struct mbuf *, int); 388 static void hn_rxpkt_proto(const struct mbuf *, int *, int *); 389 static int hn_set_rxfilter(struct hn_softc *, uint32_t); 390 static int hn_rxfilter_config(struct hn_softc *); 391 static int hn_rss_reconfig(struct hn_softc *); 392 static void hn_rss_ind_fixup(struct hn_softc *); 393 static void hn_rss_mbuf_hash(struct hn_softc *, uint32_t); 394 static int hn_rxpkt(struct hn_rx_ring *, const void *, 395 int, const struct hn_rxinfo *); 396 static uint32_t hn_rss_type_fromndis(uint32_t); 397 static uint32_t hn_rss_type_tondis(uint32_t); 398 399 static int hn_tx_ring_create(struct hn_softc *, int); 400 static void hn_tx_ring_destroy(struct hn_tx_ring *); 401 static int hn_create_tx_data(struct hn_softc *, int); 402 static void hn_fixup_tx_data(struct hn_softc *); 403 static void hn_fixup_rx_data(struct hn_softc *); 404 static void hn_destroy_tx_data(struct hn_softc *); 405 static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); 406 static void hn_txdesc_gc(struct hn_tx_ring *, 407 struct hn_txdesc *); 408 static int hn_encap(struct ifnet *, struct hn_tx_ring *, 409 struct hn_txdesc *, struct mbuf **); 410 static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, 411 struct hn_txdesc *); 412 static void hn_set_chim_size(struct hn_softc *, int); 413 static void hn_set_tso_maxsize(struct hn_softc *, int, int); 414 static bool hn_tx_ring_pending(struct hn_tx_ring *); 415 static void hn_tx_ring_qflush(struct hn_tx_ring *); 416 static void hn_resume_tx(struct hn_softc *, int); 417 static void hn_set_txagg(struct hn_softc *); 418 static void *hn_try_txagg(struct ifnet *, 419 struct hn_tx_ring *, struct hn_txdesc *, 420 
int); 421 static int hn_get_txswq_depth(const struct hn_tx_ring *); 422 static void hn_txpkt_done(struct hn_nvs_sendctx *, 423 struct hn_softc *, struct vmbus_channel *, 424 const void *, int); 425 static int hn_txpkt_sglist(struct hn_tx_ring *, 426 struct hn_txdesc *); 427 static int hn_txpkt_chim(struct hn_tx_ring *, 428 struct hn_txdesc *); 429 static int hn_xmit(struct hn_tx_ring *, int); 430 static void hn_xmit_taskfunc(void *, int); 431 static void hn_xmit_txeof(struct hn_tx_ring *); 432 static void hn_xmit_txeof_taskfunc(void *, int); 433 #ifdef HN_IFSTART_SUPPORT 434 static int hn_start_locked(struct hn_tx_ring *, int); 435 static void hn_start_taskfunc(void *, int); 436 static void hn_start_txeof(struct hn_tx_ring *); 437 static void hn_start_txeof_taskfunc(void *, int); 438 #endif 439 440 SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 441 "Hyper-V network interface"); 442 443 /* Trust tcp segment verification on host side. */ 444 static int hn_trust_hosttcp = 1; 445 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, 446 &hn_trust_hosttcp, 0, 447 "Trust tcp segment verification on host side, " 448 "when csum info is missing (global setting)"); 449 450 /* Trust udp datagram verification on host side. */ 451 static int hn_trust_hostudp = 1; 452 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, 453 &hn_trust_hostudp, 0, 454 "Trust udp datagram verification on host side, " 455 "when csum info is missing (global setting)"); 456 457 /* Trust ip packet verification on host side. */ 458 static int hn_trust_hostip = 1; 459 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, 460 &hn_trust_hostip, 0, 461 "Trust ip packet verification on host side, " 462 "when csum info is missing (global setting)"); 463 464 /* 465 * Offload UDP/IPv4 checksum. 466 */ 467 static int hn_enable_udp4cs = 1; 468 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN, 469 &hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum"); 470 471 /* 472 * Offload UDP/IPv6 checksum. 473 */ 474 static int hn_enable_udp6cs = 1; 475 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN, 476 &hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum"); 477 478 /* Stats. */ 479 static counter_u64_t hn_udpcs_fixup; 480 SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW, 481 &hn_udpcs_fixup, "# of UDP checksum fixup"); 482 483 /* 484 * See hn_set_hlen(). 485 * 486 * This value is for Azure. For Hyper-V, set this above 487 * 65536 to disable UDP datagram checksum fixup.
488 */ 489 static int hn_udpcs_fixup_mtu = 1420; 490 SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN, 491 &hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold"); 492 493 /* Limit TSO burst size */ 494 static int hn_tso_maxlen = IP_MAXPACKET; 495 SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, 496 &hn_tso_maxlen, 0, "TSO burst limit"); 497 498 /* Limit chimney send size */ 499 static int hn_tx_chimney_size = 0; 500 SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, 501 &hn_tx_chimney_size, 0, "Chimney send packet size limit"); 502 503 /* Limit the size of packet for direct transmission */ 504 static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; 505 SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, 506 &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); 507 508 /* # of LRO entries per RX ring */ 509 #if defined(INET) || defined(INET6) 510 #if __FreeBSD_version >= 1100095 511 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; 512 SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, 513 &hn_lro_entry_count, 0, "LRO entry count"); 514 #endif 515 #endif 516 517 static int hn_tx_taskq_cnt = 1; 518 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN, 519 &hn_tx_taskq_cnt, 0, "# of TX taskqueues"); 520 521 #define HN_TX_TASKQ_M_INDEP 0 522 #define HN_TX_TASKQ_M_GLOBAL 1 523 #define HN_TX_TASKQ_M_EVTTQ 2 524 525 static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; 526 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN, 527 &hn_tx_taskq_mode, 0, "TX taskqueue modes: " 528 "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs"); 529 530 #ifndef HN_USE_TXDESC_BUFRING 531 static int hn_use_txdesc_bufring = 0; 532 #else 533 static int hn_use_txdesc_bufring = 1; 534 #endif 535 SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, 536 &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); 537 538 #ifdef HN_IFSTART_SUPPORT 539 /* Use ifnet.if_start instead of ifnet.if_transmit */ 540 static int hn_use_if_start = 0; 541 SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, 542 &hn_use_if_start, 0, "Use if_start TX method"); 543 #endif 544 545 /* # of channels to use */ 546 static int hn_chan_cnt = 0; 547 SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, 548 &hn_chan_cnt, 0, 549 "# of channels to use; each channel has one RX ring and one TX ring"); 550 551 /* # of transmit rings to use */ 552 static int hn_tx_ring_cnt = 0; 553 SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, 554 &hn_tx_ring_cnt, 0, "# of TX rings to use"); 555 556 /* Software TX ring deptch */ 557 static int hn_tx_swq_depth = 0; 558 SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, 559 &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); 560 561 /* Enable sorted LRO, and the depth of the per-channel mbuf queue */ 562 #if __FreeBSD_version >= 1100095 563 static u_int hn_lro_mbufq_depth = 0; 564 SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, 565 &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); 566 #endif 567 568 /* Packet transmission aggregation size limit */ 569 static int hn_tx_agg_size = -1; 570 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN, 571 &hn_tx_agg_size, 0, "Packet transmission aggregation size limit"); 572 573 /* Packet transmission aggregation count limit */ 574 static int hn_tx_agg_pkts = -1; 575 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN, 576 &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit"); 577 578 /* VF list */ 579 SYSCTL_PROC(_hw_hn, 
OID_AUTO, vflist, 580 CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, 0, 0, 581 hn_vflist_sysctl, "A", 582 "VF list"); 583 584 /* VF mapping */ 585 SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, 586 CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, 0, 0, 587 hn_vfmap_sysctl, "A", 588 "VF mapping"); 589 590 /* Transparent VF */ 591 static int hn_xpnt_vf = 1; 592 SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN, 593 &hn_xpnt_vf, 0, "Transparent VF mode"); 594 595 /* Accurate BPF support for Transparent VF */ 596 static int hn_xpnt_vf_accbpf = 0; 597 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN, 598 &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF"); 599 600 /* Extra wait for transparent VF attach routine; unit: seconds. */ 601 static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; 602 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN, 603 &hn_xpnt_vf_attwait, 0, 604 "Extra wait for transparent VF attach routine; unit: seconds"); 605 606 static u_int hn_cpu_index; /* next CPU for channel */ 607 static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */ 608 609 static struct rmlock hn_vfmap_lock; 610 static int hn_vfmap_size; 611 static struct ifnet **hn_vfmap; 612 613 #ifndef RSS 614 static const uint8_t 615 hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 616 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 617 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 618 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 619 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 620 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 621 }; 622 #endif /* !RSS */ 623 624 static const struct hyperv_guid hn_guid = { 625 .hv_guid = { 626 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, 627 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e } 628 }; 629 630 static device_method_t hn_methods[] = { 631 /* Device interface */ 632 DEVMETHOD(device_probe, hn_probe), 633 DEVMETHOD(device_attach, hn_attach), 634 DEVMETHOD(device_detach, hn_detach), 635 DEVMETHOD(device_shutdown, hn_shutdown), 636 DEVMETHOD_END 637 }; 638 639 static driver_t hn_driver = { 640 "hn", 641 hn_methods, 642 sizeof(struct hn_softc) 643 }; 644 645 static devclass_t hn_devclass; 646 647 DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); 648 MODULE_VERSION(hn, 1); 649 MODULE_DEPEND(hn, vmbus, 1, 1, 1); 650 651 #if __FreeBSD_version >= 1100099 652 static void 653 hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) 654 { 655 int i; 656 657 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 658 sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; 659 } 660 #endif 661 662 static int 663 hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) 664 { 665 666 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 667 txd->chim_size == 0, ("invalid rndis sglist txd")); 668 return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, 669 &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); 670 } 671 672 static int 673 hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) 674 { 675 struct hn_nvs_rndis rndis; 676 677 KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && 678 txd->chim_size > 0, ("invalid rndis chim txd")); 679 680 rndis.nvs_type = HN_NVS_TYPE_RNDIS; 681 rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; 682 rndis.nvs_chim_idx = txd->chim_index; 683 rndis.nvs_chim_sz = txd->chim_size; 684 685 return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, 686 &rndis, sizeof(rndis), &txd->send_ctx)); 687 } 688 689 static __inline uint32_t 690 hn_chim_alloc(struct hn_softc *sc) 691 { 692 int i, bmap_cnt =
sc->hn_chim_bmap_cnt; 693 u_long *bmap = sc->hn_chim_bmap; 694 uint32_t ret = HN_NVS_CHIM_IDX_INVALID; 695 696 for (i = 0; i < bmap_cnt; ++i) { 697 int idx; 698 699 idx = ffsl(~bmap[i]); 700 if (idx == 0) 701 continue; 702 703 --idx; /* ffsl is 1-based */ 704 KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, 705 ("invalid i %d and idx %d", i, idx)); 706 707 if (atomic_testandset_long(&bmap[i], idx)) 708 continue; 709 710 ret = i * LONG_BIT + idx; 711 break; 712 } 713 return (ret); 714 } 715 716 static __inline void 717 hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) 718 { 719 u_long mask; 720 uint32_t idx; 721 722 idx = chim_idx / LONG_BIT; 723 KASSERT(idx < sc->hn_chim_bmap_cnt, 724 ("invalid chimney index 0x%x", chim_idx)); 725 726 mask = 1UL << (chim_idx % LONG_BIT); 727 KASSERT(sc->hn_chim_bmap[idx] & mask, 728 ("index bitmap 0x%lx, chimney index %u, " 729 "bitmap idx %d, bitmask 0x%lx", 730 sc->hn_chim_bmap[idx], chim_idx, idx, mask)); 731 732 atomic_clear_long(&sc->hn_chim_bmap[idx], mask); 733 } 734 735 #if defined(INET6) || defined(INET) 736 737 #define PULLUP_HDR(m, len) \ 738 do { \ 739 if (__predict_false((m)->m_len < (len))) { \ 740 (m) = m_pullup((m), (len)); \ 741 if ((m) == NULL) \ 742 return (NULL); \ 743 } \ 744 } while (0) 745 746 /* 747 * NOTE: If this function failed, the m_head would be freed. 748 */ 749 static __inline struct mbuf * 750 hn_tso_fixup(struct mbuf *m_head) 751 { 752 struct ether_vlan_header *evl; 753 struct tcphdr *th; 754 int ehlen; 755 756 KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); 757 758 PULLUP_HDR(m_head, sizeof(*evl)); 759 evl = mtod(m_head, struct ether_vlan_header *); 760 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 761 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 762 else 763 ehlen = ETHER_HDR_LEN; 764 m_head->m_pkthdr.l2hlen = ehlen; 765 766 #ifdef INET 767 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 768 struct ip *ip; 769 int iphlen; 770 771 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 772 ip = mtodo(m_head, ehlen); 773 iphlen = ip->ip_hl << 2; 774 m_head->m_pkthdr.l3hlen = iphlen; 775 776 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 777 th = mtodo(m_head, ehlen + iphlen); 778 779 ip->ip_len = 0; 780 ip->ip_sum = 0; 781 th->th_sum = in_pseudo(ip->ip_src.s_addr, 782 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 783 } 784 #endif 785 #if defined(INET6) && defined(INET) 786 else 787 #endif 788 #ifdef INET6 789 { 790 struct ip6_hdr *ip6; 791 792 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 793 ip6 = mtodo(m_head, ehlen); 794 if (ip6->ip6_nxt != IPPROTO_TCP) { 795 m_freem(m_head); 796 return (NULL); 797 } 798 m_head->m_pkthdr.l3hlen = sizeof(*ip6); 799 800 PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); 801 th = mtodo(m_head, ehlen + sizeof(*ip6)); 802 803 ip6->ip6_plen = 0; 804 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 805 } 806 #endif 807 return (m_head); 808 } 809 810 /* 811 * NOTE: If this function failed, the m_head would be freed. 
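 * hn_set_hlen() below caches the Ethernet and IP header lengths in m_pkthdr (l2hlen/l3hlen) for the transmit path; on Azure it may also compute the UDP checksum in software for large datagrams sent without IP_DF (see the hn_udpcs_fixup_mtu tunable).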
812 */ 813 static __inline struct mbuf * 814 hn_set_hlen(struct mbuf *m_head) 815 { 816 const struct ether_vlan_header *evl; 817 int ehlen; 818 819 PULLUP_HDR(m_head, sizeof(*evl)); 820 evl = mtod(m_head, const struct ether_vlan_header *); 821 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 822 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 823 else 824 ehlen = ETHER_HDR_LEN; 825 m_head->m_pkthdr.l2hlen = ehlen; 826 827 #ifdef INET 828 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) { 829 const struct ip *ip; 830 int iphlen; 831 832 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 833 ip = mtodo(m_head, ehlen); 834 iphlen = ip->ip_hl << 2; 835 m_head->m_pkthdr.l3hlen = iphlen; 836 837 /* 838 * UDP checksum offload does not work in Azure if the 839 * following conditions are met: 840 * - sizeof(IP hdr + UDP hdr + payload) > 1420. 841 * - IP_DF is not set in the IP hdr. 842 * 843 * Fall back to software checksum for these UDP datagrams. 844 */ 845 if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) && 846 m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen && 847 (ntohs(ip->ip_off) & IP_DF) == 0) { 848 uint16_t off = ehlen + iphlen; 849 850 counter_u64_add(hn_udpcs_fixup, 1); 851 PULLUP_HDR(m_head, off + sizeof(struct udphdr)); 852 *(uint16_t *)(m_head->m_data + off + 853 m_head->m_pkthdr.csum_data) = in_cksum_skip( 854 m_head, m_head->m_pkthdr.len, off); 855 m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP; 856 } 857 } 858 #endif 859 #if defined(INET6) && defined(INET) 860 else 861 #endif 862 #ifdef INET6 863 { 864 const struct ip6_hdr *ip6; 865 866 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 867 ip6 = mtodo(m_head, ehlen); 868 if (ip6->ip6_nxt != IPPROTO_TCP && 869 ip6->ip6_nxt != IPPROTO_UDP) { 870 m_freem(m_head); 871 return (NULL); 872 } 873 m_head->m_pkthdr.l3hlen = sizeof(*ip6); 874 } 875 #endif 876 return (m_head); 877 } 878 879 /* 880 * NOTE: If this function failed, the m_head would be freed. 881 */ 882 static __inline struct mbuf * 883 hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn) 884 { 885 const struct tcphdr *th; 886 int ehlen, iphlen; 887 888 *tcpsyn = 0; 889 ehlen = m_head->m_pkthdr.l2hlen; 890 iphlen = m_head->m_pkthdr.l3hlen; 891 892 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 893 th = mtodo(m_head, ehlen + iphlen); 894 if (th->th_flags & TH_SYN) 895 *tcpsyn = 1; 896 return (m_head); 897 } 898 899 #undef PULLUP_HDR 900 901 #endif /* INET6 || INET */ 902 903 static int 904 hn_set_rxfilter(struct hn_softc *sc, uint32_t filter) 905 { 906 int error = 0; 907 908 HN_LOCK_ASSERT(sc); 909 910 if (sc->hn_rx_filter != filter) { 911 error = hn_rndis_set_rxfilter(sc, filter); 912 if (!error) 913 sc->hn_rx_filter = filter; 914 } 915 return (error); 916 } 917 918 static int 919 hn_rxfilter_config(struct hn_softc *sc) 920 { 921 struct ifnet *ifp = sc->hn_ifp; 922 uint32_t filter; 923 924 HN_LOCK_ASSERT(sc); 925 926 /* 927 * If the non-transparent mode VF is activated, we don't know how 928 * its RX filter is configured, so stick the synthetic device in 929 * promiscuous mode.
930 */ 931 if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) { 932 filter = NDIS_PACKET_TYPE_PROMISCUOUS; 933 } else { 934 filter = NDIS_PACKET_TYPE_DIRECTED; 935 if (ifp->if_flags & IFF_BROADCAST) 936 filter |= NDIS_PACKET_TYPE_BROADCAST; 937 /* TODO: support multicast list */ 938 if ((ifp->if_flags & IFF_ALLMULTI) || 939 !CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) 940 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; 941 } 942 return (hn_set_rxfilter(sc, filter)); 943 } 944 945 static void 946 hn_set_txagg(struct hn_softc *sc) 947 { 948 uint32_t size, pkts; 949 int i; 950 951 /* 952 * Setup aggregation size. 953 */ 954 if (sc->hn_agg_size < 0) 955 size = UINT32_MAX; 956 else 957 size = sc->hn_agg_size; 958 959 if (sc->hn_rndis_agg_size < size) 960 size = sc->hn_rndis_agg_size; 961 962 /* NOTE: We only aggregate packets using chimney sending buffers. */ 963 if (size > (uint32_t)sc->hn_chim_szmax) 964 size = sc->hn_chim_szmax; 965 966 if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) { 967 /* Disable */ 968 size = 0; 969 pkts = 0; 970 goto done; 971 } 972 973 /* NOTE: Type of the per TX ring setting is 'int'. */ 974 if (size > INT_MAX) 975 size = INT_MAX; 976 977 /* 978 * Setup aggregation packet count. 979 */ 980 if (sc->hn_agg_pkts < 0) 981 pkts = UINT32_MAX; 982 else 983 pkts = sc->hn_agg_pkts; 984 985 if (sc->hn_rndis_agg_pkts < pkts) 986 pkts = sc->hn_rndis_agg_pkts; 987 988 if (pkts <= 1) { 989 /* Disable */ 990 size = 0; 991 pkts = 0; 992 goto done; 993 } 994 995 /* NOTE: Type of the per TX ring setting is 'short'. */ 996 if (pkts > SHRT_MAX) 997 pkts = SHRT_MAX; 998 999 done: 1000 /* NOTE: Type of the per TX ring setting is 'short'. */ 1001 if (sc->hn_rndis_agg_align > SHRT_MAX) { 1002 /* Disable */ 1003 size = 0; 1004 pkts = 0; 1005 } 1006 1007 if (bootverbose) { 1008 if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", 1009 size, pkts, sc->hn_rndis_agg_align); 1010 } 1011 1012 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 1013 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 1014 1015 mtx_lock(&txr->hn_tx_lock); 1016 txr->hn_agg_szmax = size; 1017 txr->hn_agg_pktmax = pkts; 1018 txr->hn_agg_align = sc->hn_rndis_agg_align; 1019 mtx_unlock(&txr->hn_tx_lock); 1020 } 1021 } 1022 1023 static int 1024 hn_get_txswq_depth(const struct hn_tx_ring *txr) 1025 { 1026 1027 KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); 1028 if (hn_tx_swq_depth < txr->hn_txdesc_cnt) 1029 return txr->hn_txdesc_cnt; 1030 return hn_tx_swq_depth; 1031 } 1032 1033 static int 1034 hn_rss_reconfig(struct hn_softc *sc) 1035 { 1036 int error; 1037 1038 HN_LOCK_ASSERT(sc); 1039 1040 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1041 return (ENXIO); 1042 1043 /* 1044 * Disable RSS first. 1045 * 1046 * NOTE: 1047 * Direct reconfiguration by setting the UNCHG flags does 1048 * _not_ work properly. 1049 */ 1050 if (bootverbose) 1051 if_printf(sc->hn_ifp, "disable RSS\n"); 1052 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); 1053 if (error) { 1054 if_printf(sc->hn_ifp, "RSS disable failed\n"); 1055 return (error); 1056 } 1057 1058 /* 1059 * Reenable the RSS w/ the updated RSS key or indirect 1060 * table. 
1061 */ 1062 if (bootverbose) 1063 if_printf(sc->hn_ifp, "reconfig RSS\n"); 1064 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 1065 if (error) { 1066 if_printf(sc->hn_ifp, "RSS reconfig failed\n"); 1067 return (error); 1068 } 1069 return (0); 1070 } 1071 1072 static void 1073 hn_rss_ind_fixup(struct hn_softc *sc) 1074 { 1075 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 1076 int i, nchan; 1077 1078 nchan = sc->hn_rx_ring_inuse; 1079 KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); 1080 1081 /* 1082 * Check indirect table to make sure that all channels in it 1083 * can be used. 1084 */ 1085 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 1086 if (rss->rss_ind[i] >= nchan) { 1087 if_printf(sc->hn_ifp, 1088 "RSS indirect table %d fixup: %u -> %d\n", 1089 i, rss->rss_ind[i], nchan - 1); 1090 rss->rss_ind[i] = nchan - 1; 1091 } 1092 } 1093 } 1094 1095 static int 1096 hn_ifmedia_upd(struct ifnet *ifp __unused) 1097 { 1098 1099 return EOPNOTSUPP; 1100 } 1101 1102 static void 1103 hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 1104 { 1105 struct hn_softc *sc = ifp->if_softc; 1106 1107 ifmr->ifm_status = IFM_AVALID; 1108 ifmr->ifm_active = IFM_ETHER; 1109 1110 if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { 1111 ifmr->ifm_active |= IFM_NONE; 1112 return; 1113 } 1114 ifmr->ifm_status |= IFM_ACTIVE; 1115 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 1116 } 1117 1118 static void 1119 hn_rxvf_set_task(void *xarg, int pending __unused) 1120 { 1121 struct hn_rxvf_setarg *arg = xarg; 1122 1123 arg->rxr->hn_rxvf_ifp = arg->vf_ifp; 1124 } 1125 1126 static void 1127 hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp) 1128 { 1129 struct hn_rx_ring *rxr; 1130 struct hn_rxvf_setarg arg; 1131 struct task task; 1132 int i; 1133 1134 HN_LOCK_ASSERT(sc); 1135 1136 TASK_INIT(&task, 0, hn_rxvf_set_task, &arg); 1137 1138 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 1139 rxr = &sc->hn_rx_ring[i]; 1140 1141 if (i < sc->hn_rx_ring_inuse) { 1142 arg.rxr = rxr; 1143 arg.vf_ifp = vf_ifp; 1144 vmbus_chan_run_task(rxr->hn_chan, &task); 1145 } else { 1146 rxr->hn_rxvf_ifp = vf_ifp; 1147 } 1148 } 1149 } 1150 1151 static bool 1152 hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp) 1153 { 1154 const struct ifnet *hn_ifp; 1155 1156 hn_ifp = sc->hn_ifp; 1157 1158 if (ifp == hn_ifp) 1159 return (false); 1160 1161 if (ifp->if_alloctype != IFT_ETHER) 1162 return (false); 1163 1164 /* Ignore lagg/vlan interfaces */ 1165 if (strcmp(ifp->if_dname, "lagg") == 0 || 1166 strcmp(ifp->if_dname, "vlan") == 0) 1167 return (false); 1168 1169 /* 1170 * During detach events ifp->if_addr might be NULL. 
1171 * Make sure the bcmp() below doesn't panic on that: 1172 */ 1173 if (ifp->if_addr == NULL || hn_ifp->if_addr == NULL) 1174 return (false); 1175 1176 if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0) 1177 return (false); 1178 1179 return (true); 1180 } 1181 1182 static void 1183 hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf) 1184 { 1185 struct ifnet *hn_ifp; 1186 1187 HN_LOCK(sc); 1188 1189 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1190 goto out; 1191 1192 if (!hn_ismyvf(sc, ifp)) 1193 goto out; 1194 hn_ifp = sc->hn_ifp; 1195 1196 if (rxvf) { 1197 if (sc->hn_flags & HN_FLAG_RXVF) 1198 goto out; 1199 1200 sc->hn_flags |= HN_FLAG_RXVF; 1201 hn_rxfilter_config(sc); 1202 } else { 1203 if (!(sc->hn_flags & HN_FLAG_RXVF)) 1204 goto out; 1205 1206 sc->hn_flags &= ~HN_FLAG_RXVF; 1207 if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 1208 hn_rxfilter_config(sc); 1209 else 1210 hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE); 1211 } 1212 1213 hn_nvs_set_datapath(sc, 1214 rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH); 1215 1216 hn_rxvf_set(sc, rxvf ? ifp : NULL); 1217 1218 if (rxvf) { 1219 hn_vf_rss_fixup(sc, true); 1220 hn_suspend_mgmt(sc); 1221 sc->hn_link_flags &= 1222 ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG); 1223 if_link_state_change(hn_ifp, LINK_STATE_DOWN); 1224 } else { 1225 hn_vf_rss_restore(sc); 1226 hn_resume_mgmt(sc); 1227 } 1228 1229 devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname, 1230 rxvf ? "VF_UP" : "VF_DOWN", NULL); 1231 1232 if (bootverbose) { 1233 if_printf(hn_ifp, "datapath is switched %s %s\n", 1234 rxvf ? "to" : "from", ifp->if_xname); 1235 } 1236 out: 1237 HN_UNLOCK(sc); 1238 } 1239 1240 static void 1241 hn_ifnet_event(void *arg, struct ifnet *ifp, int event) 1242 { 1243 1244 if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN) 1245 return; 1246 hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP); 1247 } 1248 1249 static void 1250 hn_ifaddr_event(void *arg, struct ifnet *ifp) 1251 { 1252 1253 hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP); 1254 } 1255 1256 static int 1257 hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr) 1258 { 1259 struct ifnet *ifp, *vf_ifp; 1260 uint64_t tmp; 1261 int error; 1262 1263 HN_LOCK_ASSERT(sc); 1264 ifp = sc->hn_ifp; 1265 vf_ifp = sc->hn_vf_ifp; 1266 1267 /* 1268 * Fix up requested capabilities w/ supported capabilities, 1269 * since the supported capabilities could have been changed. 1270 */ 1271 ifr->ifr_reqcap &= ifp->if_capabilities; 1272 /* Pass SIOCSIFCAP to VF. */ 1273 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr); 1274 1275 /* 1276 * NOTE: 1277 * The error will be propagated to the callers, however, it 1278 * is _not_ useful here. 1279 */ 1280 1281 /* 1282 * Merge VF's enabled capabilities. 
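 * Only capabilities supported by both the synthetic device and the VF remain enabled; the if_hwassist bits below are masked to match what is actually enabled.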
1283 */ 1284 ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities; 1285 1286 tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc); 1287 if (ifp->if_capenable & IFCAP_TXCSUM) 1288 ifp->if_hwassist |= tmp; 1289 else 1290 ifp->if_hwassist &= ~tmp; 1291 1292 tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc); 1293 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 1294 ifp->if_hwassist |= tmp; 1295 else 1296 ifp->if_hwassist &= ~tmp; 1297 1298 tmp = vf_ifp->if_hwassist & CSUM_IP_TSO; 1299 if (ifp->if_capenable & IFCAP_TSO4) 1300 ifp->if_hwassist |= tmp; 1301 else 1302 ifp->if_hwassist &= ~tmp; 1303 1304 tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO; 1305 if (ifp->if_capenable & IFCAP_TSO6) 1306 ifp->if_hwassist |= tmp; 1307 else 1308 ifp->if_hwassist &= ~tmp; 1309 1310 return (error); 1311 } 1312 1313 static int 1314 hn_xpnt_vf_iocsetflags(struct hn_softc *sc) 1315 { 1316 struct ifnet *vf_ifp; 1317 struct ifreq ifr; 1318 1319 HN_LOCK_ASSERT(sc); 1320 vf_ifp = sc->hn_vf_ifp; 1321 1322 memset(&ifr, 0, sizeof(ifr)); 1323 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1324 ifr.ifr_flags = vf_ifp->if_flags & 0xffff; 1325 ifr.ifr_flagshigh = vf_ifp->if_flags >> 16; 1326 return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr)); 1327 } 1328 1329 static void 1330 hn_xpnt_vf_saveifflags(struct hn_softc *sc) 1331 { 1332 struct ifnet *ifp = sc->hn_ifp; 1333 int allmulti = 0; 1334 1335 HN_LOCK_ASSERT(sc); 1336 1337 /* XXX vlan(4) style mcast addr maintenance */ 1338 if (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) 1339 allmulti = IFF_ALLMULTI; 1340 1341 /* Always set the VF's if_flags */ 1342 sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti; 1343 } 1344 1345 static void 1346 hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m) 1347 { 1348 struct rm_priotracker pt; 1349 struct ifnet *hn_ifp = NULL; 1350 struct mbuf *mn; 1351 1352 /* 1353 * XXX racy, if hn(4) ever detached. 1354 */ 1355 rm_rlock(&hn_vfmap_lock, &pt); 1356 if (vf_ifp->if_index < hn_vfmap_size) 1357 hn_ifp = hn_vfmap[vf_ifp->if_index]; 1358 rm_runlock(&hn_vfmap_lock, &pt); 1359 1360 if (hn_ifp != NULL) { 1361 for (mn = m; mn != NULL; mn = mn->m_nextpkt) { 1362 /* 1363 * Allow tapping on the VF. 1364 */ 1365 ETHER_BPF_MTAP(vf_ifp, mn); 1366 1367 /* 1368 * Update VF stats. 1369 */ 1370 if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) { 1371 if_inc_counter(vf_ifp, IFCOUNTER_IBYTES, 1372 mn->m_pkthdr.len); 1373 } 1374 /* 1375 * XXX IFCOUNTER_IMCAST 1376 * This stat updating is kinda invasive, since it 1377 * requires two checks on the mbuf: the length check 1378 * and the ethernet header check. As of this writing, 1379 * all multicast packets go directly to hn(4), which 1380 * makes imcast stat updating in the VF a try in vain. 1381 */ 1382 1383 /* 1384 * Fix up rcvif and increase hn(4)'s ipackets. 1385 */ 1386 mn->m_pkthdr.rcvif = hn_ifp; 1387 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 1388 } 1389 /* 1390 * Go through hn(4)'s if_input. 1391 */ 1392 hn_ifp->if_input(hn_ifp, m); 1393 } else { 1394 /* 1395 * In the middle of the transition; free this 1396 * mbuf chain.
1397 */ 1398 while (m != NULL) { 1399 mn = m->m_nextpkt; 1400 m->m_nextpkt = NULL; 1401 m_freem(m); 1402 m = mn; 1403 } 1404 } 1405 } 1406 1407 static void 1408 hn_mtu_change_fixup(struct hn_softc *sc) 1409 { 1410 struct ifnet *ifp; 1411 1412 HN_LOCK_ASSERT(sc); 1413 ifp = sc->hn_ifp; 1414 1415 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 1416 #if __FreeBSD_version >= 1100099 1417 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) 1418 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 1419 #endif 1420 } 1421 1422 static uint32_t 1423 hn_rss_type_fromndis(uint32_t rss_hash) 1424 { 1425 uint32_t types = 0; 1426 1427 if (rss_hash & NDIS_HASH_IPV4) 1428 types |= RSS_TYPE_IPV4; 1429 if (rss_hash & NDIS_HASH_TCP_IPV4) 1430 types |= RSS_TYPE_TCP_IPV4; 1431 if (rss_hash & NDIS_HASH_IPV6) 1432 types |= RSS_TYPE_IPV6; 1433 if (rss_hash & NDIS_HASH_IPV6_EX) 1434 types |= RSS_TYPE_IPV6_EX; 1435 if (rss_hash & NDIS_HASH_TCP_IPV6) 1436 types |= RSS_TYPE_TCP_IPV6; 1437 if (rss_hash & NDIS_HASH_TCP_IPV6_EX) 1438 types |= RSS_TYPE_TCP_IPV6_EX; 1439 if (rss_hash & NDIS_HASH_UDP_IPV4_X) 1440 types |= RSS_TYPE_UDP_IPV4; 1441 return (types); 1442 } 1443 1444 static uint32_t 1445 hn_rss_type_tondis(uint32_t types) 1446 { 1447 uint32_t rss_hash = 0; 1448 1449 KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0, 1450 ("UDP6 and UDP6EX are not supported")); 1451 1452 if (types & RSS_TYPE_IPV4) 1453 rss_hash |= NDIS_HASH_IPV4; 1454 if (types & RSS_TYPE_TCP_IPV4) 1455 rss_hash |= NDIS_HASH_TCP_IPV4; 1456 if (types & RSS_TYPE_IPV6) 1457 rss_hash |= NDIS_HASH_IPV6; 1458 if (types & RSS_TYPE_IPV6_EX) 1459 rss_hash |= NDIS_HASH_IPV6_EX; 1460 if (types & RSS_TYPE_TCP_IPV6) 1461 rss_hash |= NDIS_HASH_TCP_IPV6; 1462 if (types & RSS_TYPE_TCP_IPV6_EX) 1463 rss_hash |= NDIS_HASH_TCP_IPV6_EX; 1464 if (types & RSS_TYPE_UDP_IPV4) 1465 rss_hash |= NDIS_HASH_UDP_IPV4_X; 1466 return (rss_hash); 1467 } 1468 1469 static void 1470 hn_rss_mbuf_hash(struct hn_softc *sc, uint32_t mbuf_hash) 1471 { 1472 int i; 1473 1474 HN_LOCK_ASSERT(sc); 1475 1476 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1477 sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash; 1478 } 1479 1480 static void 1481 hn_vf_rss_fixup(struct hn_softc *sc, bool reconf) 1482 { 1483 struct ifnet *ifp, *vf_ifp; 1484 struct ifrsshash ifrh; 1485 struct ifrsskey ifrk; 1486 int error; 1487 uint32_t my_types, diff_types, mbuf_types = 0; 1488 1489 HN_LOCK_ASSERT(sc); 1490 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1491 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1492 1493 if (sc->hn_rx_ring_inuse == 1) { 1494 /* No RSS on synthetic parts; done. */ 1495 return; 1496 } 1497 if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) { 1498 /* Synthetic parts do not support Toeplitz; done. */ 1499 return; 1500 } 1501 1502 ifp = sc->hn_ifp; 1503 vf_ifp = sc->hn_vf_ifp; 1504 1505 /* 1506 * Extract VF's RSS key. Only 40 bytes key for Toeplitz is 1507 * supported. 
1508 */ 1509 memset(&ifrk, 0, sizeof(ifrk)); 1510 strlcpy(ifrk.ifrk_name, vf_ifp->if_xname, sizeof(ifrk.ifrk_name)); 1511 error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSKEY, (caddr_t)&ifrk); 1512 if (error) { 1513 if_printf(ifp, "%s SIOCGIFRSSKEY failed: %d\n", 1514 vf_ifp->if_xname, error); 1515 goto done; 1516 } 1517 if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) { 1518 if_printf(ifp, "%s RSS function %u is not Toeplitz\n", 1519 vf_ifp->if_xname, ifrk.ifrk_func); 1520 goto done; 1521 } 1522 if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) { 1523 if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n", 1524 vf_ifp->if_xname, ifrk.ifrk_keylen); 1525 goto done; 1526 } 1527 1528 /* 1529 * Extract VF's RSS hash. Only Toeplitz is supported. 1530 */ 1531 memset(&ifrh, 0, sizeof(ifrh)); 1532 strlcpy(ifrh.ifrh_name, vf_ifp->if_xname, sizeof(ifrh.ifrh_name)); 1533 error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSHASH, (caddr_t)&ifrh); 1534 if (error) { 1535 if_printf(ifp, "%s SIOCGRSSHASH failed: %d\n", 1536 vf_ifp->if_xname, error); 1537 goto done; 1538 } 1539 if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) { 1540 if_printf(ifp, "%s RSS function %u is not Toeplitz\n", 1541 vf_ifp->if_xname, ifrh.ifrh_func); 1542 goto done; 1543 } 1544 1545 my_types = hn_rss_type_fromndis(sc->hn_rss_hcap); 1546 if ((ifrh.ifrh_types & my_types) == 0) { 1547 /* This disables RSS; ignore it then */ 1548 if_printf(ifp, "%s intersection of RSS types failed. " 1549 "VF %#x, mine %#x\n", vf_ifp->if_xname, 1550 ifrh.ifrh_types, my_types); 1551 goto done; 1552 } 1553 1554 diff_types = my_types ^ ifrh.ifrh_types; 1555 my_types &= ifrh.ifrh_types; 1556 mbuf_types = my_types; 1557 1558 /* 1559 * Detect RSS hash value/type confliction. 1560 * 1561 * NOTE: 1562 * We don't disable the hash type, but stop delivery the hash 1563 * value/type through mbufs on RX path. 1564 * 1565 * XXX If HN_CAP_UDPHASH is set in hn_caps, then UDP 4-tuple 1566 * hash is delivered with type of TCP_IPV4. This means if 1567 * UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at 1568 * least to hn_mbuf_hash. However, given that _all_ of the 1569 * NICs implement TCP_IPV4, this will _not_ impose any issues 1570 * here. 1571 */ 1572 if ((my_types & RSS_TYPE_IPV4) && 1573 (diff_types & ifrh.ifrh_types & 1574 (RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) { 1575 /* Conflict; disable IPV4 hash type/value delivery. */ 1576 if_printf(ifp, "disable IPV4 mbuf hash delivery\n"); 1577 mbuf_types &= ~RSS_TYPE_IPV4; 1578 } 1579 if ((my_types & RSS_TYPE_IPV6) && 1580 (diff_types & ifrh.ifrh_types & 1581 (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | 1582 RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | 1583 RSS_TYPE_IPV6_EX))) { 1584 /* Conflict; disable IPV6 hash type/value delivery. */ 1585 if_printf(ifp, "disable IPV6 mbuf hash delivery\n"); 1586 mbuf_types &= ~RSS_TYPE_IPV6; 1587 } 1588 if ((my_types & RSS_TYPE_IPV6_EX) && 1589 (diff_types & ifrh.ifrh_types & 1590 (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | 1591 RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | 1592 RSS_TYPE_IPV6))) { 1593 /* Conflict; disable IPV6_EX hash type/value delivery. */ 1594 if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n"); 1595 mbuf_types &= ~RSS_TYPE_IPV6_EX; 1596 } 1597 if ((my_types & RSS_TYPE_TCP_IPV6) && 1598 (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) { 1599 /* Conflict; disable TCP_IPV6 hash type/value delivery. 
*/ 1600 if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n"); 1601 mbuf_types &= ~RSS_TYPE_TCP_IPV6; 1602 } 1603 if ((my_types & RSS_TYPE_TCP_IPV6_EX) && 1604 (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) { 1605 /* Conflict; disable TCP_IPV6_EX hash type/value delivery. */ 1606 if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n"); 1607 mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX; 1608 } 1609 if ((my_types & RSS_TYPE_UDP_IPV6) && 1610 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) { 1611 /* Conflict; disable UDP_IPV6 hash type/value delivery. */ 1612 if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n"); 1613 mbuf_types &= ~RSS_TYPE_UDP_IPV6; 1614 } 1615 if ((my_types & RSS_TYPE_UDP_IPV6_EX) && 1616 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) { 1617 /* Conflict; disable UDP_IPV6_EX hash type/value delivery. */ 1618 if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n"); 1619 mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX; 1620 } 1621 1622 /* 1623 * Indirect table does not matter. 1624 */ 1625 1626 sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) | 1627 hn_rss_type_tondis(my_types); 1628 memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key)); 1629 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 1630 1631 if (reconf) { 1632 error = hn_rss_reconfig(sc); 1633 if (error) { 1634 /* XXX roll-back? */ 1635 if_printf(ifp, "hn_rss_reconfig failed: %d\n", error); 1636 /* XXX keep going. */ 1637 } 1638 } 1639 done: 1640 /* Hash deliverability for mbufs. */ 1641 hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types)); 1642 } 1643 1644 static void 1645 hn_vf_rss_restore(struct hn_softc *sc) 1646 { 1647 1648 HN_LOCK_ASSERT(sc); 1649 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1650 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1651 1652 if (sc->hn_rx_ring_inuse == 1) 1653 goto done; 1654 1655 /* 1656 * Restore hash types. Key does _not_ matter. 1657 */ 1658 if (sc->hn_rss_hash != sc->hn_rss_hcap) { 1659 int error; 1660 1661 sc->hn_rss_hash = sc->hn_rss_hcap; 1662 error = hn_rss_reconfig(sc); 1663 if (error) { 1664 if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n", 1665 error); 1666 /* XXX keep going. */ 1667 } 1668 } 1669 done: 1670 /* Hash deliverability for mbufs. */ 1671 hn_rss_mbuf_hash(sc, NDIS_HASH_ALL); 1672 } 1673 1674 static void 1675 hn_xpnt_vf_setready(struct hn_softc *sc) 1676 { 1677 struct ifnet *ifp, *vf_ifp; 1678 struct ifreq ifr; 1679 1680 HN_LOCK_ASSERT(sc); 1681 ifp = sc->hn_ifp; 1682 vf_ifp = sc->hn_vf_ifp; 1683 1684 /* 1685 * Mark the VF ready. 1686 */ 1687 sc->hn_vf_rdytick = 0; 1688 1689 /* 1690 * Save information for restoration. 1691 */ 1692 sc->hn_saved_caps = ifp->if_capabilities; 1693 sc->hn_saved_tsomax = ifp->if_hw_tsomax; 1694 sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount; 1695 sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize; 1696 1697 /* 1698 * Intersect supported/enabled capabilities. 1699 * 1700 * NOTE: 1701 * if_hwassist is not changed here. 1702 */ 1703 ifp->if_capabilities &= vf_ifp->if_capabilities; 1704 ifp->if_capenable &= ifp->if_capabilities; 1705 1706 /* 1707 * Fix TSO settings. 1708 */ 1709 if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax) 1710 ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax; 1711 if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount) 1712 ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount; 1713 if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize) 1714 ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize; 1715 1716 /* 1717 * Change VF's enabled capabilities. 
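 * The merged capability set is pushed down to the VF with a SIOCSIFCAP request through hn_xpnt_vf_iocsetcaps().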
1718 */ 1719 memset(&ifr, 0, sizeof(ifr)); 1720 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1721 ifr.ifr_reqcap = ifp->if_capenable; 1722 hn_xpnt_vf_iocsetcaps(sc, &ifr); 1723 1724 if (ifp->if_mtu != ETHERMTU) { 1725 int error; 1726 1727 /* 1728 * Change VF's MTU. 1729 */ 1730 memset(&ifr, 0, sizeof(ifr)); 1731 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1732 ifr.ifr_mtu = ifp->if_mtu; 1733 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr); 1734 if (error) { 1735 if_printf(ifp, "%s SIOCSIFMTU %u failed\n", 1736 vf_ifp->if_xname, ifp->if_mtu); 1737 if (ifp->if_mtu > ETHERMTU) { 1738 if_printf(ifp, "change MTU to %d\n", ETHERMTU); 1739 1740 /* 1741 * XXX 1742 * No need to adjust the synthetic parts' MTU; 1743 * failure of the adjustment will cause us 1744 * infinite headache. 1745 */ 1746 ifp->if_mtu = ETHERMTU; 1747 hn_mtu_change_fixup(sc); 1748 } 1749 } 1750 } 1751 } 1752 1753 static bool 1754 hn_xpnt_vf_isready(struct hn_softc *sc) 1755 { 1756 1757 HN_LOCK_ASSERT(sc); 1758 1759 if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL) 1760 return (false); 1761 1762 if (sc->hn_vf_rdytick == 0) 1763 return (true); 1764 1765 if (sc->hn_vf_rdytick > ticks) 1766 return (false); 1767 1768 /* Mark VF as ready. */ 1769 hn_xpnt_vf_setready(sc); 1770 return (true); 1771 } 1772 1773 static void 1774 hn_xpnt_vf_setenable(struct hn_softc *sc) 1775 { 1776 int i; 1777 1778 HN_LOCK_ASSERT(sc); 1779 1780 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1781 rm_wlock(&sc->hn_vf_lock); 1782 sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED; 1783 rm_wunlock(&sc->hn_vf_lock); 1784 1785 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1786 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF; 1787 } 1788 1789 static void 1790 hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf) 1791 { 1792 int i; 1793 1794 HN_LOCK_ASSERT(sc); 1795 1796 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1797 rm_wlock(&sc->hn_vf_lock); 1798 sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED; 1799 if (clear_vf) 1800 sc->hn_vf_ifp = NULL; 1801 rm_wunlock(&sc->hn_vf_lock); 1802 1803 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1804 sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF; 1805 } 1806 1807 static void 1808 hn_xpnt_vf_init(struct hn_softc *sc) 1809 { 1810 int error; 1811 1812 HN_LOCK_ASSERT(sc); 1813 1814 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1815 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1816 1817 if (bootverbose) { 1818 if_printf(sc->hn_ifp, "try bringing up %s\n", 1819 sc->hn_vf_ifp->if_xname); 1820 } 1821 1822 /* 1823 * Bring the VF up. 1824 */ 1825 hn_xpnt_vf_saveifflags(sc); 1826 sc->hn_vf_ifp->if_flags |= IFF_UP; 1827 error = hn_xpnt_vf_iocsetflags(sc); 1828 if (error) { 1829 if_printf(sc->hn_ifp, "bringing up %s failed: %d\n", 1830 sc->hn_vf_ifp->if_xname, error); 1831 return; 1832 } 1833 1834 /* 1835 * NOTE: 1836 * Datapath setting must happen _after_ bringing the VF up. 1837 */ 1838 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 1839 1840 /* 1841 * NOTE: 1842 * Fixup RSS related bits _after_ the VF is brought up, since 1843 * many VFs generate RSS key during it's initialization. 1844 */ 1845 hn_vf_rss_fixup(sc, true); 1846 1847 /* Mark transparent mode VF as enabled. 
*/ 1848 hn_xpnt_vf_setenable(sc); 1849 } 1850 1851 static void 1852 hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused) 1853 { 1854 struct hn_softc *sc = xsc; 1855 1856 HN_LOCK(sc); 1857 1858 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1859 goto done; 1860 if (sc->hn_vf_ifp == NULL) 1861 goto done; 1862 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 1863 goto done; 1864 1865 if (sc->hn_vf_rdytick != 0) { 1866 /* Mark VF as ready. */ 1867 hn_xpnt_vf_setready(sc); 1868 } 1869 1870 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) { 1871 /* 1872 * Delayed VF initialization. 1873 */ 1874 if (bootverbose) { 1875 if_printf(sc->hn_ifp, "delayed initialize %s\n", 1876 sc->hn_vf_ifp->if_xname); 1877 } 1878 hn_xpnt_vf_init(sc); 1879 } 1880 done: 1881 HN_UNLOCK(sc); 1882 } 1883 1884 static void 1885 hn_ifnet_attevent(void *xsc, struct ifnet *ifp) 1886 { 1887 struct hn_softc *sc = xsc; 1888 1889 HN_LOCK(sc); 1890 1891 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1892 goto done; 1893 1894 if (!hn_ismyvf(sc, ifp)) 1895 goto done; 1896 1897 if (sc->hn_vf_ifp != NULL) { 1898 if_printf(sc->hn_ifp, "%s was attached as VF\n", 1899 sc->hn_vf_ifp->if_xname); 1900 goto done; 1901 } 1902 1903 if (hn_xpnt_vf && ifp->if_start != NULL) { 1904 /* 1905 * ifnet.if_start is _not_ supported by transparent 1906 * mode VF; mainly due to the IFF_DRV_OACTIVE flag. 1907 */ 1908 if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported " 1909 "in transparent VF mode.\n", ifp->if_xname); 1910 goto done; 1911 } 1912 1913 rm_wlock(&hn_vfmap_lock); 1914 1915 if (ifp->if_index >= hn_vfmap_size) { 1916 struct ifnet **newmap; 1917 int newsize; 1918 1919 newsize = ifp->if_index + HN_VFMAP_SIZE_DEF; 1920 newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF, 1921 M_WAITOK | M_ZERO); 1922 1923 memcpy(newmap, hn_vfmap, 1924 sizeof(struct ifnet *) * hn_vfmap_size); 1925 free(hn_vfmap, M_DEVBUF); 1926 hn_vfmap = newmap; 1927 hn_vfmap_size = newsize; 1928 } 1929 KASSERT(hn_vfmap[ifp->if_index] == NULL, 1930 ("%s: ifindex %d was mapped to %s", 1931 ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname)); 1932 hn_vfmap[ifp->if_index] = sc->hn_ifp; 1933 1934 rm_wunlock(&hn_vfmap_lock); 1935 1936 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1937 rm_wlock(&sc->hn_vf_lock); 1938 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1939 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1940 sc->hn_vf_ifp = ifp; 1941 rm_wunlock(&sc->hn_vf_lock); 1942 1943 if (hn_xpnt_vf) { 1944 int wait_ticks; 1945 1946 /* 1947 * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp. 1948 * Save vf_ifp's current if_input for later restoration. 1949 */ 1950 sc->hn_vf_input = ifp->if_input; 1951 ifp->if_input = hn_xpnt_vf_input; 1952 1953 /* 1954 * Stop link status management; use the VF's. 1955 */ 1956 hn_suspend_mgmt(sc); 1957 1958 /* 1959 * Give the VF some time to complete its attach routine. 1960 */ 1961 wait_ticks = hn_xpnt_vf_attwait * hz; 1962 sc->hn_vf_rdytick = ticks + wait_ticks; 1963 1964 taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init, 1965 wait_ticks); 1966 } 1967 done: 1968 HN_UNLOCK(sc); 1969 } 1970 1971 static void 1972 hn_ifnet_detevent(void *xsc, struct ifnet *ifp) 1973 { 1974 struct hn_softc *sc = xsc; 1975 1976 HN_LOCK(sc); 1977 1978 if (sc->hn_vf_ifp == NULL) 1979 goto done; 1980 1981 if (!hn_ismyvf(sc, ifp)) 1982 goto done; 1983 1984 if (hn_xpnt_vf) { 1985 /* 1986 * Make sure that the delayed initialization is not running.
		 *
		 * NOTE:
		 * - This lock _must_ be released, since the hn_vf_init task
		 *   will try holding this lock.
		 * - It is safe to release this lock here, since the
		 *   hn_ifnet_attevent() is interlocked by the hn_vf_ifp.
		 *
		 * XXX racy, if hn(4) ever detached.
		 */
		HN_UNLOCK(sc);
		taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init);
		HN_LOCK(sc);

		KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved",
		    sc->hn_ifp->if_xname));
		ifp->if_input = sc->hn_vf_input;
		sc->hn_vf_input = NULL;

		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) &&
		    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED))
			hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);

		if (sc->hn_vf_rdytick == 0) {
			/*
			 * The VF was ready; restore some settings.
			 */
			sc->hn_ifp->if_capabilities = sc->hn_saved_caps;
			/*
			 * NOTE:
			 * There is _no_ need to fixup if_capenable and
			 * if_hwassist, since the if_capabilities before
			 * restoration was an intersection of the VF's
			 * if_capabilities and the synthetic device's
			 * if_capabilities.
			 */
			sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax;
			sc->hn_ifp->if_hw_tsomaxsegcount =
			    sc->hn_saved_tsosegcnt;
			sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz;
		}

		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
			/*
			 * Restore RSS settings.
			 */
			hn_vf_rss_restore(sc);

			/*
			 * Resume link status management, which was suspended
			 * by hn_ifnet_attevent().
			 */
			hn_resume_mgmt(sc);
		}
	}

	/* Mark transparent mode VF as disabled. */
	hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */);

	rm_wlock(&hn_vfmap_lock);

	KASSERT(ifp->if_index < hn_vfmap_size,
	    ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size));
	if (hn_vfmap[ifp->if_index] != NULL) {
		KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp,
		    ("%s: ifindex %d was mapped to %s",
		     ifp->if_xname, ifp->if_index,
		     hn_vfmap[ifp->if_index]->if_xname));
		hn_vfmap[ifp->if_index] = NULL;
	}

	rm_wunlock(&hn_vfmap_lock);
done:
	HN_UNLOCK(sc);
}

static void
hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state)
{
	struct hn_softc *sc = xsc;

	if (sc->hn_vf_ifp == ifp)
		if_link_state_change(sc->hn_ifp, link_state);
}

static int
hn_probe(device_t dev)
{

	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) {
		device_set_desc(dev, "Hyper-V Network Interface");
		return BUS_PROBE_DEFAULT;
	}
	return ENXIO;
}

static int
hn_attach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	uint8_t eaddr[ETHER_ADDR_LEN];
	struct ifnet *ifp = NULL;
	int error, ring_cnt, tx_ring_cnt;
	uint32_t mtu;

	sc->hn_dev = dev;
	sc->hn_prichan = vmbus_get_channel(dev);
	HN_LOCK_INIT(sc);
	rm_init(&sc->hn_vf_lock, "hnvf");
	if (hn_xpnt_vf && hn_xpnt_vf_accbpf)
		sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;

	/*
	 * Initialize these tunables once.
	 */
	sc->hn_agg_size = hn_tx_agg_size;
	sc->hn_agg_pkts = hn_tx_agg_pkts;

	/*
	 * Setup taskqueue for transmission.
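	 * With HN_TX_TASKQ_M_INDEP a private set of TX taskqueues is
	 * created for this device; with HN_TX_TASKQ_M_GLOBAL the shared
	 * hn_tx_taskque array is reused instead.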
	 */
	if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
		int i;

		sc->hn_tx_taskqs =
		    malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
		    M_DEVBUF, M_WAITOK);
		for (i = 0; i < hn_tx_taskq_cnt; ++i) {
			sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
			    M_WAITOK, taskqueue_thread_enqueue,
			    &sc->hn_tx_taskqs[i]);
			taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
			    "%s tx%d", device_get_nameunit(dev), i);
		}
	} else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
		sc->hn_tx_taskqs = hn_tx_taskque;
	}

	/*
	 * Setup taskqueue for management tasks, e.g. link status.
	 */
	sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
	taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
	TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
	TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
	    hn_netchg_status_taskfunc, sc);

	if (hn_xpnt_vf) {
		/*
		 * Setup taskqueue for VF tasks, e.g. delayed VF bringing up.
		 */
		sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK,
		    taskqueue_thread_enqueue, &sc->hn_vf_taskq);
		taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf",
		    device_get_nameunit(dev));
		TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0,
		    hn_xpnt_vf_init_taskfunc, sc);
	}

	/*
	 * Allocate ifnet and setup its name earlier, so that if_printf
	 * can be used by functions, which will be called after
	 * ether_ifattach().
	 */
	ifp = sc->hn_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/*
	 * Initialize ifmedia earlier so that it can be unconditionally
	 * destroyed, if an error happens later on.
	 */
	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);

	/*
	 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
	 * to use (tx_ring_cnt).
	 *
	 * NOTE:
	 * The # of RX rings to use is same as the # of channels to use.
	 */
	ring_cnt = hn_chan_cnt;
	if (ring_cnt <= 0) {
		/* Default */
		ring_cnt = mp_ncpus;
		if (ring_cnt > HN_RING_CNT_DEF_MAX)
			ring_cnt = HN_RING_CNT_DEF_MAX;
	} else if (ring_cnt > mp_ncpus) {
		ring_cnt = mp_ncpus;
	}
#ifdef RSS
	if (ring_cnt > rss_getnumbuckets())
		ring_cnt = rss_getnumbuckets();
#endif

	tx_ring_cnt = hn_tx_ring_cnt;
	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
		tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		/* ifnet.if_start only needs one TX ring. */
		tx_ring_cnt = 1;
	}
#endif

	/*
	 * Set the leader CPU for channels.
	 */
	sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;

	/*
	 * Create enough TX/RX rings, even if only a limited number of
	 * channels can be allocated.
	 */
	error = hn_create_tx_data(sc, tx_ring_cnt);
	if (error)
		goto failed;
	error = hn_create_rx_data(sc, ring_cnt);
	if (error)
		goto failed;

	/*
	 * Create transaction context for NVS and RNDIS transactions.
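	 * The xact context supplies the request/response buffers
	 * (HN_XACT_REQ_SIZE/HN_XACT_RESP_SIZE) used by the synchronous
	 * NVS and RNDIS exchanges.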
2214 */ 2215 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), 2216 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); 2217 if (sc->hn_xact == NULL) { 2218 error = ENXIO; 2219 goto failed; 2220 } 2221 2222 /* 2223 * Install orphan handler for the revocation of this device's 2224 * primary channel. 2225 * 2226 * NOTE: 2227 * The processing order is critical here: 2228 * Install the orphan handler, _before_ testing whether this 2229 * device's primary channel has been revoked or not. 2230 */ 2231 vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); 2232 if (vmbus_chan_is_revoked(sc->hn_prichan)) { 2233 error = ENXIO; 2234 goto failed; 2235 } 2236 2237 /* 2238 * Attach the synthetic parts, i.e. NVS and RNDIS. 2239 */ 2240 error = hn_synth_attach(sc, ETHERMTU); 2241 if (error) 2242 goto failed; 2243 2244 error = hn_rndis_get_eaddr(sc, eaddr); 2245 if (error) 2246 goto failed; 2247 2248 error = hn_rndis_get_mtu(sc, &mtu); 2249 if (error) 2250 mtu = ETHERMTU; 2251 else if (bootverbose) 2252 device_printf(dev, "RNDIS mtu %u\n", mtu); 2253 2254 #if __FreeBSD_version >= 1100099 2255 if (sc->hn_rx_ring_inuse > 1) { 2256 /* 2257 * Reduce TCP segment aggregation limit for multiple 2258 * RX rings to increase ACK timeliness. 2259 */ 2260 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); 2261 } 2262 #endif 2263 2264 /* 2265 * Fixup TX/RX stuffs after synthetic parts are attached. 2266 */ 2267 hn_fixup_tx_data(sc); 2268 hn_fixup_rx_data(sc); 2269 2270 ctx = device_get_sysctl_ctx(dev); 2271 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 2272 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, 2273 &sc->hn_nvs_ver, 0, "NVS version"); 2274 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", 2275 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2276 hn_ndis_version_sysctl, "A", "NDIS version"); 2277 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", 2278 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2279 hn_caps_sysctl, "A", "capabilities"); 2280 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", 2281 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2282 hn_hwassist_sysctl, "A", "hwassist"); 2283 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", 2284 CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); 2285 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", 2286 CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, 2287 "max # of TSO segments"); 2288 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", 2289 CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0, 2290 "max size of TSO segment"); 2291 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", 2292 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2293 hn_rxfilter_sysctl, "A", "rxfilter"); 2294 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", 2295 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2296 hn_rss_hash_sysctl, "A", "RSS hash"); 2297 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap", 2298 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2299 hn_rss_hcap_sysctl, "A", "RSS hash capabilities"); 2300 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash", 2301 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2302 hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs"); 2303 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", 2304 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); 2305 #ifndef RSS 2306 /* 2307 * Don't allow RSS key/indirect table changes, if RSS is defined. 
2308 */ 2309 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", 2310 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2311 hn_rss_key_sysctl, "IU", "RSS key"); 2312 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", 2313 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2314 hn_rss_ind_sysctl, "IU", "RSS indirect table"); 2315 #endif 2316 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", 2317 CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, 2318 "RNDIS offered packet transmission aggregation size limit"); 2319 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", 2320 CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, 2321 "RNDIS offered packet transmission aggregation count limit"); 2322 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", 2323 CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, 2324 "RNDIS packet transmission aggregation alignment"); 2325 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", 2326 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2327 hn_txagg_size_sysctl, "I", 2328 "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); 2329 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", 2330 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2331 hn_txagg_pkts_sysctl, "I", 2332 "Packet transmission aggregation packets, " 2333 "0 -- disable, -1 -- auto"); 2334 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling", 2335 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2336 hn_polling_sysctl, "I", 2337 "Polling frequency: [100,1000000], 0 disable polling"); 2338 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", 2339 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2340 hn_vf_sysctl, "A", "Virtual Function's name"); 2341 if (!hn_xpnt_vf) { 2342 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", 2343 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2344 hn_rxvf_sysctl, "A", "activated Virtual Function's name"); 2345 } else { 2346 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", 2347 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2348 hn_xpnt_vf_enabled_sysctl, "I", 2349 "Transparent VF enabled"); 2350 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", 2351 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2352 hn_xpnt_vf_accbpf_sysctl, "I", 2353 "Accurate BPF for transparent VF"); 2354 } 2355 2356 /* 2357 * Setup the ifmedia, which has been initialized earlier. 2358 */ 2359 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 2360 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 2361 /* XXX ifmedia_set really should do this for us */ 2362 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 2363 2364 /* 2365 * Setup the ifnet for this interface. 2366 */ 2367 2368 ifp->if_baudrate = IF_Gbps(10); 2369 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2370 ifp->if_ioctl = hn_ioctl; 2371 ifp->if_init = hn_init; 2372 #ifdef HN_IFSTART_SUPPORT 2373 if (hn_use_if_start) { 2374 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); 2375 2376 ifp->if_start = hn_start; 2377 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); 2378 ifp->if_snd.ifq_drv_maxlen = qdepth - 1; 2379 IFQ_SET_READY(&ifp->if_snd); 2380 } else 2381 #endif 2382 { 2383 ifp->if_transmit = hn_transmit; 2384 ifp->if_qflush = hn_xmit_qflush; 2385 } 2386 2387 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; 2388 #ifdef foo 2389 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2390 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; 2391 #endif 2392 if (sc->hn_caps & HN_CAP_VLAN) { 2393 /* XXX not sure about VLAN_MTU. 
*/ 2394 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; 2395 } 2396 2397 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; 2398 if (ifp->if_hwassist & HN_CSUM_IP_MASK) 2399 ifp->if_capabilities |= IFCAP_TXCSUM; 2400 if (ifp->if_hwassist & HN_CSUM_IP6_MASK) 2401 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; 2402 if (sc->hn_caps & HN_CAP_TSO4) { 2403 ifp->if_capabilities |= IFCAP_TSO4; 2404 ifp->if_hwassist |= CSUM_IP_TSO; 2405 } 2406 if (sc->hn_caps & HN_CAP_TSO6) { 2407 ifp->if_capabilities |= IFCAP_TSO6; 2408 ifp->if_hwassist |= CSUM_IP6_TSO; 2409 } 2410 2411 /* Enable all available capabilities by default. */ 2412 ifp->if_capenable = ifp->if_capabilities; 2413 2414 /* 2415 * Disable IPv6 TSO and TXCSUM by default, they still can 2416 * be enabled through SIOCSIFCAP. 2417 */ 2418 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); 2419 ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); 2420 2421 if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { 2422 /* 2423 * Lock hn_set_tso_maxsize() to simplify its 2424 * internal logic. 2425 */ 2426 HN_LOCK(sc); 2427 hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); 2428 HN_UNLOCK(sc); 2429 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; 2430 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2431 } 2432 2433 ether_ifattach(ifp, eaddr); 2434 2435 if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { 2436 if_printf(ifp, "TSO segcnt %u segsz %u\n", 2437 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); 2438 } 2439 if (mtu < ETHERMTU) { 2440 if_printf(ifp, "fixup mtu %u -> %u\n", ifp->if_mtu, mtu); 2441 ifp->if_mtu = mtu; 2442 } 2443 2444 /* Inform the upper layer about the long frame support. */ 2445 ifp->if_hdrlen = sizeof(struct ether_vlan_header); 2446 2447 /* 2448 * Kick off link status check. 2449 */ 2450 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 2451 hn_update_link_status(sc); 2452 2453 if (!hn_xpnt_vf) { 2454 sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, 2455 hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); 2456 sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, 2457 hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); 2458 } else { 2459 sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, 2460 hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); 2461 } 2462 2463 /* 2464 * NOTE: 2465 * Subscribe ether_ifattach event, instead of ifnet_arrival event, 2466 * since interface's LLADDR is needed; interface LLADDR is not 2467 * available when ifnet_arrival event is triggered. 2468 */ 2469 sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event, 2470 hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY); 2471 sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event, 2472 hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY); 2473 2474 return (0); 2475 failed: 2476 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) 2477 hn_synth_detach(sc); 2478 hn_detach(dev); 2479 return (error); 2480 } 2481 2482 static int 2483 hn_detach(device_t dev) 2484 { 2485 struct hn_softc *sc = device_get_softc(dev); 2486 struct ifnet *ifp = sc->hn_ifp, *vf_ifp; 2487 2488 if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { 2489 /* 2490 * In case that the vmbus missed the orphan handler 2491 * installation. 
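		 * Orphan the xact context by hand, so that transactions
		 * still pending on the revoked primary channel are not
		 * left waiting during detach.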
2492 */ 2493 vmbus_xact_ctx_orphan(sc->hn_xact); 2494 } 2495 2496 if (sc->hn_ifaddr_evthand != NULL) 2497 EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand); 2498 if (sc->hn_ifnet_evthand != NULL) 2499 EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand); 2500 if (sc->hn_ifnet_atthand != NULL) { 2501 EVENTHANDLER_DEREGISTER(ether_ifattach_event, 2502 sc->hn_ifnet_atthand); 2503 } 2504 if (sc->hn_ifnet_dethand != NULL) { 2505 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 2506 sc->hn_ifnet_dethand); 2507 } 2508 if (sc->hn_ifnet_lnkhand != NULL) 2509 EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand); 2510 2511 vf_ifp = sc->hn_vf_ifp; 2512 __compiler_membar(); 2513 if (vf_ifp != NULL) 2514 hn_ifnet_detevent(sc, vf_ifp); 2515 2516 if (device_is_attached(dev)) { 2517 HN_LOCK(sc); 2518 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2519 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2520 hn_stop(sc, true); 2521 /* 2522 * NOTE: 2523 * hn_stop() only suspends data, so managment 2524 * stuffs have to be suspended manually here. 2525 */ 2526 hn_suspend_mgmt(sc); 2527 hn_synth_detach(sc); 2528 } 2529 HN_UNLOCK(sc); 2530 ether_ifdetach(ifp); 2531 } 2532 2533 ifmedia_removeall(&sc->hn_media); 2534 hn_destroy_rx_data(sc); 2535 hn_destroy_tx_data(sc); 2536 2537 if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) { 2538 int i; 2539 2540 for (i = 0; i < hn_tx_taskq_cnt; ++i) 2541 taskqueue_free(sc->hn_tx_taskqs[i]); 2542 free(sc->hn_tx_taskqs, M_DEVBUF); 2543 } 2544 taskqueue_free(sc->hn_mgmt_taskq0); 2545 if (sc->hn_vf_taskq != NULL) 2546 taskqueue_free(sc->hn_vf_taskq); 2547 2548 if (sc->hn_xact != NULL) { 2549 /* 2550 * Uninstall the orphan handler _before_ the xact is 2551 * destructed. 2552 */ 2553 vmbus_chan_unset_orphan(sc->hn_prichan); 2554 vmbus_xact_ctx_destroy(sc->hn_xact); 2555 } 2556 2557 if_free(ifp); 2558 2559 HN_LOCK_DESTROY(sc); 2560 rm_destroy(&sc->hn_vf_lock); 2561 return (0); 2562 } 2563 2564 static int 2565 hn_shutdown(device_t dev) 2566 { 2567 2568 return (0); 2569 } 2570 2571 static void 2572 hn_link_status(struct hn_softc *sc) 2573 { 2574 uint32_t link_status; 2575 int error; 2576 2577 error = hn_rndis_get_linkstatus(sc, &link_status); 2578 if (error) { 2579 /* XXX what to do? */ 2580 return; 2581 } 2582 2583 if (link_status == NDIS_MEDIA_STATE_CONNECTED) 2584 sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; 2585 else 2586 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2587 if_link_state_change(sc->hn_ifp, 2588 (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? 2589 LINK_STATE_UP : LINK_STATE_DOWN); 2590 } 2591 2592 static void 2593 hn_link_taskfunc(void *xsc, int pending __unused) 2594 { 2595 struct hn_softc *sc = xsc; 2596 2597 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 2598 return; 2599 hn_link_status(sc); 2600 } 2601 2602 static void 2603 hn_netchg_init_taskfunc(void *xsc, int pending __unused) 2604 { 2605 struct hn_softc *sc = xsc; 2606 2607 /* Prevent any link status checks from running. */ 2608 sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; 2609 2610 /* 2611 * Fake up a [link down --> link up] state change; 5 seconds 2612 * delay is used, which closely simulates miibus reaction 2613 * upon link down event. 
2614 */ 2615 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2616 if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); 2617 taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, 2618 &sc->hn_netchg_status, 5 * hz); 2619 } 2620 2621 static void 2622 hn_netchg_status_taskfunc(void *xsc, int pending __unused) 2623 { 2624 struct hn_softc *sc = xsc; 2625 2626 /* Re-allow link status checks. */ 2627 sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; 2628 hn_link_status(sc); 2629 } 2630 2631 static void 2632 hn_update_link_status(struct hn_softc *sc) 2633 { 2634 2635 if (sc->hn_mgmt_taskq != NULL) 2636 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); 2637 } 2638 2639 static void 2640 hn_change_network(struct hn_softc *sc) 2641 { 2642 2643 if (sc->hn_mgmt_taskq != NULL) 2644 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); 2645 } 2646 2647 static __inline int 2648 hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, 2649 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) 2650 { 2651 struct mbuf *m = *m_head; 2652 int error; 2653 2654 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); 2655 2656 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, 2657 m, segs, nsegs, BUS_DMA_NOWAIT); 2658 if (error == EFBIG) { 2659 struct mbuf *m_new; 2660 2661 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); 2662 if (m_new == NULL) 2663 return ENOBUFS; 2664 else 2665 *m_head = m = m_new; 2666 txr->hn_tx_collapsed++; 2667 2668 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, 2669 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); 2670 } 2671 if (!error) { 2672 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, 2673 BUS_DMASYNC_PREWRITE); 2674 txd->flags |= HN_TXD_FLAG_DMAMAP; 2675 } 2676 return error; 2677 } 2678 2679 static __inline int 2680 hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) 2681 { 2682 2683 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, 2684 ("put an onlist txd %#x", txd->flags)); 2685 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2686 ("put an onagg txd %#x", txd->flags)); 2687 2688 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2689 if (atomic_fetchadd_int(&txd->refs, -1) != 1) 2690 return 0; 2691 2692 if (!STAILQ_EMPTY(&txd->agg_list)) { 2693 struct hn_txdesc *tmp_txd; 2694 2695 while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { 2696 int freed; 2697 2698 KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), 2699 ("resursive aggregation on aggregated txdesc")); 2700 KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), 2701 ("not aggregated txdesc")); 2702 KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2703 ("aggregated txdesc uses dmamap")); 2704 KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2705 ("aggregated txdesc consumes " 2706 "chimney sending buffer")); 2707 KASSERT(tmp_txd->chim_size == 0, 2708 ("aggregated txdesc has non-zero " 2709 "chimney sending size")); 2710 2711 STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); 2712 tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; 2713 freed = hn_txdesc_put(txr, tmp_txd); 2714 KASSERT(freed, ("failed to free aggregated txdesc")); 2715 } 2716 } 2717 2718 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 2719 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2720 ("chim txd uses dmamap")); 2721 hn_chim_free(txr->hn_sc, txd->chim_index); 2722 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2723 txd->chim_size = 0; 2724 } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { 2725 bus_dmamap_sync(txr->hn_tx_data_dtag, 2726 txd->data_dmap, BUS_DMASYNC_POSTWRITE); 2727 
bus_dmamap_unload(txr->hn_tx_data_dtag, 2728 txd->data_dmap); 2729 txd->flags &= ~HN_TXD_FLAG_DMAMAP; 2730 } 2731 2732 if (txd->m != NULL) { 2733 m_freem(txd->m); 2734 txd->m = NULL; 2735 } 2736 2737 txd->flags |= HN_TXD_FLAG_ONLIST; 2738 #ifndef HN_USE_TXDESC_BUFRING 2739 mtx_lock_spin(&txr->hn_txlist_spin); 2740 KASSERT(txr->hn_txdesc_avail >= 0 && 2741 txr->hn_txdesc_avail < txr->hn_txdesc_cnt, 2742 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); 2743 txr->hn_txdesc_avail++; 2744 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 2745 mtx_unlock_spin(&txr->hn_txlist_spin); 2746 #else /* HN_USE_TXDESC_BUFRING */ 2747 #ifdef HN_DEBUG 2748 atomic_add_int(&txr->hn_txdesc_avail, 1); 2749 #endif 2750 buf_ring_enqueue(txr->hn_txdesc_br, txd); 2751 #endif /* !HN_USE_TXDESC_BUFRING */ 2752 2753 return 1; 2754 } 2755 2756 static __inline struct hn_txdesc * 2757 hn_txdesc_get(struct hn_tx_ring *txr) 2758 { 2759 struct hn_txdesc *txd; 2760 2761 #ifndef HN_USE_TXDESC_BUFRING 2762 mtx_lock_spin(&txr->hn_txlist_spin); 2763 txd = SLIST_FIRST(&txr->hn_txlist); 2764 if (txd != NULL) { 2765 KASSERT(txr->hn_txdesc_avail > 0, 2766 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); 2767 txr->hn_txdesc_avail--; 2768 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 2769 } 2770 mtx_unlock_spin(&txr->hn_txlist_spin); 2771 #else 2772 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); 2773 #endif 2774 2775 if (txd != NULL) { 2776 #ifdef HN_USE_TXDESC_BUFRING 2777 #ifdef HN_DEBUG 2778 atomic_subtract_int(&txr->hn_txdesc_avail, 1); 2779 #endif 2780 #endif /* HN_USE_TXDESC_BUFRING */ 2781 KASSERT(txd->m == NULL && txd->refs == 0 && 2782 STAILQ_EMPTY(&txd->agg_list) && 2783 txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 2784 txd->chim_size == 0 && 2785 (txd->flags & HN_TXD_FLAG_ONLIST) && 2786 (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && 2787 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); 2788 txd->flags &= ~HN_TXD_FLAG_ONLIST; 2789 txd->refs = 1; 2790 } 2791 return txd; 2792 } 2793 2794 static __inline void 2795 hn_txdesc_hold(struct hn_txdesc *txd) 2796 { 2797 2798 /* 0->1 transition will never work */ 2799 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2800 atomic_add_int(&txd->refs, 1); 2801 } 2802 2803 static __inline void 2804 hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) 2805 { 2806 2807 KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2808 ("recursive aggregation on aggregating txdesc")); 2809 2810 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2811 ("already aggregated")); 2812 KASSERT(STAILQ_EMPTY(&txd->agg_list), 2813 ("recursive aggregation on to-be-aggregated txdesc")); 2814 2815 txd->flags |= HN_TXD_FLAG_ONAGG; 2816 STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); 2817 } 2818 2819 static bool 2820 hn_tx_ring_pending(struct hn_tx_ring *txr) 2821 { 2822 bool pending = false; 2823 2824 #ifndef HN_USE_TXDESC_BUFRING 2825 mtx_lock_spin(&txr->hn_txlist_spin); 2826 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) 2827 pending = true; 2828 mtx_unlock_spin(&txr->hn_txlist_spin); 2829 #else 2830 if (!buf_ring_full(txr->hn_txdesc_br)) 2831 pending = true; 2832 #endif 2833 return (pending); 2834 } 2835 2836 static __inline void 2837 hn_txeof(struct hn_tx_ring *txr) 2838 { 2839 txr->hn_has_txeof = 0; 2840 txr->hn_txeof(txr); 2841 } 2842 2843 static void 2844 hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, 2845 struct vmbus_channel *chan, const void *data __unused, int dlen __unused) 2846 { 2847 struct hn_txdesc *txd = sndc->hn_cbarg; 2848 struct 
hn_tx_ring *txr; 2849 2850 txr = txd->txr; 2851 KASSERT(txr->hn_chan == chan, 2852 ("channel mismatch, on chan%u, should be chan%u", 2853 vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); 2854 2855 txr->hn_has_txeof = 1; 2856 hn_txdesc_put(txr, txd); 2857 2858 ++txr->hn_txdone_cnt; 2859 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { 2860 txr->hn_txdone_cnt = 0; 2861 if (txr->hn_oactive) 2862 hn_txeof(txr); 2863 } 2864 } 2865 2866 static void 2867 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 2868 { 2869 #if defined(INET) || defined(INET6) 2870 tcp_lro_flush_all(&rxr->hn_lro); 2871 #endif 2872 2873 /* 2874 * NOTE: 2875 * 'txr' could be NULL, if multiple channels and 2876 * ifnet.if_start method are enabled. 2877 */ 2878 if (txr == NULL || !txr->hn_has_txeof) 2879 return; 2880 2881 txr->hn_txdone_cnt = 0; 2882 hn_txeof(txr); 2883 } 2884 2885 static __inline uint32_t 2886 hn_rndis_pktmsg_offset(uint32_t ofs) 2887 { 2888 2889 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 2890 ("invalid RNDIS packet msg offset %u", ofs)); 2891 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 2892 } 2893 2894 static __inline void * 2895 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 2896 size_t pi_dlen, uint32_t pi_type) 2897 { 2898 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 2899 struct rndis_pktinfo *pi; 2900 2901 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 2902 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 2903 2904 /* 2905 * Per-packet-info does not move; it only grows. 2906 * 2907 * NOTE: 2908 * rm_pktinfooffset in this phase counts from the beginning 2909 * of rndis_packet_msg. 2910 */ 2911 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 2912 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 2913 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 2914 pkt->rm_pktinfolen); 2915 pkt->rm_pktinfolen += pi_size; 2916 2917 pi->rm_size = pi_size; 2918 pi->rm_type = pi_type; 2919 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 2920 2921 return (pi->rm_data); 2922 } 2923 2924 static __inline int 2925 hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) 2926 { 2927 struct hn_txdesc *txd; 2928 struct mbuf *m; 2929 int error, pkts; 2930 2931 txd = txr->hn_agg_txd; 2932 KASSERT(txd != NULL, ("no aggregate txdesc")); 2933 2934 /* 2935 * Since hn_txpkt() will reset this temporary stat, save 2936 * it now, so that oerrors can be updated properly, if 2937 * hn_txpkt() ever fails. 2938 */ 2939 pkts = txr->hn_stat_pkts; 2940 2941 /* 2942 * Since txd's mbuf will _not_ be freed upon hn_txpkt() 2943 * failure, save it for later freeing, if hn_txpkt() ever 2944 * fails. 2945 */ 2946 m = txd->m; 2947 error = hn_txpkt(ifp, txr, txd); 2948 if (__predict_false(error)) { 2949 /* txd is freed, but m is not. */ 2950 m_freem(m); 2951 2952 txr->hn_flush_failed++; 2953 if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); 2954 } 2955 2956 /* Reset all aggregation states. 
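	 * so that the next hn_try_txagg() call starts a brand new batch.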
*/ 2957 txr->hn_agg_txd = NULL; 2958 txr->hn_agg_szleft = 0; 2959 txr->hn_agg_pktleft = 0; 2960 txr->hn_agg_prevpkt = NULL; 2961 2962 return (error); 2963 } 2964 2965 static void * 2966 hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2967 int pktsize) 2968 { 2969 void *chim; 2970 2971 if (txr->hn_agg_txd != NULL) { 2972 if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { 2973 struct hn_txdesc *agg_txd = txr->hn_agg_txd; 2974 struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; 2975 int olen; 2976 2977 /* 2978 * Update the previous RNDIS packet's total length, 2979 * it can be increased due to the mandatory alignment 2980 * padding for this RNDIS packet. And update the 2981 * aggregating txdesc's chimney sending buffer size 2982 * accordingly. 2983 * 2984 * XXX 2985 * Zero-out the padding, as required by the RNDIS spec. 2986 */ 2987 olen = pkt->rm_len; 2988 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 2989 agg_txd->chim_size += pkt->rm_len - olen; 2990 2991 /* Link this txdesc to the parent. */ 2992 hn_txdesc_agg(agg_txd, txd); 2993 2994 chim = (uint8_t *)pkt + pkt->rm_len; 2995 /* Save the current packet for later fixup. */ 2996 txr->hn_agg_prevpkt = chim; 2997 2998 txr->hn_agg_pktleft--; 2999 txr->hn_agg_szleft -= pktsize; 3000 if (txr->hn_agg_szleft <= 3001 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 3002 /* 3003 * Probably can't aggregate more packets, 3004 * flush this aggregating txdesc proactively. 3005 */ 3006 txr->hn_agg_pktleft = 0; 3007 } 3008 /* Done! */ 3009 return (chim); 3010 } 3011 hn_flush_txagg(ifp, txr); 3012 } 3013 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 3014 3015 txr->hn_tx_chimney_tried++; 3016 txd->chim_index = hn_chim_alloc(txr->hn_sc); 3017 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 3018 return (NULL); 3019 txr->hn_tx_chimney++; 3020 3021 chim = txr->hn_sc->hn_chim + 3022 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 3023 3024 if (txr->hn_agg_pktmax > 1 && 3025 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 3026 txr->hn_agg_txd = txd; 3027 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 3028 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 3029 txr->hn_agg_prevpkt = chim; 3030 } 3031 return (chim); 3032 } 3033 3034 /* 3035 * NOTE: 3036 * If this function fails, then both txd and m_head0 will be freed. 
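 * (On success *m_head0 may still be updated, e.g. if the mbuf chain had
 * to be collapsed to fit the DMA segment limit.)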
3037 */ 3038 static int 3039 hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 3040 struct mbuf **m_head0) 3041 { 3042 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 3043 int error, nsegs, i; 3044 struct mbuf *m_head = *m_head0; 3045 struct rndis_packet_msg *pkt; 3046 uint32_t *pi_data; 3047 void *chim = NULL; 3048 int pkt_hlen, pkt_size; 3049 3050 pkt = txd->rndis_pkt; 3051 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 3052 if (pkt_size < txr->hn_chim_size) { 3053 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 3054 if (chim != NULL) 3055 pkt = chim; 3056 } else { 3057 if (txr->hn_agg_txd != NULL) 3058 hn_flush_txagg(ifp, txr); 3059 } 3060 3061 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 3062 pkt->rm_len = m_head->m_pkthdr.len; 3063 pkt->rm_dataoffset = 0; 3064 pkt->rm_datalen = m_head->m_pkthdr.len; 3065 pkt->rm_oobdataoffset = 0; 3066 pkt->rm_oobdatalen = 0; 3067 pkt->rm_oobdataelements = 0; 3068 pkt->rm_pktinfooffset = sizeof(*pkt); 3069 pkt->rm_pktinfolen = 0; 3070 pkt->rm_vchandle = 0; 3071 pkt->rm_reserved = 0; 3072 3073 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 3074 /* 3075 * Set the hash value for this packet, so that the host could 3076 * dispatch the TX done event for this packet back to this TX 3077 * ring's channel. 3078 */ 3079 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3080 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 3081 *pi_data = txr->hn_tx_idx; 3082 } 3083 3084 if (m_head->m_flags & M_VLANTAG) { 3085 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3086 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 3087 *pi_data = NDIS_VLAN_INFO_MAKE( 3088 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 3089 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 3090 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 3091 } 3092 3093 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3094 #if defined(INET6) || defined(INET) 3095 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3096 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 3097 #ifdef INET 3098 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 3099 *pi_data = NDIS_LSO2_INFO_MAKEIPV4( 3100 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3101 m_head->m_pkthdr.tso_segsz); 3102 } 3103 #endif 3104 #if defined(INET6) && defined(INET) 3105 else 3106 #endif 3107 #ifdef INET6 3108 { 3109 *pi_data = NDIS_LSO2_INFO_MAKEIPV6( 3110 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3111 m_head->m_pkthdr.tso_segsz); 3112 } 3113 #endif 3114 #endif /* INET6 || INET */ 3115 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 3116 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3117 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 3118 if (m_head->m_pkthdr.csum_flags & 3119 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 3120 *pi_data = NDIS_TXCSUM_INFO_IPV6; 3121 } else { 3122 *pi_data = NDIS_TXCSUM_INFO_IPV4; 3123 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3124 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 3125 } 3126 3127 if (m_head->m_pkthdr.csum_flags & 3128 (CSUM_IP_TCP | CSUM_IP6_TCP)) { 3129 *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS( 3130 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3131 } else if (m_head->m_pkthdr.csum_flags & 3132 (CSUM_IP_UDP | CSUM_IP6_UDP)) { 3133 *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS( 3134 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3135 } 3136 } 3137 3138 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 3139 /* Fixup RNDIS packet message total length */ 3140 pkt->rm_len += pkt_hlen; 3141 /* Convert RNDIS packet message offsets */ 3142 pkt->rm_dataoffset = 
hn_rndis_pktmsg_offset(pkt_hlen); 3143 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 3144 3145 /* 3146 * Fast path: Chimney sending. 3147 */ 3148 if (chim != NULL) { 3149 struct hn_txdesc *tgt_txd = txd; 3150 3151 if (txr->hn_agg_txd != NULL) { 3152 tgt_txd = txr->hn_agg_txd; 3153 #ifdef INVARIANTS 3154 *m_head0 = NULL; 3155 #endif 3156 } 3157 3158 KASSERT(pkt == chim, 3159 ("RNDIS pkt not in chimney sending buffer")); 3160 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 3161 ("chimney sending buffer is not used")); 3162 tgt_txd->chim_size += pkt->rm_len; 3163 3164 m_copydata(m_head, 0, m_head->m_pkthdr.len, 3165 ((uint8_t *)chim) + pkt_hlen); 3166 3167 txr->hn_gpa_cnt = 0; 3168 txr->hn_sendpkt = hn_txpkt_chim; 3169 goto done; 3170 } 3171 3172 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 3173 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 3174 ("chimney buffer is used")); 3175 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 3176 3177 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 3178 if (__predict_false(error)) { 3179 int freed; 3180 3181 /* 3182 * This mbuf is not linked w/ the txd yet, so free it now. 3183 */ 3184 m_freem(m_head); 3185 *m_head0 = NULL; 3186 3187 freed = hn_txdesc_put(txr, txd); 3188 KASSERT(freed != 0, 3189 ("fail to free txd upon txdma error")); 3190 3191 txr->hn_txdma_failed++; 3192 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 3193 return error; 3194 } 3195 *m_head0 = m_head; 3196 3197 /* +1 RNDIS packet message */ 3198 txr->hn_gpa_cnt = nsegs + 1; 3199 3200 /* send packet with page buffer */ 3201 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 3202 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 3203 txr->hn_gpa[0].gpa_len = pkt_hlen; 3204 3205 /* 3206 * Fill the page buffers with mbuf info after the page 3207 * buffer for RNDIS packet message. 3208 */ 3209 for (i = 0; i < nsegs; ++i) { 3210 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 3211 3212 gpa->gpa_page = atop(segs[i].ds_addr); 3213 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 3214 gpa->gpa_len = segs[i].ds_len; 3215 } 3216 3217 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 3218 txd->chim_size = 0; 3219 txr->hn_sendpkt = hn_txpkt_sglist; 3220 done: 3221 txd->m = m_head; 3222 3223 /* Set the completion routine */ 3224 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 3225 3226 /* Update temporary stats for later use. */ 3227 txr->hn_stat_pkts++; 3228 txr->hn_stat_size += m_head->m_pkthdr.len; 3229 if (m_head->m_flags & M_MCAST) 3230 txr->hn_stat_mcasts++; 3231 3232 return 0; 3233 } 3234 3235 /* 3236 * NOTE: 3237 * If this function fails, then txd will be freed, but the mbuf 3238 * associated w/ the txd will _not_ be freed. 3239 */ 3240 static int 3241 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 3242 { 3243 int error, send_failed = 0, has_bpf; 3244 3245 again: 3246 has_bpf = bpf_peers_present(ifp->if_bpf); 3247 if (has_bpf) { 3248 /* 3249 * Make sure that this txd and any aggregated txds are not 3250 * freed before ETHER_BPF_MTAP. 
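		 * The extra reference taken by hn_txdesc_hold() below is
		 * dropped again via hn_txdesc_put() once the BPF taps are
		 * done.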
3251 */ 3252 hn_txdesc_hold(txd); 3253 } 3254 error = txr->hn_sendpkt(txr, txd); 3255 if (!error) { 3256 if (has_bpf) { 3257 const struct hn_txdesc *tmp_txd; 3258 3259 ETHER_BPF_MTAP(ifp, txd->m); 3260 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 3261 ETHER_BPF_MTAP(ifp, tmp_txd->m); 3262 } 3263 3264 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 3265 #ifdef HN_IFSTART_SUPPORT 3266 if (!hn_use_if_start) 3267 #endif 3268 { 3269 if_inc_counter(ifp, IFCOUNTER_OBYTES, 3270 txr->hn_stat_size); 3271 if (txr->hn_stat_mcasts != 0) { 3272 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 3273 txr->hn_stat_mcasts); 3274 } 3275 } 3276 txr->hn_pkts += txr->hn_stat_pkts; 3277 txr->hn_sends++; 3278 } 3279 if (has_bpf) 3280 hn_txdesc_put(txr, txd); 3281 3282 if (__predict_false(error)) { 3283 int freed; 3284 3285 /* 3286 * This should "really rarely" happen. 3287 * 3288 * XXX Too many RX to be acked or too many sideband 3289 * commands to run? Ask netvsc_channel_rollup() 3290 * to kick start later. 3291 */ 3292 txr->hn_has_txeof = 1; 3293 if (!send_failed) { 3294 txr->hn_send_failed++; 3295 send_failed = 1; 3296 /* 3297 * Try sending again after set hn_has_txeof; 3298 * in case that we missed the last 3299 * netvsc_channel_rollup(). 3300 */ 3301 goto again; 3302 } 3303 if_printf(ifp, "send failed\n"); 3304 3305 /* 3306 * Caller will perform further processing on the 3307 * associated mbuf, so don't free it in hn_txdesc_put(); 3308 * only unload it from the DMA map in hn_txdesc_put(), 3309 * if it was loaded. 3310 */ 3311 txd->m = NULL; 3312 freed = hn_txdesc_put(txr, txd); 3313 KASSERT(freed != 0, 3314 ("fail to free txd upon send error")); 3315 3316 txr->hn_send_failed++; 3317 } 3318 3319 /* Reset temporary stats, after this sending is done. */ 3320 txr->hn_stat_size = 0; 3321 txr->hn_stat_pkts = 0; 3322 txr->hn_stat_mcasts = 0; 3323 3324 return (error); 3325 } 3326 3327 /* 3328 * Append the specified data to the indicated mbuf chain, 3329 * Extend the mbuf chain if the new data does not fit in 3330 * existing space. 3331 * 3332 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 3333 * There should be an equivalent in the kernel mbuf code, 3334 * but there does not appear to be one yet. 3335 * 3336 * Differs from m_append() in that additional mbufs are 3337 * allocated with cluster size MJUMPAGESIZE, and filled 3338 * accordingly. 3339 * 3340 * Return 1 if able to complete the job; otherwise 0. 3341 */ 3342 static int 3343 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 3344 { 3345 struct mbuf *m, *n; 3346 int remainder, space; 3347 3348 for (m = m0; m->m_next != NULL; m = m->m_next) 3349 ; 3350 remainder = len; 3351 space = M_TRAILINGSPACE(m); 3352 if (space > 0) { 3353 /* 3354 * Copy into available space. 3355 */ 3356 if (space > remainder) 3357 space = remainder; 3358 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 3359 m->m_len += space; 3360 cp += space; 3361 remainder -= space; 3362 } 3363 while (remainder > 0) { 3364 /* 3365 * Allocate a new mbuf; could check space 3366 * and allocate a cluster instead. 
3367 */ 3368 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 3369 if (n == NULL) 3370 break; 3371 n->m_len = min(MJUMPAGESIZE, remainder); 3372 bcopy(cp, mtod(n, caddr_t), n->m_len); 3373 cp += n->m_len; 3374 remainder -= n->m_len; 3375 m->m_next = n; 3376 m = n; 3377 } 3378 if (m0->m_flags & M_PKTHDR) 3379 m0->m_pkthdr.len += len - remainder; 3380 3381 return (remainder == 0); 3382 } 3383 3384 #if defined(INET) || defined(INET6) 3385 static __inline int 3386 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 3387 { 3388 #if __FreeBSD_version >= 1100095 3389 if (hn_lro_mbufq_depth) { 3390 tcp_lro_queue_mbuf(lc, m); 3391 return 0; 3392 } 3393 #endif 3394 return tcp_lro_rx(lc, m, 0); 3395 } 3396 #endif 3397 3398 static int 3399 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 3400 const struct hn_rxinfo *info) 3401 { 3402 struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; 3403 struct mbuf *m_new; 3404 int size, do_lro = 0, do_csum = 1, is_vf = 0; 3405 int hash_type = M_HASHTYPE_NONE; 3406 int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE; 3407 3408 ifp = hn_ifp; 3409 if (rxr->hn_rxvf_ifp != NULL) { 3410 /* 3411 * Non-transparent mode VF; pretend this packet is from 3412 * the VF. 3413 */ 3414 ifp = rxr->hn_rxvf_ifp; 3415 is_vf = 1; 3416 } else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) { 3417 /* Transparent mode VF. */ 3418 is_vf = 1; 3419 } 3420 3421 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 3422 /* 3423 * NOTE: 3424 * See the NOTE of hn_rndis_init_fixat(). This 3425 * function can be reached, immediately after the 3426 * RNDIS is initialized but before the ifnet is 3427 * setup on the hn_attach() path; drop the unexpected 3428 * packets. 3429 */ 3430 return (0); 3431 } 3432 3433 if (__predict_false(dlen < ETHER_HDR_LEN)) { 3434 if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); 3435 return (0); 3436 } 3437 3438 if (dlen <= MHLEN) { 3439 m_new = m_gethdr(M_NOWAIT, MT_DATA); 3440 if (m_new == NULL) { 3441 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3442 return (0); 3443 } 3444 memcpy(mtod(m_new, void *), data, dlen); 3445 m_new->m_pkthdr.len = m_new->m_len = dlen; 3446 rxr->hn_small_pkts++; 3447 } else { 3448 /* 3449 * Get an mbuf with a cluster. For packets 2K or less, 3450 * get a standard 2K cluster. For anything larger, get a 3451 * 4K cluster. Any buffers larger than 4K can cause problems 3452 * if looped around to the Hyper-V TX channel, so avoid them. 
3453 */ 3454 size = MCLBYTES; 3455 if (dlen > MCLBYTES) { 3456 /* 4096 */ 3457 size = MJUMPAGESIZE; 3458 } 3459 3460 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 3461 if (m_new == NULL) { 3462 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3463 return (0); 3464 } 3465 3466 hv_m_append(m_new, dlen, data); 3467 } 3468 m_new->m_pkthdr.rcvif = ifp; 3469 3470 if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) 3471 do_csum = 0; 3472 3473 /* receive side checksum offload */ 3474 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 3475 /* IP csum offload */ 3476 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 3477 m_new->m_pkthdr.csum_flags |= 3478 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3479 rxr->hn_csum_ip++; 3480 } 3481 3482 /* TCP/UDP csum offload */ 3483 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 3484 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 3485 m_new->m_pkthdr.csum_flags |= 3486 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3487 m_new->m_pkthdr.csum_data = 0xffff; 3488 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 3489 rxr->hn_csum_tcp++; 3490 else 3491 rxr->hn_csum_udp++; 3492 } 3493 3494 /* 3495 * XXX 3496 * As of this write (Oct 28th, 2016), host side will turn 3497 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 3498 * the do_lro setting here is actually _not_ accurate. We 3499 * depend on the RSS hash type check to reset do_lro. 3500 */ 3501 if ((info->csum_info & 3502 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 3503 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 3504 do_lro = 1; 3505 } else { 3506 hn_rxpkt_proto(m_new, &l3proto, &l4proto); 3507 if (l3proto == ETHERTYPE_IP) { 3508 if (l4proto == IPPROTO_TCP) { 3509 if (do_csum && 3510 (rxr->hn_trust_hcsum & 3511 HN_TRUST_HCSUM_TCP)) { 3512 rxr->hn_csum_trusted++; 3513 m_new->m_pkthdr.csum_flags |= 3514 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3515 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3516 m_new->m_pkthdr.csum_data = 0xffff; 3517 } 3518 do_lro = 1; 3519 } else if (l4proto == IPPROTO_UDP) { 3520 if (do_csum && 3521 (rxr->hn_trust_hcsum & 3522 HN_TRUST_HCSUM_UDP)) { 3523 rxr->hn_csum_trusted++; 3524 m_new->m_pkthdr.csum_flags |= 3525 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3526 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3527 m_new->m_pkthdr.csum_data = 0xffff; 3528 } 3529 } else if (l4proto != IPPROTO_DONE && do_csum && 3530 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 3531 rxr->hn_csum_trusted++; 3532 m_new->m_pkthdr.csum_flags |= 3533 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3534 } 3535 } 3536 } 3537 3538 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 3539 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 3540 NDIS_VLAN_INFO_ID(info->vlan_info), 3541 NDIS_VLAN_INFO_PRI(info->vlan_info), 3542 NDIS_VLAN_INFO_CFI(info->vlan_info)); 3543 m_new->m_flags |= M_VLANTAG; 3544 } 3545 3546 /* 3547 * If VF is activated (tranparent/non-transparent mode does not 3548 * matter here). 3549 * 3550 * - Disable LRO 3551 * 3552 * hn(4) will only receive broadcast packets, multicast packets, 3553 * TCP SYN and SYN|ACK (in Azure), LRO is useless for these 3554 * packet types. 3555 * 3556 * For non-transparent, we definitely _cannot_ enable LRO at 3557 * all, since the LRO flush will use hn(4) as the receiving 3558 * interface; i.e. hn_ifp->if_input(hn_ifp, m). 3559 */ 3560 if (is_vf) 3561 do_lro = 0; 3562 3563 /* 3564 * If VF is activated (tranparent/non-transparent mode does not 3565 * matter here), do _not_ mess with unsupported hash types or 3566 * functions. 
3567 */ 3568 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 3569 rxr->hn_rss_pkts++; 3570 m_new->m_pkthdr.flowid = info->hash_value; 3571 if (!is_vf) 3572 hash_type = M_HASHTYPE_OPAQUE_HASH; 3573 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 3574 NDIS_HASH_FUNCTION_TOEPLITZ) { 3575 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK & 3576 rxr->hn_mbuf_hash); 3577 3578 /* 3579 * NOTE: 3580 * do_lro is resetted, if the hash types are not TCP 3581 * related. See the comment in the above csum_flags 3582 * setup section. 3583 */ 3584 switch (type) { 3585 case NDIS_HASH_IPV4: 3586 hash_type = M_HASHTYPE_RSS_IPV4; 3587 do_lro = 0; 3588 break; 3589 3590 case NDIS_HASH_TCP_IPV4: 3591 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 3592 if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) { 3593 int def_htype = M_HASHTYPE_OPAQUE_HASH; 3594 3595 if (is_vf) 3596 def_htype = M_HASHTYPE_NONE; 3597 3598 /* 3599 * UDP 4-tuple hash is delivered as 3600 * TCP 4-tuple hash. 3601 */ 3602 if (l3proto == ETHERTYPE_MAX) { 3603 hn_rxpkt_proto(m_new, 3604 &l3proto, &l4proto); 3605 } 3606 if (l3proto == ETHERTYPE_IP) { 3607 if (l4proto == IPPROTO_UDP && 3608 (rxr->hn_mbuf_hash & 3609 NDIS_HASH_UDP_IPV4_X)) { 3610 hash_type = 3611 M_HASHTYPE_RSS_UDP_IPV4; 3612 do_lro = 0; 3613 } else if (l4proto != 3614 IPPROTO_TCP) { 3615 hash_type = def_htype; 3616 do_lro = 0; 3617 } 3618 } else { 3619 hash_type = def_htype; 3620 do_lro = 0; 3621 } 3622 } 3623 break; 3624 3625 case NDIS_HASH_IPV6: 3626 hash_type = M_HASHTYPE_RSS_IPV6; 3627 do_lro = 0; 3628 break; 3629 3630 case NDIS_HASH_IPV6_EX: 3631 hash_type = M_HASHTYPE_RSS_IPV6_EX; 3632 do_lro = 0; 3633 break; 3634 3635 case NDIS_HASH_TCP_IPV6: 3636 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 3637 break; 3638 3639 case NDIS_HASH_TCP_IPV6_EX: 3640 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 3641 break; 3642 } 3643 } 3644 } else if (!is_vf) { 3645 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 3646 hash_type = M_HASHTYPE_OPAQUE; 3647 } 3648 M_HASHTYPE_SET(m_new, hash_type); 3649 3650 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 3651 if (hn_ifp != ifp) { 3652 const struct ether_header *eh; 3653 3654 /* 3655 * Non-transparent mode VF is activated. 3656 */ 3657 3658 /* 3659 * Allow tapping on hn(4). 3660 */ 3661 ETHER_BPF_MTAP(hn_ifp, m_new); 3662 3663 /* 3664 * Update hn(4)'s stats. 3665 */ 3666 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 3667 if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len); 3668 /* Checked at the beginning of this function. */ 3669 KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame")); 3670 eh = mtod(m_new, struct ether_header *); 3671 if (ETHER_IS_MULTICAST(eh->ether_dhost)) 3672 if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1); 3673 } 3674 rxr->hn_pkts++; 3675 3676 if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) { 3677 #if defined(INET) || defined(INET6) 3678 struct lro_ctrl *lro = &rxr->hn_lro; 3679 3680 if (lro->lro_cnt) { 3681 rxr->hn_lro_tried++; 3682 if (hn_lro_rx(lro, m_new) == 0) { 3683 /* DONE! 
*/ 3684 return 0; 3685 } 3686 } 3687 #endif 3688 } 3689 ifp->if_input(ifp, m_new); 3690 3691 return (0); 3692 } 3693 3694 static int 3695 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 3696 { 3697 struct hn_softc *sc = ifp->if_softc; 3698 struct ifreq *ifr = (struct ifreq *)data, ifr_vf; 3699 struct ifnet *vf_ifp; 3700 int mask, error = 0; 3701 struct ifrsskey *ifrk; 3702 struct ifrsshash *ifrh; 3703 uint32_t mtu; 3704 3705 switch (cmd) { 3706 case SIOCSIFMTU: 3707 if (ifr->ifr_mtu > HN_MTU_MAX) { 3708 error = EINVAL; 3709 break; 3710 } 3711 3712 HN_LOCK(sc); 3713 3714 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3715 HN_UNLOCK(sc); 3716 break; 3717 } 3718 3719 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 3720 /* Can't change MTU */ 3721 HN_UNLOCK(sc); 3722 error = EOPNOTSUPP; 3723 break; 3724 } 3725 3726 if (ifp->if_mtu == ifr->ifr_mtu) { 3727 HN_UNLOCK(sc); 3728 break; 3729 } 3730 3731 if (hn_xpnt_vf_isready(sc)) { 3732 vf_ifp = sc->hn_vf_ifp; 3733 ifr_vf = *ifr; 3734 strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname, 3735 sizeof(ifr_vf.ifr_name)); 3736 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, 3737 (caddr_t)&ifr_vf); 3738 if (error) { 3739 HN_UNLOCK(sc); 3740 if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n", 3741 vf_ifp->if_xname, ifr->ifr_mtu, error); 3742 break; 3743 } 3744 } 3745 3746 /* 3747 * Suspend this interface before the synthetic parts 3748 * are ripped. 3749 */ 3750 hn_suspend(sc); 3751 3752 /* 3753 * Detach the synthetics parts, i.e. NVS and RNDIS. 3754 */ 3755 hn_synth_detach(sc); 3756 3757 /* 3758 * Reattach the synthetic parts, i.e. NVS and RNDIS, 3759 * with the new MTU setting. 3760 */ 3761 error = hn_synth_attach(sc, ifr->ifr_mtu); 3762 if (error) { 3763 HN_UNLOCK(sc); 3764 break; 3765 } 3766 3767 error = hn_rndis_get_mtu(sc, &mtu); 3768 if (error) 3769 mtu = ifr->ifr_mtu; 3770 else if (bootverbose) 3771 if_printf(ifp, "RNDIS mtu %u\n", mtu); 3772 3773 /* 3774 * Commit the requested MTU, after the synthetic parts 3775 * have been successfully attached. 3776 */ 3777 if (mtu >= ifr->ifr_mtu) { 3778 mtu = ifr->ifr_mtu; 3779 } else { 3780 if_printf(ifp, "fixup mtu %d -> %u\n", 3781 ifr->ifr_mtu, mtu); 3782 } 3783 ifp->if_mtu = mtu; 3784 3785 /* 3786 * Synthetic parts' reattach may change the chimney 3787 * sending size; update it. 3788 */ 3789 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 3790 hn_set_chim_size(sc, sc->hn_chim_szmax); 3791 3792 /* 3793 * Make sure that various parameters based on MTU are 3794 * still valid, after the MTU change. 3795 */ 3796 hn_mtu_change_fixup(sc); 3797 3798 /* 3799 * All done! Resume the interface now. 3800 */ 3801 hn_resume(sc); 3802 3803 if ((sc->hn_flags & HN_FLAG_RXVF) || 3804 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 3805 /* 3806 * Since we have reattached the NVS part, 3807 * change the datapath to VF again; in case 3808 * that it is lost, after the NVS was detached. 3809 */ 3810 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 3811 } 3812 3813 HN_UNLOCK(sc); 3814 break; 3815 3816 case SIOCSIFFLAGS: 3817 HN_LOCK(sc); 3818 3819 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3820 HN_UNLOCK(sc); 3821 break; 3822 } 3823 3824 if (hn_xpnt_vf_isready(sc)) 3825 hn_xpnt_vf_saveifflags(sc); 3826 3827 if (ifp->if_flags & IFF_UP) { 3828 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3829 /* 3830 * Caller meight hold mutex, e.g. 3831 * bpf; use busy-wait for the RNDIS 3832 * reply. 
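				 * HN_NO_SLEEPING() below makes the RNDIS
				 * request path poll for the reply rather
				 * than sleep on it.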
3833 */ 3834 HN_NO_SLEEPING(sc); 3835 hn_rxfilter_config(sc); 3836 HN_SLEEPING_OK(sc); 3837 3838 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 3839 error = hn_xpnt_vf_iocsetflags(sc); 3840 } else { 3841 hn_init_locked(sc); 3842 } 3843 } else { 3844 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3845 hn_stop(sc, false); 3846 } 3847 sc->hn_if_flags = ifp->if_flags; 3848 3849 HN_UNLOCK(sc); 3850 break; 3851 3852 case SIOCSIFCAP: 3853 HN_LOCK(sc); 3854 3855 if (hn_xpnt_vf_isready(sc)) { 3856 ifr_vf = *ifr; 3857 strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname, 3858 sizeof(ifr_vf.ifr_name)); 3859 error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf); 3860 HN_UNLOCK(sc); 3861 break; 3862 } 3863 3864 /* 3865 * Fix up requested capabilities w/ supported capabilities, 3866 * since the supported capabilities could have been changed. 3867 */ 3868 mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ 3869 ifp->if_capenable; 3870 3871 if (mask & IFCAP_TXCSUM) { 3872 ifp->if_capenable ^= IFCAP_TXCSUM; 3873 if (ifp->if_capenable & IFCAP_TXCSUM) 3874 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 3875 else 3876 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 3877 } 3878 if (mask & IFCAP_TXCSUM_IPV6) { 3879 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 3880 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 3881 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 3882 else 3883 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 3884 } 3885 3886 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 3887 if (mask & IFCAP_RXCSUM) 3888 ifp->if_capenable ^= IFCAP_RXCSUM; 3889 #ifdef foo 3890 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 3891 if (mask & IFCAP_RXCSUM_IPV6) 3892 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 3893 #endif 3894 3895 if (mask & IFCAP_LRO) 3896 ifp->if_capenable ^= IFCAP_LRO; 3897 3898 if (mask & IFCAP_TSO4) { 3899 ifp->if_capenable ^= IFCAP_TSO4; 3900 if (ifp->if_capenable & IFCAP_TSO4) 3901 ifp->if_hwassist |= CSUM_IP_TSO; 3902 else 3903 ifp->if_hwassist &= ~CSUM_IP_TSO; 3904 } 3905 if (mask & IFCAP_TSO6) { 3906 ifp->if_capenable ^= IFCAP_TSO6; 3907 if (ifp->if_capenable & IFCAP_TSO6) 3908 ifp->if_hwassist |= CSUM_IP6_TSO; 3909 else 3910 ifp->if_hwassist &= ~CSUM_IP6_TSO; 3911 } 3912 3913 HN_UNLOCK(sc); 3914 break; 3915 3916 case SIOCADDMULTI: 3917 case SIOCDELMULTI: 3918 HN_LOCK(sc); 3919 3920 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3921 HN_UNLOCK(sc); 3922 break; 3923 } 3924 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3925 /* 3926 * Multicast uses mutex; use busy-wait for 3927 * the RNDIS reply. 3928 */ 3929 HN_NO_SLEEPING(sc); 3930 hn_rxfilter_config(sc); 3931 HN_SLEEPING_OK(sc); 3932 } 3933 3934 /* XXX vlan(4) style mcast addr maintenance */ 3935 if (hn_xpnt_vf_isready(sc)) { 3936 int old_if_flags; 3937 3938 old_if_flags = sc->hn_vf_ifp->if_flags; 3939 hn_xpnt_vf_saveifflags(sc); 3940 3941 if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && 3942 ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & 3943 IFF_ALLMULTI)) 3944 error = hn_xpnt_vf_iocsetflags(sc); 3945 } 3946 3947 HN_UNLOCK(sc); 3948 break; 3949 3950 case SIOCSIFMEDIA: 3951 case SIOCGIFMEDIA: 3952 HN_LOCK(sc); 3953 if (hn_xpnt_vf_isready(sc)) { 3954 /* 3955 * SIOCGIFMEDIA expects ifmediareq, so don't 3956 * create and pass ifr_vf to the VF here; just 3957 * replace the ifr_name. 3958 */ 3959 vf_ifp = sc->hn_vf_ifp; 3960 strlcpy(ifr->ifr_name, vf_ifp->if_xname, 3961 sizeof(ifr->ifr_name)); 3962 error = vf_ifp->if_ioctl(vf_ifp, cmd, data); 3963 /* Restore the ifr_name. 
*/ 3964 strlcpy(ifr->ifr_name, ifp->if_xname, 3965 sizeof(ifr->ifr_name)); 3966 HN_UNLOCK(sc); 3967 break; 3968 } 3969 HN_UNLOCK(sc); 3970 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 3971 break; 3972 3973 case SIOCGIFRSSHASH: 3974 ifrh = (struct ifrsshash *)data; 3975 HN_LOCK(sc); 3976 if (sc->hn_rx_ring_inuse == 1) { 3977 HN_UNLOCK(sc); 3978 ifrh->ifrh_func = RSS_FUNC_NONE; 3979 ifrh->ifrh_types = 0; 3980 break; 3981 } 3982 3983 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 3984 ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; 3985 else 3986 ifrh->ifrh_func = RSS_FUNC_PRIVATE; 3987 ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash); 3988 HN_UNLOCK(sc); 3989 break; 3990 3991 case SIOCGIFRSSKEY: 3992 ifrk = (struct ifrsskey *)data; 3993 HN_LOCK(sc); 3994 if (sc->hn_rx_ring_inuse == 1) { 3995 HN_UNLOCK(sc); 3996 ifrk->ifrk_func = RSS_FUNC_NONE; 3997 ifrk->ifrk_keylen = 0; 3998 break; 3999 } 4000 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 4001 ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; 4002 else 4003 ifrk->ifrk_func = RSS_FUNC_PRIVATE; 4004 ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ; 4005 memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key, 4006 NDIS_HASH_KEYSIZE_TOEPLITZ); 4007 HN_UNLOCK(sc); 4008 break; 4009 4010 default: 4011 error = ether_ioctl(ifp, cmd, data); 4012 break; 4013 } 4014 return (error); 4015 } 4016 4017 static void 4018 hn_stop(struct hn_softc *sc, bool detaching) 4019 { 4020 struct ifnet *ifp = sc->hn_ifp; 4021 int i; 4022 4023 HN_LOCK_ASSERT(sc); 4024 4025 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 4026 ("synthetic parts were not attached")); 4027 4028 /* Clear RUNNING bit ASAP. */ 4029 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4030 4031 /* Disable polling. */ 4032 hn_polling(sc, 0); 4033 4034 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 4035 KASSERT(sc->hn_vf_ifp != NULL, 4036 ("%s: VF is not attached", ifp->if_xname)); 4037 4038 /* Mark transparent mode VF as disabled. */ 4039 hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */); 4040 4041 /* 4042 * NOTE: 4043 * Datapath setting must happen _before_ bringing 4044 * the VF down. 4045 */ 4046 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 4047 4048 /* 4049 * Bring the VF down. 4050 */ 4051 hn_xpnt_vf_saveifflags(sc); 4052 sc->hn_vf_ifp->if_flags &= ~IFF_UP; 4053 hn_xpnt_vf_iocsetflags(sc); 4054 } 4055 4056 /* Suspend data transfers. */ 4057 hn_suspend_data(sc); 4058 4059 /* Clear OACTIVE bit. */ 4060 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4061 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4062 sc->hn_tx_ring[i].hn_oactive = 0; 4063 4064 /* 4065 * If the non-transparent mode VF is active, make sure 4066 * that the RX filter still allows packet reception. 4067 */ 4068 if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) 4069 hn_rxfilter_config(sc); 4070 } 4071 4072 static void 4073 hn_init_locked(struct hn_softc *sc) 4074 { 4075 struct ifnet *ifp = sc->hn_ifp; 4076 int i; 4077 4078 HN_LOCK_ASSERT(sc); 4079 4080 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 4081 return; 4082 4083 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 4084 return; 4085 4086 /* Configure RX filter */ 4087 hn_rxfilter_config(sc); 4088 4089 /* Clear OACTIVE bit. */ 4090 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4091 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4092 sc->hn_tx_ring[i].hn_oactive = 0; 4093 4094 /* Clear TX 'suspended' bit. */ 4095 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 4096 4097 if (hn_xpnt_vf_isready(sc)) { 4098 /* Initialize transparent VF. 
*/ 4099 hn_xpnt_vf_init(sc); 4100 } 4101 4102 /* Everything is ready; unleash! */ 4103 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4104 4105 /* Re-enable polling if requested. */ 4106 if (sc->hn_pollhz > 0) 4107 hn_polling(sc, sc->hn_pollhz); 4108 } 4109 4110 static void 4111 hn_init(void *xsc) 4112 { 4113 struct hn_softc *sc = xsc; 4114 4115 HN_LOCK(sc); 4116 hn_init_locked(sc); 4117 HN_UNLOCK(sc); 4118 } 4119 4120 #if __FreeBSD_version >= 1100099 4121 4122 static int 4123 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 4124 { 4125 struct hn_softc *sc = arg1; 4126 unsigned int lenlim; 4127 int error; 4128 4129 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 4130 error = sysctl_handle_int(oidp, &lenlim, 0, req); 4131 if (error || req->newptr == NULL) 4132 return error; 4133 4134 HN_LOCK(sc); 4135 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 4136 lenlim > TCP_LRO_LENGTH_MAX) { 4137 HN_UNLOCK(sc); 4138 return EINVAL; 4139 } 4140 hn_set_lro_lenlim(sc, lenlim); 4141 HN_UNLOCK(sc); 4142 4143 return 0; 4144 } 4145 4146 static int 4147 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 4148 { 4149 struct hn_softc *sc = arg1; 4150 int ackcnt, error, i; 4151 4152 /* 4153 * lro_ackcnt_lim is append count limit, 4154 * +1 to turn it into aggregation limit. 4155 */ 4156 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 4157 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 4158 if (error || req->newptr == NULL) 4159 return error; 4160 4161 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 4162 return EINVAL; 4163 4164 /* 4165 * Convert aggregation limit back to append 4166 * count limit. 4167 */ 4168 --ackcnt; 4169 HN_LOCK(sc); 4170 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 4171 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 4172 HN_UNLOCK(sc); 4173 return 0; 4174 } 4175 4176 #endif 4177 4178 static int 4179 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 4180 { 4181 struct hn_softc *sc = arg1; 4182 int hcsum = arg2; 4183 int on, error, i; 4184 4185 on = 0; 4186 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 4187 on = 1; 4188 4189 error = sysctl_handle_int(oidp, &on, 0, req); 4190 if (error || req->newptr == NULL) 4191 return error; 4192 4193 HN_LOCK(sc); 4194 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4195 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4196 4197 if (on) 4198 rxr->hn_trust_hcsum |= hcsum; 4199 else 4200 rxr->hn_trust_hcsum &= ~hcsum; 4201 } 4202 HN_UNLOCK(sc); 4203 return 0; 4204 } 4205 4206 static int 4207 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 4208 { 4209 struct hn_softc *sc = arg1; 4210 int chim_size, error; 4211 4212 chim_size = sc->hn_tx_ring[0].hn_chim_size; 4213 error = sysctl_handle_int(oidp, &chim_size, 0, req); 4214 if (error || req->newptr == NULL) 4215 return error; 4216 4217 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 4218 return EINVAL; 4219 4220 HN_LOCK(sc); 4221 hn_set_chim_size(sc, chim_size); 4222 HN_UNLOCK(sc); 4223 return 0; 4224 } 4225 4226 #if __FreeBSD_version < 1100095 4227 static int 4228 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 4229 { 4230 struct hn_softc *sc = arg1; 4231 int ofs = arg2, i, error; 4232 struct hn_rx_ring *rxr; 4233 uint64_t stat; 4234 4235 stat = 0; 4236 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4237 rxr = &sc->hn_rx_ring[i]; 4238 stat += *((int *)((uint8_t *)rxr + ofs)); 4239 } 4240 4241 error = sysctl_handle_64(oidp, &stat, 0, req); 4242 if (error || req->newptr == NULL) 4243 return error; 4244 4245 /* Zero out this stat. 
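* Writing any value to this sysctl clears the per-ring counters; plain reads leave them untouched.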
*/ 4246 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4247 rxr = &sc->hn_rx_ring[i]; 4248 *((int *)((uint8_t *)rxr + ofs)) = 0; 4249 } 4250 return 0; 4251 } 4252 #else 4253 static int 4254 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 4255 { 4256 struct hn_softc *sc = arg1; 4257 int ofs = arg2, i, error; 4258 struct hn_rx_ring *rxr; 4259 uint64_t stat; 4260 4261 stat = 0; 4262 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4263 rxr = &sc->hn_rx_ring[i]; 4264 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 4265 } 4266 4267 error = sysctl_handle_64(oidp, &stat, 0, req); 4268 if (error || req->newptr == NULL) 4269 return error; 4270 4271 /* Zero out this stat. */ 4272 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4273 rxr = &sc->hn_rx_ring[i]; 4274 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 4275 } 4276 return 0; 4277 } 4278 4279 #endif 4280 4281 static int 4282 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4283 { 4284 struct hn_softc *sc = arg1; 4285 int ofs = arg2, i, error; 4286 struct hn_rx_ring *rxr; 4287 u_long stat; 4288 4289 stat = 0; 4290 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4291 rxr = &sc->hn_rx_ring[i]; 4292 stat += *((u_long *)((uint8_t *)rxr + ofs)); 4293 } 4294 4295 error = sysctl_handle_long(oidp, &stat, 0, req); 4296 if (error || req->newptr == NULL) 4297 return error; 4298 4299 /* Zero out this stat. */ 4300 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4301 rxr = &sc->hn_rx_ring[i]; 4302 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 4303 } 4304 return 0; 4305 } 4306 4307 static int 4308 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4309 { 4310 struct hn_softc *sc = arg1; 4311 int ofs = arg2, i, error; 4312 struct hn_tx_ring *txr; 4313 u_long stat; 4314 4315 stat = 0; 4316 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4317 txr = &sc->hn_tx_ring[i]; 4318 stat += *((u_long *)((uint8_t *)txr + ofs)); 4319 } 4320 4321 error = sysctl_handle_long(oidp, &stat, 0, req); 4322 if (error || req->newptr == NULL) 4323 return error; 4324 4325 /* Zero out this stat. 
*/ 4326 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4327 txr = &sc->hn_tx_ring[i]; 4328 *((u_long *)((uint8_t *)txr + ofs)) = 0; 4329 } 4330 return 0; 4331 } 4332 4333 static int 4334 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 4335 { 4336 struct hn_softc *sc = arg1; 4337 int ofs = arg2, i, error, conf; 4338 struct hn_tx_ring *txr; 4339 4340 txr = &sc->hn_tx_ring[0]; 4341 conf = *((int *)((uint8_t *)txr + ofs)); 4342 4343 error = sysctl_handle_int(oidp, &conf, 0, req); 4344 if (error || req->newptr == NULL) 4345 return error; 4346 4347 HN_LOCK(sc); 4348 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4349 txr = &sc->hn_tx_ring[i]; 4350 *((int *)((uint8_t *)txr + ofs)) = conf; 4351 } 4352 HN_UNLOCK(sc); 4353 4354 return 0; 4355 } 4356 4357 static int 4358 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 4359 { 4360 struct hn_softc *sc = arg1; 4361 int error, size; 4362 4363 size = sc->hn_agg_size; 4364 error = sysctl_handle_int(oidp, &size, 0, req); 4365 if (error || req->newptr == NULL) 4366 return (error); 4367 4368 HN_LOCK(sc); 4369 sc->hn_agg_size = size; 4370 hn_set_txagg(sc); 4371 HN_UNLOCK(sc); 4372 4373 return (0); 4374 } 4375 4376 static int 4377 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 4378 { 4379 struct hn_softc *sc = arg1; 4380 int error, pkts; 4381 4382 pkts = sc->hn_agg_pkts; 4383 error = sysctl_handle_int(oidp, &pkts, 0, req); 4384 if (error || req->newptr == NULL) 4385 return (error); 4386 4387 HN_LOCK(sc); 4388 sc->hn_agg_pkts = pkts; 4389 hn_set_txagg(sc); 4390 HN_UNLOCK(sc); 4391 4392 return (0); 4393 } 4394 4395 static int 4396 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 4397 { 4398 struct hn_softc *sc = arg1; 4399 int pkts; 4400 4401 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 4402 return (sysctl_handle_int(oidp, &pkts, 0, req)); 4403 } 4404 4405 static int 4406 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 4407 { 4408 struct hn_softc *sc = arg1; 4409 int align; 4410 4411 align = sc->hn_tx_ring[0].hn_agg_align; 4412 return (sysctl_handle_int(oidp, &align, 0, req)); 4413 } 4414 4415 static void 4416 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) 4417 { 4418 if (pollhz == 0) 4419 vmbus_chan_poll_disable(chan); 4420 else 4421 vmbus_chan_poll_enable(chan, pollhz); 4422 } 4423 4424 static void 4425 hn_polling(struct hn_softc *sc, u_int pollhz) 4426 { 4427 int nsubch = sc->hn_rx_ring_inuse - 1; 4428 4429 HN_LOCK_ASSERT(sc); 4430 4431 if (nsubch > 0) { 4432 struct vmbus_channel **subch; 4433 int i; 4434 4435 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 4436 for (i = 0; i < nsubch; ++i) 4437 hn_chan_polling(subch[i], pollhz); 4438 vmbus_subchan_rel(subch, nsubch); 4439 } 4440 hn_chan_polling(sc->hn_prichan, pollhz); 4441 } 4442 4443 static int 4444 hn_polling_sysctl(SYSCTL_HANDLER_ARGS) 4445 { 4446 struct hn_softc *sc = arg1; 4447 int pollhz, error; 4448 4449 pollhz = sc->hn_pollhz; 4450 error = sysctl_handle_int(oidp, &pollhz, 0, req); 4451 if (error || req->newptr == NULL) 4452 return (error); 4453 4454 if (pollhz != 0 && 4455 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) 4456 return (EINVAL); 4457 4458 HN_LOCK(sc); 4459 if (sc->hn_pollhz != pollhz) { 4460 sc->hn_pollhz = pollhz; 4461 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && 4462 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 4463 hn_polling(sc, sc->hn_pollhz); 4464 } 4465 HN_UNLOCK(sc); 4466 4467 return (0); 4468 } 4469 4470 static int 4471 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 4472 { 4473 struct hn_softc *sc = arg1; 4474 char verstr[16]; 4475 4476 snprintf(verstr, sizeof(verstr), "%u.%u", 4477 
HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 4478 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 4479 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 4480 } 4481 4482 static int 4483 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 4484 { 4485 struct hn_softc *sc = arg1; 4486 char caps_str[128]; 4487 uint32_t caps; 4488 4489 HN_LOCK(sc); 4490 caps = sc->hn_caps; 4491 HN_UNLOCK(sc); 4492 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 4493 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 4494 } 4495 4496 static int 4497 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 4498 { 4499 struct hn_softc *sc = arg1; 4500 char assist_str[128]; 4501 uint32_t hwassist; 4502 4503 HN_LOCK(sc); 4504 hwassist = sc->hn_ifp->if_hwassist; 4505 HN_UNLOCK(sc); 4506 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 4507 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 4508 } 4509 4510 static int 4511 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 4512 { 4513 struct hn_softc *sc = arg1; 4514 char filter_str[128]; 4515 uint32_t filter; 4516 4517 HN_LOCK(sc); 4518 filter = sc->hn_rx_filter; 4519 HN_UNLOCK(sc); 4520 snprintf(filter_str, sizeof(filter_str), "%b", filter, 4521 NDIS_PACKET_TYPES); 4522 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 4523 } 4524 4525 #ifndef RSS 4526 4527 static int 4528 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 4529 { 4530 struct hn_softc *sc = arg1; 4531 int error; 4532 4533 HN_LOCK(sc); 4534 4535 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4536 if (error || req->newptr == NULL) 4537 goto back; 4538 4539 if ((sc->hn_flags & HN_FLAG_RXVF) || 4540 (hn_xpnt_vf && sc->hn_vf_ifp != NULL)) { 4541 /* 4542 * RSS key is synchronized w/ VF's, don't allow users 4543 * to change it. 4544 */ 4545 error = EBUSY; 4546 goto back; 4547 } 4548 4549 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4550 if (error) 4551 goto back; 4552 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 4553 4554 if (sc->hn_rx_ring_inuse > 1) { 4555 error = hn_rss_reconfig(sc); 4556 } else { 4557 /* Not RSS capable, at least for now; just save the RSS key. */ 4558 error = 0; 4559 } 4560 back: 4561 HN_UNLOCK(sc); 4562 return (error); 4563 } 4564 4565 static int 4566 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 4567 { 4568 struct hn_softc *sc = arg1; 4569 int error; 4570 4571 HN_LOCK(sc); 4572 4573 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4574 if (error || req->newptr == NULL) 4575 goto back; 4576 4577 /* 4578 * Don't allow RSS indirect table change, if this interface is not 4579 * RSS capable currently. 
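* (i.e. only a single RX ring is in use).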
4580 */ 4581 if (sc->hn_rx_ring_inuse == 1) { 4582 error = EOPNOTSUPP; 4583 goto back; 4584 } 4585 4586 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4587 if (error) 4588 goto back; 4589 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 4590 4591 hn_rss_ind_fixup(sc); 4592 error = hn_rss_reconfig(sc); 4593 back: 4594 HN_UNLOCK(sc); 4595 return (error); 4596 } 4597 4598 #endif /* !RSS */ 4599 4600 static int 4601 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 4602 { 4603 struct hn_softc *sc = arg1; 4604 char hash_str[128]; 4605 uint32_t hash; 4606 4607 HN_LOCK(sc); 4608 hash = sc->hn_rss_hash; 4609 HN_UNLOCK(sc); 4610 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4611 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4612 } 4613 4614 static int 4615 hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS) 4616 { 4617 struct hn_softc *sc = arg1; 4618 char hash_str[128]; 4619 uint32_t hash; 4620 4621 HN_LOCK(sc); 4622 hash = sc->hn_rss_hcap; 4623 HN_UNLOCK(sc); 4624 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4625 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4626 } 4627 4628 static int 4629 hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS) 4630 { 4631 struct hn_softc *sc = arg1; 4632 char hash_str[128]; 4633 uint32_t hash; 4634 4635 HN_LOCK(sc); 4636 hash = sc->hn_rx_ring[0].hn_mbuf_hash; 4637 HN_UNLOCK(sc); 4638 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4639 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4640 } 4641 4642 static int 4643 hn_vf_sysctl(SYSCTL_HANDLER_ARGS) 4644 { 4645 struct hn_softc *sc = arg1; 4646 char vf_name[IFNAMSIZ + 1]; 4647 struct ifnet *vf_ifp; 4648 4649 HN_LOCK(sc); 4650 vf_name[0] = '\0'; 4651 vf_ifp = sc->hn_vf_ifp; 4652 if (vf_ifp != NULL) 4653 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4654 HN_UNLOCK(sc); 4655 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4656 } 4657 4658 static int 4659 hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) 4660 { 4661 struct hn_softc *sc = arg1; 4662 char vf_name[IFNAMSIZ + 1]; 4663 struct ifnet *vf_ifp; 4664 4665 HN_LOCK(sc); 4666 vf_name[0] = '\0'; 4667 vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; 4668 if (vf_ifp != NULL) 4669 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4670 HN_UNLOCK(sc); 4671 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4672 } 4673 4674 static int 4675 hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) 4676 { 4677 struct rm_priotracker pt; 4678 struct sbuf *sb; 4679 int error, i; 4680 bool first; 4681 4682 error = sysctl_wire_old_buffer(req, 0); 4683 if (error != 0) 4684 return (error); 4685 4686 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4687 if (sb == NULL) 4688 return (ENOMEM); 4689 4690 rm_rlock(&hn_vfmap_lock, &pt); 4691 4692 first = true; 4693 for (i = 0; i < hn_vfmap_size; ++i) { 4694 struct ifnet *ifp; 4695 4696 if (hn_vfmap[i] == NULL) 4697 continue; 4698 4699 ifp = ifnet_byindex(i); 4700 if (ifp != NULL) { 4701 if (first) 4702 sbuf_printf(sb, "%s", ifp->if_xname); 4703 else 4704 sbuf_printf(sb, " %s", ifp->if_xname); 4705 first = false; 4706 } 4707 } 4708 4709 rm_runlock(&hn_vfmap_lock, &pt); 4710 4711 error = sbuf_finish(sb); 4712 sbuf_delete(sb); 4713 return (error); 4714 } 4715 4716 static int 4717 hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) 4718 { 4719 struct rm_priotracker pt; 4720 struct sbuf *sb; 4721 int error, i; 4722 bool first; 4723 4724 error = sysctl_wire_old_buffer(req, 0); 4725 if (error != 0) 4726 return (error); 4727 4728 sb = 
sbuf_new_for_sysctl(NULL, NULL, 128, req); 4729 if (sb == NULL) 4730 return (ENOMEM); 4731 4732 rm_rlock(&hn_vfmap_lock, &pt); 4733 4734 first = true; 4735 for (i = 0; i < hn_vfmap_size; ++i) { 4736 struct ifnet *ifp, *hn_ifp; 4737 4738 hn_ifp = hn_vfmap[i]; 4739 if (hn_ifp == NULL) 4740 continue; 4741 4742 ifp = ifnet_byindex(i); 4743 if (ifp != NULL) { 4744 if (first) { 4745 sbuf_printf(sb, "%s:%s", ifp->if_xname, 4746 hn_ifp->if_xname); 4747 } else { 4748 sbuf_printf(sb, " %s:%s", ifp->if_xname, 4749 hn_ifp->if_xname); 4750 } 4751 first = false; 4752 } 4753 } 4754 4755 rm_runlock(&hn_vfmap_lock, &pt); 4756 4757 error = sbuf_finish(sb); 4758 sbuf_delete(sb); 4759 return (error); 4760 } 4761 4762 static int 4763 hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS) 4764 { 4765 struct hn_softc *sc = arg1; 4766 int error, onoff = 0; 4767 4768 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) 4769 onoff = 1; 4770 error = sysctl_handle_int(oidp, &onoff, 0, req); 4771 if (error || req->newptr == NULL) 4772 return (error); 4773 4774 HN_LOCK(sc); 4775 /* NOTE: hn_vf_lock for hn_transmit() */ 4776 rm_wlock(&sc->hn_vf_lock); 4777 if (onoff) 4778 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 4779 else 4780 sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF; 4781 rm_wunlock(&sc->hn_vf_lock); 4782 HN_UNLOCK(sc); 4783 4784 return (0); 4785 } 4786 4787 static int 4788 hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS) 4789 { 4790 struct hn_softc *sc = arg1; 4791 int enabled = 0; 4792 4793 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 4794 enabled = 1; 4795 return (sysctl_handle_int(oidp, &enabled, 0, req)); 4796 } 4797 4798 static int 4799 hn_check_iplen(const struct mbuf *m, int hoff) 4800 { 4801 const struct ip *ip; 4802 int len, iphlen, iplen; 4803 const struct tcphdr *th; 4804 int thoff; /* TCP data offset */ 4805 4806 len = hoff + sizeof(struct ip); 4807 4808 /* The packet must be at least the size of an IP header. */ 4809 if (m->m_pkthdr.len < len) 4810 return IPPROTO_DONE; 4811 4812 /* The fixed IP header must reside completely in the first mbuf. */ 4813 if (m->m_len < len) 4814 return IPPROTO_DONE; 4815 4816 ip = mtodo(m, hoff); 4817 4818 /* Bound check the packet's stated IP header length. */ 4819 iphlen = ip->ip_hl << 2; 4820 if (iphlen < sizeof(struct ip)) /* minimum header length */ 4821 return IPPROTO_DONE; 4822 4823 /* The full IP header must reside completely in the one mbuf. */ 4824 if (m->m_len < hoff + iphlen) 4825 return IPPROTO_DONE; 4826 4827 iplen = ntohs(ip->ip_len); 4828 4829 /* 4830 * Check that the amount of data in the buffers is at 4831 * least as much as the IP header would have us expect. 4832 */ 4833 if (m->m_pkthdr.len < hoff + iplen) 4834 return IPPROTO_DONE; 4835 4836 /* 4837 * Ignore IP fragments. 4838 */ 4839 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 4840 return IPPROTO_DONE; 4841 4842 /* 4843 * The TCP/IP or UDP/IP header must be entirely contained within 4844 * the first fragment of a packet.
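* The checks below read the TCP header straight out of the first mbuf, so truncated headers are rejected with IPPROTO_DONE.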
4845 */ 4846 switch (ip->ip_p) { 4847 case IPPROTO_TCP: 4848 if (iplen < iphlen + sizeof(struct tcphdr)) 4849 return IPPROTO_DONE; 4850 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 4851 return IPPROTO_DONE; 4852 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 4853 thoff = th->th_off << 2; 4854 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 4855 return IPPROTO_DONE; 4856 if (m->m_len < hoff + iphlen + thoff) 4857 return IPPROTO_DONE; 4858 break; 4859 case IPPROTO_UDP: 4860 if (iplen < iphlen + sizeof(struct udphdr)) 4861 return IPPROTO_DONE; 4862 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 4863 return IPPROTO_DONE; 4864 break; 4865 default: 4866 if (iplen < iphlen) 4867 return IPPROTO_DONE; 4868 break; 4869 } 4870 return ip->ip_p; 4871 } 4872 4873 static void 4874 hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto) 4875 { 4876 const struct ether_header *eh; 4877 uint16_t etype; 4878 int hoff; 4879 4880 hoff = sizeof(*eh); 4881 /* Checked at the beginning of this function. */ 4882 KASSERT(m_new->m_len >= hoff, ("not ethernet frame")); 4883 4884 eh = mtod(m_new, const struct ether_header *); 4885 etype = ntohs(eh->ether_type); 4886 if (etype == ETHERTYPE_VLAN) { 4887 const struct ether_vlan_header *evl; 4888 4889 hoff = sizeof(*evl); 4890 if (m_new->m_len < hoff) 4891 return; 4892 evl = mtod(m_new, const struct ether_vlan_header *); 4893 etype = ntohs(evl->evl_proto); 4894 } 4895 *l3proto = etype; 4896 4897 if (etype == ETHERTYPE_IP) 4898 *l4proto = hn_check_iplen(m_new, hoff); 4899 else 4900 *l4proto = IPPROTO_DONE; 4901 } 4902 4903 static int 4904 hn_create_rx_data(struct hn_softc *sc, int ring_cnt) 4905 { 4906 struct sysctl_oid_list *child; 4907 struct sysctl_ctx_list *ctx; 4908 device_t dev = sc->hn_dev; 4909 #if defined(INET) || defined(INET6) 4910 #if __FreeBSD_version >= 1100095 4911 int lroent_cnt; 4912 #endif 4913 #endif 4914 int i; 4915 4916 /* 4917 * Create RXBUF for reception. 4918 * 4919 * NOTE: 4920 * - It is shared by all channels. 4921 * - A large enough buffer is allocated, certain version of NVSes 4922 * may further limit the usable space. 
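* HN_RXBUF_SIZE bytes are reserved below regardless of what the host will actually use.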
4923 */ 4924 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4925 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 4926 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4927 if (sc->hn_rxbuf == NULL) { 4928 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 4929 return (ENOMEM); 4930 } 4931 4932 sc->hn_rx_ring_cnt = ring_cnt; 4933 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 4934 4935 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 4936 M_DEVBUF, M_WAITOK | M_ZERO); 4937 4938 #if defined(INET) || defined(INET6) 4939 #if __FreeBSD_version >= 1100095 4940 lroent_cnt = hn_lro_entry_count; 4941 if (lroent_cnt < TCP_LRO_ENTRIES) 4942 lroent_cnt = TCP_LRO_ENTRIES; 4943 if (bootverbose) 4944 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 4945 #endif 4946 #endif /* INET || INET6 */ 4947 4948 ctx = device_get_sysctl_ctx(dev); 4949 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 4950 4951 /* Create dev.hn.UNIT.rx sysctl tree */ 4952 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 4953 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4954 4955 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4956 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4957 4958 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4959 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 4960 &rxr->hn_br_dma, BUS_DMA_WAITOK); 4961 if (rxr->hn_br == NULL) { 4962 device_printf(dev, "allocate bufring failed\n"); 4963 return (ENOMEM); 4964 } 4965 4966 if (hn_trust_hosttcp) 4967 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 4968 if (hn_trust_hostudp) 4969 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 4970 if (hn_trust_hostip) 4971 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 4972 rxr->hn_mbuf_hash = NDIS_HASH_ALL; 4973 rxr->hn_ifp = sc->hn_ifp; 4974 if (i < sc->hn_tx_ring_cnt) 4975 rxr->hn_txr = &sc->hn_tx_ring[i]; 4976 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 4977 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 4978 rxr->hn_rx_idx = i; 4979 rxr->hn_rxbuf = sc->hn_rxbuf; 4980 4981 /* 4982 * Initialize LRO. 
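* One LRO context is set up per RX ring; its limits are tunable via the lro_* sysctls registered later in this function.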
*/ 4984 #if defined(INET) || defined(INET6) 4985 #if __FreeBSD_version >= 1100095 4986 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 4987 hn_lro_mbufq_depth); 4988 #else 4989 tcp_lro_init(&rxr->hn_lro); 4990 rxr->hn_lro.ifp = sc->hn_ifp; 4991 #endif 4992 #if __FreeBSD_version >= 1100099 4993 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 4994 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 4995 #endif 4996 #endif /* INET || INET6 */ 4997 4998 if (sc->hn_rx_sysctl_tree != NULL) { 4999 char name[16]; 5000 5001 /* 5002 * Create per RX ring sysctl tree: 5003 * dev.hn.UNIT.rx.RINGID 5004 */ 5005 snprintf(name, sizeof(name), "%d", i); 5006 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 5007 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 5008 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5009 5010 if (rxr->hn_rx_sysctl_tree != NULL) { 5011 SYSCTL_ADD_ULONG(ctx, 5012 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5013 OID_AUTO, "packets", CTLFLAG_RW, 5014 &rxr->hn_pkts, "# of packets received"); 5015 SYSCTL_ADD_ULONG(ctx, 5016 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5017 OID_AUTO, "rss_pkts", CTLFLAG_RW, 5018 &rxr->hn_rss_pkts, 5019 "# of packets w/ RSS info received"); 5020 SYSCTL_ADD_INT(ctx, 5021 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5022 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 5023 &rxr->hn_pktbuf_len, 0, 5024 "Temporary channel packet buffer length"); 5025 } 5026 } 5027 } 5028 5029 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 5030 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5031 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 5032 #if __FreeBSD_version < 1100095 5033 hn_rx_stat_int_sysctl, 5034 #else 5035 hn_rx_stat_u64_sysctl, 5036 #endif 5037 "LU", "LRO queued"); 5038 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 5039 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5040 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 5041 #if __FreeBSD_version < 1100095 5042 hn_rx_stat_int_sysctl, 5043 #else 5044 hn_rx_stat_u64_sysctl, 5045 #endif 5046 "LU", "LRO flushed"); 5047 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 5048 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5049 __offsetof(struct hn_rx_ring, hn_lro_tried), 5050 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 5051 #if __FreeBSD_version >= 1100099 5052 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 5053 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5054 hn_lro_lenlim_sysctl, "IU", 5055 "Max # of data bytes to be aggregated by LRO"); 5056 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 5057 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5058 hn_lro_ackcnt_sysctl, "I", 5059 "Max # of ACKs to be aggregated by LRO"); 5060 #endif 5061 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 5062 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 5063 hn_trust_hcsum_sysctl, "I", 5064 "Trust tcp segment verification on host side, " 5065 "when csum info is missing"); 5066 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 5067 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 5068 hn_trust_hcsum_sysctl, "I", 5069 "Trust udp datagram verification on host side, " 5070 "when csum info is missing"); 5071 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 5072 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 5073 hn_trust_hcsum_sysctl, "I", 5074 "Trust ip packet verification on host side, " 5075 "when csum info is missing"); 5076 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 5077 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5078
__offsetof(struct hn_rx_ring, hn_csum_ip), 5079 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 5080 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 5081 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5082 __offsetof(struct hn_rx_ring, hn_csum_tcp), 5083 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 5084 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 5085 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5086 __offsetof(struct hn_rx_ring, hn_csum_udp), 5087 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 5088 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 5089 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5090 __offsetof(struct hn_rx_ring, hn_csum_trusted), 5091 hn_rx_stat_ulong_sysctl, "LU", 5092 "# of packets that we trust host's csum verification"); 5093 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 5094 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5095 __offsetof(struct hn_rx_ring, hn_small_pkts), 5096 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 5097 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 5098 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5099 __offsetof(struct hn_rx_ring, hn_ack_failed), 5100 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 5101 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 5102 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 5103 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 5104 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 5105 5106 return (0); 5107 } 5108 5109 static void 5110 hn_destroy_rx_data(struct hn_softc *sc) 5111 { 5112 int i; 5113 5114 if (sc->hn_rxbuf != NULL) { 5115 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) 5116 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 5117 else 5118 device_printf(sc->hn_dev, "RXBUF is referenced\n"); 5119 sc->hn_rxbuf = NULL; 5120 } 5121 5122 if (sc->hn_rx_ring_cnt == 0) 5123 return; 5124 5125 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 5126 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 5127 5128 if (rxr->hn_br == NULL) 5129 continue; 5130 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { 5131 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 5132 } else { 5133 device_printf(sc->hn_dev, 5134 "%dth channel bufring is referenced", i); 5135 } 5136 rxr->hn_br = NULL; 5137 5138 #if defined(INET) || defined(INET6) 5139 tcp_lro_free(&rxr->hn_lro); 5140 #endif 5141 free(rxr->hn_pktbuf, M_DEVBUF); 5142 } 5143 free(sc->hn_rx_ring, M_DEVBUF); 5144 sc->hn_rx_ring = NULL; 5145 5146 sc->hn_rx_ring_cnt = 0; 5147 sc->hn_rx_ring_inuse = 0; 5148 } 5149 5150 static int 5151 hn_tx_ring_create(struct hn_softc *sc, int id) 5152 { 5153 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 5154 device_t dev = sc->hn_dev; 5155 bus_dma_tag_t parent_dtag; 5156 int error, i; 5157 5158 txr->hn_sc = sc; 5159 txr->hn_tx_idx = id; 5160 5161 #ifndef HN_USE_TXDESC_BUFRING 5162 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 5163 #endif 5164 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 5165 5166 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 5167 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 5168 M_DEVBUF, M_WAITOK | M_ZERO); 5169 #ifndef HN_USE_TXDESC_BUFRING 5170 SLIST_INIT(&txr->hn_txlist); 5171 #else 5172 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 5173 M_WAITOK, &txr->hn_tx_lock); 5174 #endif 5175 5176 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { 5177 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( 5178 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); 5179 } else { 5180 txr->hn_tx_taskq = 
sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; 5181 } 5182 5183 #ifdef HN_IFSTART_SUPPORT 5184 if (hn_use_if_start) { 5185 txr->hn_txeof = hn_start_txeof; 5186 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 5187 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 5188 } else 5189 #endif 5190 { 5191 int br_depth; 5192 5193 txr->hn_txeof = hn_xmit_txeof; 5194 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 5195 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 5196 5197 br_depth = hn_get_txswq_depth(txr); 5198 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 5199 M_WAITOK, &txr->hn_tx_lock); 5200 } 5201 5202 txr->hn_direct_tx_size = hn_direct_tx_size; 5203 5204 /* 5205 * Always schedule transmission instead of trying to do direct 5206 * transmission. This one gives the best performance so far. 5207 */ 5208 txr->hn_sched_tx = 1; 5209 5210 parent_dtag = bus_get_dma_tag(dev); 5211 5212 /* DMA tag for RNDIS packet messages. */ 5213 error = bus_dma_tag_create(parent_dtag, /* parent */ 5214 HN_RNDIS_PKT_ALIGN, /* alignment */ 5215 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 5216 BUS_SPACE_MAXADDR, /* lowaddr */ 5217 BUS_SPACE_MAXADDR, /* highaddr */ 5218 NULL, NULL, /* filter, filterarg */ 5219 HN_RNDIS_PKT_LEN, /* maxsize */ 5220 1, /* nsegments */ 5221 HN_RNDIS_PKT_LEN, /* maxsegsize */ 5222 0, /* flags */ 5223 NULL, /* lockfunc */ 5224 NULL, /* lockfuncarg */ 5225 &txr->hn_tx_rndis_dtag); 5226 if (error) { 5227 device_printf(dev, "failed to create rndis dmatag\n"); 5228 return error; 5229 } 5230 5231 /* DMA tag for data. */ 5232 error = bus_dma_tag_create(parent_dtag, /* parent */ 5233 1, /* alignment */ 5234 HN_TX_DATA_BOUNDARY, /* boundary */ 5235 BUS_SPACE_MAXADDR, /* lowaddr */ 5236 BUS_SPACE_MAXADDR, /* highaddr */ 5237 NULL, NULL, /* filter, filterarg */ 5238 HN_TX_DATA_MAXSIZE, /* maxsize */ 5239 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 5240 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 5241 0, /* flags */ 5242 NULL, /* lockfunc */ 5243 NULL, /* lockfuncarg */ 5244 &txr->hn_tx_data_dtag); 5245 if (error) { 5246 device_printf(dev, "failed to create data dmatag\n"); 5247 return error; 5248 } 5249 5250 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 5251 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 5252 5253 txd->txr = txr; 5254 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 5255 STAILQ_INIT(&txd->agg_list); 5256 5257 /* 5258 * Allocate and load RNDIS packet message. 5259 */ 5260 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 5261 (void **)&txd->rndis_pkt, 5262 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 5263 &txd->rndis_pkt_dmap); 5264 if (error) { 5265 device_printf(dev, 5266 "failed to allocate rndis_packet_msg, %d\n", i); 5267 return error; 5268 } 5269 5270 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 5271 txd->rndis_pkt_dmap, 5272 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 5273 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 5274 BUS_DMA_NOWAIT); 5275 if (error) { 5276 device_printf(dev, 5277 "failed to load rndis_packet_msg, %d\n", i); 5278 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5279 txd->rndis_pkt, txd->rndis_pkt_dmap); 5280 return error; 5281 } 5282 5283 /* DMA map for TX data. 
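* One map is created per TX descriptor here; it is destroyed again in hn_txdesc_dmamap_destroy().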
*/ 5284 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 5285 &txd->data_dmap); 5286 if (error) { 5287 device_printf(dev, 5288 "failed to allocate tx data dmamap\n"); 5289 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 5290 txd->rndis_pkt_dmap); 5291 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5292 txd->rndis_pkt, txd->rndis_pkt_dmap); 5293 return error; 5294 } 5295 5296 /* All set, put it to list */ 5297 txd->flags |= HN_TXD_FLAG_ONLIST; 5298 #ifndef HN_USE_TXDESC_BUFRING 5299 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 5300 #else 5301 buf_ring_enqueue(txr->hn_txdesc_br, txd); 5302 #endif 5303 } 5304 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 5305 5306 if (sc->hn_tx_sysctl_tree != NULL) { 5307 struct sysctl_oid_list *child; 5308 struct sysctl_ctx_list *ctx; 5309 char name[16]; 5310 5311 /* 5312 * Create per TX ring sysctl tree: 5313 * dev.hn.UNIT.tx.RINGID 5314 */ 5315 ctx = device_get_sysctl_ctx(dev); 5316 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 5317 5318 snprintf(name, sizeof(name), "%d", id); 5319 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 5320 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5321 5322 if (txr->hn_tx_sysctl_tree != NULL) { 5323 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 5324 5325 #ifdef HN_DEBUG 5326 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 5327 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 5328 "# of available TX descs"); 5329 #endif 5330 #ifdef HN_IFSTART_SUPPORT 5331 if (!hn_use_if_start) 5332 #endif 5333 { 5334 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 5335 CTLFLAG_RD, &txr->hn_oactive, 0, 5336 "over active"); 5337 } 5338 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 5339 CTLFLAG_RW, &txr->hn_pkts, 5340 "# of packets transmitted"); 5341 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 5342 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 5343 } 5344 } 5345 5346 return 0; 5347 } 5348 5349 static void 5350 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 5351 { 5352 struct hn_tx_ring *txr = txd->txr; 5353 5354 KASSERT(txd->m == NULL, ("still has mbuf installed")); 5355 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 5356 5357 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 5358 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 5359 txd->rndis_pkt_dmap); 5360 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 5361 } 5362 5363 static void 5364 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) 5365 { 5366 5367 KASSERT(txd->refs == 0 || txd->refs == 1, 5368 ("invalid txd refs %d", txd->refs)); 5369 5370 /* Aggregated txds will be freed by their aggregating txd. */ 5371 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { 5372 int freed; 5373 5374 freed = hn_txdesc_put(txr, txd); 5375 KASSERT(freed, ("can't free txdesc")); 5376 } 5377 } 5378 5379 static void 5380 hn_tx_ring_destroy(struct hn_tx_ring *txr) 5381 { 5382 int i; 5383 5384 if (txr->hn_txdesc == NULL) 5385 return; 5386 5387 /* 5388 * NOTE: 5389 * Because the freeing of aggregated txds will be deferred 5390 * to the aggregating txd, two passes are used here: 5391 * - The first pass GCes any pending txds. This GC is necessary, 5392 * since if the channels are revoked, hypervisor will not 5393 * deliver send-done for all pending txds. 5394 * - The second pass frees the busdma stuffs, i.e. after all txds 5395 * were freed. 
5396 */ 5397 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5398 hn_txdesc_gc(txr, &txr->hn_txdesc[i]); 5399 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5400 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); 5401 5402 if (txr->hn_tx_data_dtag != NULL) 5403 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 5404 if (txr->hn_tx_rndis_dtag != NULL) 5405 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 5406 5407 #ifdef HN_USE_TXDESC_BUFRING 5408 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 5409 #endif 5410 5411 free(txr->hn_txdesc, M_DEVBUF); 5412 txr->hn_txdesc = NULL; 5413 5414 if (txr->hn_mbuf_br != NULL) 5415 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 5416 5417 #ifndef HN_USE_TXDESC_BUFRING 5418 mtx_destroy(&txr->hn_txlist_spin); 5419 #endif 5420 mtx_destroy(&txr->hn_tx_lock); 5421 } 5422 5423 static int 5424 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 5425 { 5426 struct sysctl_oid_list *child; 5427 struct sysctl_ctx_list *ctx; 5428 int i; 5429 5430 /* 5431 * Create TXBUF for chimney sending. 5432 * 5433 * NOTE: It is shared by all channels. 5434 */ 5435 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 5436 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 5437 BUS_DMA_WAITOK | BUS_DMA_ZERO); 5438 if (sc->hn_chim == NULL) { 5439 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 5440 return (ENOMEM); 5441 } 5442 5443 sc->hn_tx_ring_cnt = ring_cnt; 5444 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 5445 5446 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 5447 M_DEVBUF, M_WAITOK | M_ZERO); 5448 5449 ctx = device_get_sysctl_ctx(sc->hn_dev); 5450 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 5451 5452 /* Create dev.hn.UNIT.tx sysctl tree */ 5453 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 5454 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5455 5456 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 5457 int error; 5458 5459 error = hn_tx_ring_create(sc, i); 5460 if (error) 5461 return error; 5462 } 5463 5464 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 5465 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5466 __offsetof(struct hn_tx_ring, hn_no_txdescs), 5467 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 5468 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 5469 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5470 __offsetof(struct hn_tx_ring, hn_send_failed), 5471 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 5472 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 5473 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5474 __offsetof(struct hn_tx_ring, hn_txdma_failed), 5475 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 5476 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 5477 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5478 __offsetof(struct hn_tx_ring, hn_flush_failed), 5479 hn_tx_stat_ulong_sysctl, "LU", 5480 "# of packet transmission aggregation flush failure"); 5481 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 5482 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5483 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 5484 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 5485 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 5486 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5487 __offsetof(struct hn_tx_ring, hn_tx_chimney), 5488 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 5489 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 5490 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5491 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 5492 
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 5493 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 5494 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 5495 "# of total TX descs"); 5496 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 5497 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 5498 "Chimney send packet size upper boundary"); 5499 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 5500 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5501 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 5502 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 5503 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5504 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 5505 hn_tx_conf_int_sysctl, "I", 5506 "Size of the packet for direct transmission"); 5507 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 5508 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5509 __offsetof(struct hn_tx_ring, hn_sched_tx), 5510 hn_tx_conf_int_sysctl, "I", 5511 "Always schedule transmission " 5512 "instead of doing direct transmission"); 5513 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 5514 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 5515 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 5516 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 5517 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 5518 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 5519 "Applied packet transmission aggregation size"); 5520 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 5521 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5522 hn_txagg_pktmax_sysctl, "I", 5523 "Applied packet transmission aggregation packets"); 5524 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 5525 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5526 hn_txagg_align_sysctl, "I", 5527 "Applied packet transmission aggregation alignment"); 5528 5529 return 0; 5530 } 5531 5532 static void 5533 hn_set_chim_size(struct hn_softc *sc, int chim_size) 5534 { 5535 int i; 5536 5537 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5538 sc->hn_tx_ring[i].hn_chim_size = chim_size; 5539 } 5540 5541 static void 5542 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 5543 { 5544 struct ifnet *ifp = sc->hn_ifp; 5545 u_int hw_tsomax; 5546 int tso_minlen; 5547 5548 HN_LOCK_ASSERT(sc); 5549 5550 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 5551 return; 5552 5553 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 5554 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 5555 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 5556 5557 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 5558 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 5559 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 5560 5561 if (tso_maxlen < tso_minlen) 5562 tso_maxlen = tso_minlen; 5563 else if (tso_maxlen > IP_MAXPACKET) 5564 tso_maxlen = IP_MAXPACKET; 5565 if (tso_maxlen > sc->hn_ndis_tso_szmax) 5566 tso_maxlen = sc->hn_ndis_tso_szmax; 5567 hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 5568 5569 if (hn_xpnt_vf_isready(sc)) { 5570 if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) 5571 hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; 5572 } 5573 ifp->if_hw_tsomax = hw_tsomax; 5574 if (bootverbose) 5575 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 5576 } 5577 5578 static void 5579 hn_fixup_tx_data(struct hn_softc *sc) 5580 { 5581 uint64_t csum_assist; 5582 int i; 5583 5584 hn_set_chim_size(sc, sc->hn_chim_szmax); 5585 if (hn_tx_chimney_size > 0 && 5586 hn_tx_chimney_size < sc->hn_chim_szmax) 5587 hn_set_chim_size(sc, 
hn_tx_chimney_size); 5588 5589 csum_assist = 0; 5590 if (sc->hn_caps & HN_CAP_IPCS) 5591 csum_assist |= CSUM_IP; 5592 if (sc->hn_caps & HN_CAP_TCP4CS) 5593 csum_assist |= CSUM_IP_TCP; 5594 if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs) 5595 csum_assist |= CSUM_IP_UDP; 5596 if (sc->hn_caps & HN_CAP_TCP6CS) 5597 csum_assist |= CSUM_IP6_TCP; 5598 if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs) 5599 csum_assist |= CSUM_IP6_UDP; 5600 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5601 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 5602 5603 if (sc->hn_caps & HN_CAP_HASHVAL) { 5604 /* 5605 * Support HASHVAL pktinfo on TX path. 5606 */ 5607 if (bootverbose) 5608 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 5609 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5610 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 5611 } 5612 } 5613 5614 static void 5615 hn_fixup_rx_data(struct hn_softc *sc) 5616 { 5617 5618 if (sc->hn_caps & HN_CAP_UDPHASH) { 5619 int i; 5620 5621 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 5622 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH; 5623 } 5624 } 5625 5626 static void 5627 hn_destroy_tx_data(struct hn_softc *sc) 5628 { 5629 int i; 5630 5631 if (sc->hn_chim != NULL) { 5632 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { 5633 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 5634 } else { 5635 device_printf(sc->hn_dev, 5636 "chimney sending buffer is referenced"); 5637 } 5638 sc->hn_chim = NULL; 5639 } 5640 5641 if (sc->hn_tx_ring_cnt == 0) 5642 return; 5643 5644 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5645 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 5646 5647 free(sc->hn_tx_ring, M_DEVBUF); 5648 sc->hn_tx_ring = NULL; 5649 5650 sc->hn_tx_ring_cnt = 0; 5651 sc->hn_tx_ring_inuse = 0; 5652 } 5653 5654 #ifdef HN_IFSTART_SUPPORT 5655 5656 static void 5657 hn_start_taskfunc(void *xtxr, int pending __unused) 5658 { 5659 struct hn_tx_ring *txr = xtxr; 5660 5661 mtx_lock(&txr->hn_tx_lock); 5662 hn_start_locked(txr, 0); 5663 mtx_unlock(&txr->hn_tx_lock); 5664 } 5665 5666 static int 5667 hn_start_locked(struct hn_tx_ring *txr, int len) 5668 { 5669 struct hn_softc *sc = txr->hn_sc; 5670 struct ifnet *ifp = sc->hn_ifp; 5671 int sched = 0; 5672 5673 KASSERT(hn_use_if_start, 5674 ("hn_start_locked is called, when if_start is disabled")); 5675 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5676 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5677 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5678 5679 if (__predict_false(txr->hn_suspended)) 5680 return (0); 5681 5682 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 5683 IFF_DRV_RUNNING) 5684 return (0); 5685 5686 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 5687 struct hn_txdesc *txd; 5688 struct mbuf *m_head; 5689 int error; 5690 5691 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 5692 if (m_head == NULL) 5693 break; 5694 5695 if (len > 0 && m_head->m_pkthdr.len > len) { 5696 /* 5697 * This sending could be time consuming; let callers 5698 * dispatch this packet sending (and sending of any 5699 * following up packets) to tx taskqueue. 
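* The direct-transmit paths pass hn_direct_tx_size as len; the taskqueue callers pass 0 to disable this cutoff.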
5700 */ 5701 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5702 sched = 1; 5703 break; 5704 } 5705 5706 #if defined(INET6) || defined(INET) 5707 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 5708 m_head = hn_tso_fixup(m_head); 5709 if (__predict_false(m_head == NULL)) { 5710 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5711 continue; 5712 } 5713 } else if (m_head->m_pkthdr.csum_flags & 5714 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 5715 m_head = hn_set_hlen(m_head); 5716 if (__predict_false(m_head == NULL)) { 5717 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5718 continue; 5719 } 5720 } 5721 #endif 5722 5723 txd = hn_txdesc_get(txr); 5724 if (txd == NULL) { 5725 txr->hn_no_txdescs++; 5726 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5727 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5728 break; 5729 } 5730 5731 error = hn_encap(ifp, txr, txd, &m_head); 5732 if (error) { 5733 /* Both txd and m_head are freed */ 5734 KASSERT(txr->hn_agg_txd == NULL, 5735 ("encap failed w/ pending aggregating txdesc")); 5736 continue; 5737 } 5738 5739 if (txr->hn_agg_pktleft == 0) { 5740 if (txr->hn_agg_txd != NULL) { 5741 KASSERT(m_head == NULL, 5742 ("pending mbuf for aggregating txdesc")); 5743 error = hn_flush_txagg(ifp, txr); 5744 if (__predict_false(error)) { 5745 atomic_set_int(&ifp->if_drv_flags, 5746 IFF_DRV_OACTIVE); 5747 break; 5748 } 5749 } else { 5750 KASSERT(m_head != NULL, ("mbuf was freed")); 5751 error = hn_txpkt(ifp, txr, txd); 5752 if (__predict_false(error)) { 5753 /* txd is freed, but m_head is not */ 5754 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5755 atomic_set_int(&ifp->if_drv_flags, 5756 IFF_DRV_OACTIVE); 5757 break; 5758 } 5759 } 5760 } 5761 #ifdef INVARIANTS 5762 else { 5763 KASSERT(txr->hn_agg_txd != NULL, 5764 ("no aggregating txdesc")); 5765 KASSERT(m_head == NULL, 5766 ("pending mbuf for aggregating txdesc")); 5767 } 5768 #endif 5769 } 5770 5771 /* Flush pending aggerated transmission. */ 5772 if (txr->hn_agg_txd != NULL) 5773 hn_flush_txagg(ifp, txr); 5774 return (sched); 5775 } 5776 5777 static void 5778 hn_start(struct ifnet *ifp) 5779 { 5780 struct hn_softc *sc = ifp->if_softc; 5781 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 5782 5783 if (txr->hn_sched_tx) 5784 goto do_sched; 5785 5786 if (mtx_trylock(&txr->hn_tx_lock)) { 5787 int sched; 5788 5789 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5790 mtx_unlock(&txr->hn_tx_lock); 5791 if (!sched) 5792 return; 5793 } 5794 do_sched: 5795 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 5796 } 5797 5798 static void 5799 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 5800 { 5801 struct hn_tx_ring *txr = xtxr; 5802 5803 mtx_lock(&txr->hn_tx_lock); 5804 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 5805 hn_start_locked(txr, 0); 5806 mtx_unlock(&txr->hn_tx_lock); 5807 } 5808 5809 static void 5810 hn_start_txeof(struct hn_tx_ring *txr) 5811 { 5812 struct hn_softc *sc = txr->hn_sc; 5813 struct ifnet *ifp = sc->hn_ifp; 5814 5815 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5816 5817 if (txr->hn_sched_tx) 5818 goto do_sched; 5819 5820 if (mtx_trylock(&txr->hn_tx_lock)) { 5821 int sched; 5822 5823 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5824 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5825 mtx_unlock(&txr->hn_tx_lock); 5826 if (sched) { 5827 taskqueue_enqueue(txr->hn_tx_taskq, 5828 &txr->hn_tx_task); 5829 } 5830 } else { 5831 do_sched: 5832 /* 5833 * Release the OACTIVE earlier, with the hope, that 5834 * others could catch up. 
The task will clear the 5835 * flag again with the hn_tx_lock to avoid possible 5836 * races. 5837 */ 5838 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5839 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 5840 } 5841 } 5842 5843 #endif /* HN_IFSTART_SUPPORT */ 5844 5845 static int 5846 hn_xmit(struct hn_tx_ring *txr, int len) 5847 { 5848 struct hn_softc *sc = txr->hn_sc; 5849 struct ifnet *ifp = sc->hn_ifp; 5850 struct mbuf *m_head; 5851 int sched = 0; 5852 5853 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5854 #ifdef HN_IFSTART_SUPPORT 5855 KASSERT(hn_use_if_start == 0, 5856 ("hn_xmit is called, when if_start is enabled")); 5857 #endif 5858 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5859 5860 if (__predict_false(txr->hn_suspended)) 5861 return (0); 5862 5863 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 5864 return (0); 5865 5866 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 5867 struct hn_txdesc *txd; 5868 int error; 5869 5870 if (len > 0 && m_head->m_pkthdr.len > len) { 5871 /* 5872 * This sending could be time consuming; let callers 5873 * dispatch this packet sending (and sending of any 5874 * following up packets) to tx taskqueue. 5875 */ 5876 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5877 sched = 1; 5878 break; 5879 } 5880 5881 txd = hn_txdesc_get(txr); 5882 if (txd == NULL) { 5883 txr->hn_no_txdescs++; 5884 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5885 txr->hn_oactive = 1; 5886 break; 5887 } 5888 5889 error = hn_encap(ifp, txr, txd, &m_head); 5890 if (error) { 5891 /* Both txd and m_head are freed; discard */ 5892 KASSERT(txr->hn_agg_txd == NULL, 5893 ("encap failed w/ pending aggregating txdesc")); 5894 drbr_advance(ifp, txr->hn_mbuf_br); 5895 continue; 5896 } 5897 5898 if (txr->hn_agg_pktleft == 0) { 5899 if (txr->hn_agg_txd != NULL) { 5900 KASSERT(m_head == NULL, 5901 ("pending mbuf for aggregating txdesc")); 5902 error = hn_flush_txagg(ifp, txr); 5903 if (__predict_false(error)) { 5904 txr->hn_oactive = 1; 5905 break; 5906 } 5907 } else { 5908 KASSERT(m_head != NULL, ("mbuf was freed")); 5909 error = hn_txpkt(ifp, txr, txd); 5910 if (__predict_false(error)) { 5911 /* txd is freed, but m_head is not */ 5912 drbr_putback(ifp, txr->hn_mbuf_br, 5913 m_head); 5914 txr->hn_oactive = 1; 5915 break; 5916 } 5917 } 5918 } 5919 #ifdef INVARIANTS 5920 else { 5921 KASSERT(txr->hn_agg_txd != NULL, 5922 ("no aggregating txdesc")); 5923 KASSERT(m_head == NULL, 5924 ("pending mbuf for aggregating txdesc")); 5925 } 5926 #endif 5927 5928 /* Sent */ 5929 drbr_advance(ifp, txr->hn_mbuf_br); 5930 } 5931 5932 /* Flush pending aggerated transmission. */ 5933 if (txr->hn_agg_txd != NULL) 5934 hn_flush_txagg(ifp, txr); 5935 return (sched); 5936 } 5937 5938 static int 5939 hn_transmit(struct ifnet *ifp, struct mbuf *m) 5940 { 5941 struct hn_softc *sc = ifp->if_softc; 5942 struct hn_tx_ring *txr; 5943 int error, idx = 0; 5944 5945 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 5946 struct rm_priotracker pt; 5947 5948 rm_rlock(&sc->hn_vf_lock, &pt); 5949 if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 5950 struct mbuf *m_bpf = NULL; 5951 int obytes, omcast; 5952 5953 obytes = m->m_pkthdr.len; 5954 omcast = (m->m_flags & M_MCAST) != 0; 5955 5956 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) { 5957 if (bpf_peers_present(ifp->if_bpf)) { 5958 m_bpf = m_copypacket(m, M_NOWAIT); 5959 if (m_bpf == NULL) { 5960 /* 5961 * Failed to grab a shallow 5962 * copy; tap now. 
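* The mbuf is handed to the VF's if_transmit() right below and cannot be tapped afterwards.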
5963 */ 5964 ETHER_BPF_MTAP(ifp, m); 5965 } 5966 } 5967 } else { 5968 ETHER_BPF_MTAP(ifp, m); 5969 } 5970 5971 error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m); 5972 rm_runlock(&sc->hn_vf_lock, &pt); 5973 5974 if (m_bpf != NULL) { 5975 if (!error) 5976 ETHER_BPF_MTAP(ifp, m_bpf); 5977 m_freem(m_bpf); 5978 } 5979 5980 if (error == ENOBUFS) { 5981 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 5982 } else if (error) { 5983 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5984 } else { 5985 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 5986 if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes); 5987 if (omcast) { 5988 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 5989 omcast); 5990 } 5991 } 5992 return (error); 5993 } 5994 rm_runlock(&sc->hn_vf_lock, &pt); 5995 } 5996 5997 #if defined(INET6) || defined(INET) 5998 /* 5999 * Perform TSO packet header fixup or get l2/l3 header length now, 6000 * since packet headers should be cache-hot. 6001 */ 6002 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 6003 m = hn_tso_fixup(m); 6004 if (__predict_false(m == NULL)) { 6005 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6006 return EIO; 6007 } 6008 } else if (m->m_pkthdr.csum_flags & 6009 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 6010 m = hn_set_hlen(m); 6011 if (__predict_false(m == NULL)) { 6012 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6013 return EIO; 6014 } 6015 } 6016 #endif 6017 6018 /* 6019 * Select the TX ring based on flowid 6020 */ 6021 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 6022 #ifdef RSS 6023 uint32_t bid; 6024 6025 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 6026 &bid) == 0) 6027 idx = bid % sc->hn_tx_ring_inuse; 6028 else 6029 #endif 6030 { 6031 #if defined(INET6) || defined(INET) 6032 int tcpsyn = 0; 6033 6034 if (m->m_pkthdr.len < 128 && 6035 (m->m_pkthdr.csum_flags & 6036 (CSUM_IP_TCP | CSUM_IP6_TCP)) && 6037 (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 6038 m = hn_check_tcpsyn(m, &tcpsyn); 6039 if (__predict_false(m == NULL)) { 6040 if_inc_counter(ifp, 6041 IFCOUNTER_OERRORS, 1); 6042 return (EIO); 6043 } 6044 } 6045 #else 6046 const int tcpsyn = 0; 6047 #endif 6048 if (tcpsyn) 6049 idx = 0; 6050 else 6051 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 6052 } 6053 } 6054 txr = &sc->hn_tx_ring[idx]; 6055 6056 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 6057 if (error) { 6058 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 6059 return error; 6060 } 6061 6062 if (txr->hn_oactive) 6063 return 0; 6064 6065 if (txr->hn_sched_tx) 6066 goto do_sched; 6067 6068 if (mtx_trylock(&txr->hn_tx_lock)) { 6069 int sched; 6070 6071 sched = hn_xmit(txr, txr->hn_direct_tx_size); 6072 mtx_unlock(&txr->hn_tx_lock); 6073 if (!sched) 6074 return 0; 6075 } 6076 do_sched: 6077 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 6078 return 0; 6079 } 6080 6081 static void 6082 hn_tx_ring_qflush(struct hn_tx_ring *txr) 6083 { 6084 struct mbuf *m; 6085 6086 mtx_lock(&txr->hn_tx_lock); 6087 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 6088 m_freem(m); 6089 mtx_unlock(&txr->hn_tx_lock); 6090 } 6091 6092 static void 6093 hn_xmit_qflush(struct ifnet *ifp) 6094 { 6095 struct hn_softc *sc = ifp->if_softc; 6096 struct rm_priotracker pt; 6097 int i; 6098 6099 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 6100 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 6101 if_qflush(ifp); 6102 6103 rm_rlock(&sc->hn_vf_lock, &pt); 6104 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 6105 sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp); 6106 rm_runlock(&sc->hn_vf_lock, &pt); 6107 } 6108 6109 static void 6110 hn_xmit_txeof(struct 
hn_tx_ring *txr) 6111 { 6112 6113 if (txr->hn_sched_tx) 6114 goto do_sched; 6115 6116 if (mtx_trylock(&txr->hn_tx_lock)) { 6117 int sched; 6118 6119 txr->hn_oactive = 0; 6120 sched = hn_xmit(txr, txr->hn_direct_tx_size); 6121 mtx_unlock(&txr->hn_tx_lock); 6122 if (sched) { 6123 taskqueue_enqueue(txr->hn_tx_taskq, 6124 &txr->hn_tx_task); 6125 } 6126 } else { 6127 do_sched: 6128 /* 6129 * Release the oactive earlier, in the hope that 6130 * others can catch up. The task will clear the 6131 * oactive again with the hn_tx_lock to avoid possible 6132 * races. 6133 */ 6134 txr->hn_oactive = 0; 6135 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 6136 } 6137 } 6138 6139 static void 6140 hn_xmit_taskfunc(void *xtxr, int pending __unused) 6141 { 6142 struct hn_tx_ring *txr = xtxr; 6143 6144 mtx_lock(&txr->hn_tx_lock); 6145 hn_xmit(txr, 0); 6146 mtx_unlock(&txr->hn_tx_lock); 6147 } 6148 6149 static void 6150 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) 6151 { 6152 struct hn_tx_ring *txr = xtxr; 6153 6154 mtx_lock(&txr->hn_tx_lock); 6155 txr->hn_oactive = 0; 6156 hn_xmit(txr, 0); 6157 mtx_unlock(&txr->hn_tx_lock); 6158 } 6159 6160 static int 6161 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) 6162 { 6163 struct vmbus_chan_br cbr; 6164 struct hn_rx_ring *rxr; 6165 struct hn_tx_ring *txr = NULL; 6166 int idx, error; 6167 6168 idx = vmbus_chan_subidx(chan); 6169 6170 /* 6171 * Link this channel to RX/TX ring. 6172 */ 6173 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 6174 ("invalid channel index %d, should be >= 0 && < %d", 6175 idx, sc->hn_rx_ring_inuse)); 6176 rxr = &sc->hn_rx_ring[idx]; 6177 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, 6178 ("RX ring %d already attached", idx)); 6179 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; 6180 rxr->hn_chan = chan; 6181 6182 if (bootverbose) { 6183 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", 6184 idx, vmbus_chan_id(chan)); 6185 } 6186 6187 if (idx < sc->hn_tx_ring_inuse) { 6188 txr = &sc->hn_tx_ring[idx]; 6189 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, 6190 ("TX ring %d already attached", idx)); 6191 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; 6192 6193 txr->hn_chan = chan; 6194 if (bootverbose) { 6195 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", 6196 idx, vmbus_chan_id(chan)); 6197 } 6198 } 6199 6200 /* Bind this channel to a proper CPU. */ 6201 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx)); 6202 6203 /* 6204 * Open this channel. 6205 */ 6206 cbr.cbr = rxr->hn_br; 6207 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; 6208 cbr.cbr_txsz = HN_TXBR_SIZE; 6209 cbr.cbr_rxsz = HN_RXBR_SIZE; 6210 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); 6211 if (error) { 6212 if (error == EISCONN) { 6213 if_printf(sc->hn_ifp, "bufring is connected after " 6214 "chan%u open failure\n", vmbus_chan_id(chan)); 6215 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 6216 } else { 6217 if_printf(sc->hn_ifp, "open chan%u failed: %d\n", 6218 vmbus_chan_id(chan), error); 6219 } 6220 } 6221 return (error); 6222 } 6223 6224 static void 6225 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) 6226 { 6227 struct hn_rx_ring *rxr; 6228 int idx, error; 6229 6230 idx = vmbus_chan_subidx(chan); 6231 6232 /* 6233 * Unlink this channel from the RX/TX ring.
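* This is the reverse of hn_chan_attach(): the ATTACHED flag is cleared on the RX ring and, if the index maps to one, on the TX ring as well.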
6234 */ 6235 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 6236 ("invalid channel index %d, should be >= 0 && < %d", 6237 idx, sc->hn_rx_ring_inuse)); 6238 rxr = &sc->hn_rx_ring[idx]; 6239 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), 6240 ("RX ring %d is not attached", idx)); 6241 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; 6242 6243 if (idx < sc->hn_tx_ring_inuse) { 6244 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; 6245 6246 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), 6247 ("TX ring %d is not attached", idx)); 6248 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; 6249 } 6250 6251 /* 6252 * Close this channel. 6253 * 6254 * NOTE: 6255 * Channel closing does _not_ destroy the target channel. 6256 */ 6257 error = vmbus_chan_close_direct(chan); 6258 if (error == EISCONN) { 6259 if_printf(sc->hn_ifp, "chan%u bufring is connected " 6260 "after being closed\n", vmbus_chan_id(chan)); 6261 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 6262 } else if (error) { 6263 if_printf(sc->hn_ifp, "chan%u close failed: %d\n", 6264 vmbus_chan_id(chan), error); 6265 } 6266 } 6267 6268 static int 6269 hn_attach_subchans(struct hn_softc *sc) 6270 { 6271 struct vmbus_channel **subchans; 6272 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 6273 int i, error = 0; 6274 6275 KASSERT(subchan_cnt > 0, ("no sub-channels")); 6276 6277 /* Attach the sub-channels. */ 6278 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 6279 for (i = 0; i < subchan_cnt; ++i) { 6280 int error1; 6281 6282 error1 = hn_chan_attach(sc, subchans[i]); 6283 if (error1) { 6284 error = error1; 6285 /* Move on; all channels will be detached later. */ 6286 } 6287 } 6288 vmbus_subchan_rel(subchans, subchan_cnt); 6289 6290 if (error) { 6291 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); 6292 } else { 6293 if (bootverbose) { 6294 if_printf(sc->hn_ifp, "%d sub-channels attached\n", 6295 subchan_cnt); 6296 } 6297 } 6298 return (error); 6299 } 6300 6301 static void 6302 hn_detach_allchans(struct hn_softc *sc) 6303 { 6304 struct vmbus_channel **subchans; 6305 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 6306 int i; 6307 6308 if (subchan_cnt == 0) 6309 goto back; 6310 6311 /* Detach the sub-channels. */ 6312 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 6313 for (i = 0; i < subchan_cnt; ++i) 6314 hn_chan_detach(sc, subchans[i]); 6315 vmbus_subchan_rel(subchans, subchan_cnt); 6316 6317 back: 6318 /* 6319 * Detach the primary channel, _after_ all sub-channels 6320 * are detached. 6321 */ 6322 hn_chan_detach(sc, sc->hn_prichan); 6323 6324 /* Wait for sub-channels to be destroyed, if any. */ 6325 vmbus_subchan_drain(sc->hn_prichan); 6326 6327 #ifdef INVARIANTS 6328 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 6329 KASSERT((sc->hn_rx_ring[i].hn_rx_flags & 6330 HN_RX_FLAG_ATTACHED) == 0, 6331 ("%dth RX ring is still attached", i)); 6332 } 6333 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 6334 KASSERT((sc->hn_tx_ring[i].hn_tx_flags & 6335 HN_TX_FLAG_ATTACHED) == 0, 6336 ("%dth TX ring is still attached", i)); 6337 } 6338 #endif 6339 } 6340 6341 static int 6342 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) 6343 { 6344 struct vmbus_channel **subchans; 6345 int nchan, rxr_cnt, error; 6346 6347 nchan = *nsubch + 1; 6348 if (nchan == 1) { 6349 /* 6350 * Multiple RX/TX rings are not requested. 6351 */ 6352 *nsubch = 0; 6353 return (0); 6354 } 6355 6356 /* 6357 * Query RSS capabilities, e.g. # of RX rings, and # of indirect 6358 * table entries.
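* If the query fails, the device simply does not support RSS and the code below falls back to a single channel.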
6359 */ 6360 error = hn_rndis_query_rsscaps(sc, &rxr_cnt); 6361 if (error) { 6362 /* No RSS; this is benign. */ 6363 *nsubch = 0; 6364 return (0); 6365 } 6366 if (bootverbose) { 6367 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", 6368 rxr_cnt, nchan); 6369 } 6370 6371 if (nchan > rxr_cnt) 6372 nchan = rxr_cnt; 6373 if (nchan == 1) { 6374 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); 6375 *nsubch = 0; 6376 return (0); 6377 } 6378 6379 /* 6380 * Allocate sub-channels from NVS. 6381 */ 6382 *nsubch = nchan - 1; 6383 error = hn_nvs_alloc_subchans(sc, nsubch); 6384 if (error || *nsubch == 0) { 6385 /* Failed to allocate sub-channels. */ 6386 *nsubch = 0; 6387 return (0); 6388 } 6389 6390 /* 6391 * Wait for all sub-channels to become ready before moving on. 6392 */ 6393 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); 6394 vmbus_subchan_rel(subchans, *nsubch); 6395 return (0); 6396 } 6397 6398 static bool 6399 hn_synth_attachable(const struct hn_softc *sc) 6400 { 6401 int i; 6402 6403 if (sc->hn_flags & HN_FLAG_ERRORS) 6404 return (false); 6405 6406 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 6407 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 6408 6409 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) 6410 return (false); 6411 } 6412 return (true); 6413 } 6414 6415 /* 6416 * Make sure that the RX filter is zero after the successful 6417 * RNDIS initialization. 6418 * 6419 * NOTE: 6420 * Under certain conditions on certain versions of Hyper-V, 6421 * the RNDIS rxfilter is _not_ zero on the hypervisor side 6422 * after the successful RNDIS initialization, which breaks 6423 * the assumption of any following code (well, it breaks the 6424 * RNDIS API contract actually). Clear the RNDIS rxfilter 6425 * explicitly, drain packets sneaking through, and drain the 6426 * interrupt taskqueues scheduled due to the stealth packets. 6427 */ 6428 static void 6429 hn_rndis_init_fixat(struct hn_softc *sc, int nchan) 6430 { 6431 6432 hn_disable_rx(sc); 6433 hn_drain_rxtx(sc, nchan); 6434 } 6435 6436 static int 6437 hn_synth_attach(struct hn_softc *sc, int mtu) 6438 { 6439 #define ATTACHED_NVS 0x0002 6440 #define ATTACHED_RNDIS 0x0004 6441 6442 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 6443 int error, nsubch, nchan = 1, i, rndis_inited; 6444 uint32_t old_caps, attached = 0; 6445 6446 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, 6447 ("synthetic parts were attached")); 6448 6449 if (!hn_synth_attachable(sc)) 6450 return (ENXIO); 6451 6452 /* Save capabilities for later verification. */ 6453 old_caps = sc->hn_caps; 6454 sc->hn_caps = 0; 6455 6456 /* Clear RSS stuffs. */ 6457 sc->hn_rss_ind_size = 0; 6458 sc->hn_rss_hash = 0; 6459 sc->hn_rss_hcap = 0; 6460 6461 /* 6462 * Attach the primary channel _before_ attaching NVS and RNDIS. 6463 */ 6464 error = hn_chan_attach(sc, sc->hn_prichan); 6465 if (error) 6466 goto failed; 6467 6468 /* 6469 * Attach NVS. 6470 */ 6471 error = hn_nvs_attach(sc, mtu); 6472 if (error) 6473 goto failed; 6474 attached |= ATTACHED_NVS; 6475 6476 /* 6477 * Attach RNDIS _after_ NVS is attached. 6478 */ 6479 error = hn_rndis_attach(sc, mtu, &rndis_inited); 6480 if (rndis_inited) 6481 attached |= ATTACHED_RNDIS; 6482 if (error) 6483 goto failed; 6484 6485 /* 6486 * Make sure capabilities are not changed. 
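* The capabilities re-read from RNDIS must match the ones saved above; a mismatch would leave the ifnet advertising features the device no longer reports, so it is treated as fatal.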
6487 */ 6488 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { 6489 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", 6490 old_caps, sc->hn_caps); 6491 error = ENXIO; 6492 goto failed; 6493 } 6494 6495 /* 6496 * Allocate sub-channels for multi-TX/RX rings. 6497 * 6498 * NOTE: 6499 * The # of RX rings that can be used is equivalent to the # of 6500 * channels to be requested. 6501 */ 6502 nsubch = sc->hn_rx_ring_cnt - 1; 6503 error = hn_synth_alloc_subchans(sc, &nsubch); 6504 if (error) 6505 goto failed; 6506 /* NOTE: _Full_ synthetic parts detach is required now. */ 6507 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; 6508 6509 /* 6510 * Set the # of TX/RX rings that could be used according to 6511 * the # of channels that NVS offered. 6512 */ 6513 nchan = nsubch + 1; 6514 hn_set_ring_inuse(sc, nchan); 6515 if (nchan == 1) { 6516 /* Only the primary channel can be used; done */ 6517 goto back; 6518 } 6519 6520 /* 6521 * Attach the sub-channels. 6522 * 6523 * NOTE: hn_set_ring_inuse() _must_ have been called. 6524 */ 6525 error = hn_attach_subchans(sc); 6526 if (error) 6527 goto failed; 6528 6529 /* 6530 * Configure RSS key and indirect table _after_ all sub-channels 6531 * are attached. 6532 */ 6533 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { 6534 /* 6535 * RSS key is not set yet; set it to the default RSS key. 6536 */ 6537 if (bootverbose) 6538 if_printf(sc->hn_ifp, "setup default RSS key\n"); 6539 #ifdef RSS 6540 rss_getkey(rss->rss_key); 6541 #else 6542 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); 6543 #endif 6544 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 6545 } 6546 6547 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { 6548 /* 6549 * RSS indirect table is not set yet; set it up in round- 6550 * robin fashion. 6551 */ 6552 if (bootverbose) { 6553 if_printf(sc->hn_ifp, "setup default RSS indirect " 6554 "table\n"); 6555 } 6556 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 6557 uint32_t subidx; 6558 6559 #ifdef RSS 6560 subidx = rss_get_indirection_to_bucket(i); 6561 #else 6562 subidx = i; 6563 #endif 6564 rss->rss_ind[i] = subidx % nchan; 6565 } 6566 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 6567 } else { 6568 /* 6569 * The # of usable channels may have changed, so we have 6570 * to make sure that all entries in the RSS indirect 6571 * table are valid. 6572 * 6573 * NOTE: hn_set_ring_inuse() _must_ have been called. 6574 */ 6575 hn_rss_ind_fixup(sc); 6576 } 6577 6578 sc->hn_rss_hash = sc->hn_rss_hcap; 6579 if ((sc->hn_flags & HN_FLAG_RXVF) || 6580 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 6581 /* NOTE: Don't reconfigure RSS; will do immediately. */ 6582 hn_vf_rss_fixup(sc, false); 6583 } 6584 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 6585 if (error) 6586 goto failed; 6587 back: 6588 /* 6589 * Fixup transmission aggregation setup. 6590 */ 6591 hn_set_txagg(sc); 6592 hn_rndis_init_fixat(sc, nchan); 6593 return (0); 6594 6595 failed: 6596 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 6597 hn_rndis_init_fixat(sc, nchan); 6598 hn_synth_detach(sc); 6599 } else { 6600 if (attached & ATTACHED_RNDIS) { 6601 hn_rndis_init_fixat(sc, nchan); 6602 hn_rndis_detach(sc); 6603 } 6604 if (attached & ATTACHED_NVS) 6605 hn_nvs_detach(sc); 6606 hn_chan_detach(sc, sc->hn_prichan); 6607 /* Restore old capabilities. */ 6608 sc->hn_caps = old_caps; 6609 } 6610 return (error); 6611 6612 #undef ATTACHED_RNDIS 6613 #undef ATTACHED_NVS 6614 } 6615 6616 /* 6617 * NOTE: 6618 * The interface must have been suspended through hn_suspend(), before 6619 * this function gets called.
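* By then no data or management traffic can be in flight on the synthetic parts, so they can be torn down safely.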
6620 */ 6621 static void 6622 hn_synth_detach(struct hn_softc *sc) 6623 { 6624 6625 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 6626 ("synthetic parts were not attached")); 6627 6628 /* Detach the RNDIS first. */ 6629 hn_rndis_detach(sc); 6630 6631 /* Detach NVS. */ 6632 hn_nvs_detach(sc); 6633 6634 /* Detach all of the channels. */ 6635 hn_detach_allchans(sc); 6636 6637 if (vmbus_current_version >= VMBUS_VERSION_WIN10 && sc->hn_rxbuf_gpadl != 0) { 6638 /* 6639 * Host is post-Win2016, disconnect RXBUF from primary channel here. 6640 */ 6641 int error; 6642 6643 error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, 6644 sc->hn_rxbuf_gpadl); 6645 if (error) { 6646 if_printf(sc->hn_ifp, 6647 "rxbuf gpadl disconn failed: %d\n", error); 6648 sc->hn_flags |= HN_FLAG_RXBUF_REF; 6649 } 6650 sc->hn_rxbuf_gpadl = 0; 6651 } 6652 6653 if (vmbus_current_version >= VMBUS_VERSION_WIN10 && sc->hn_chim_gpadl != 0) { 6654 /* 6655 * Host is post-Win2016, disconnect chimney sending buffer from 6656 * primary channel here. 6657 */ 6658 int error; 6659 6660 error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, 6661 sc->hn_chim_gpadl); 6662 if (error) { 6663 if_printf(sc->hn_ifp, 6664 "chim gpadl disconn failed: %d\n", error); 6665 sc->hn_flags |= HN_FLAG_CHIM_REF; 6666 } 6667 sc->hn_chim_gpadl = 0; 6668 } 6669 sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED; 6670 } 6671 6672 static void 6673 hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt) 6674 { 6675 KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt, 6676 ("invalid ring count %d", ring_cnt)); 6677 6678 if (sc->hn_tx_ring_cnt > ring_cnt) 6679 sc->hn_tx_ring_inuse = ring_cnt; 6680 else 6681 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 6682 sc->hn_rx_ring_inuse = ring_cnt; 6683 6684 #ifdef RSS 6685 if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) { 6686 if_printf(sc->hn_ifp, "# of RX rings (%d) does not match " 6687 "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse, 6688 rss_getnumbuckets()); 6689 } 6690 #endif 6691 6692 if (bootverbose) { 6693 if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n", 6694 sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); 6695 } 6696 } 6697 6698 static void 6699 hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan) 6700 { 6701 6702 /* 6703 * NOTE: 6704 * The TX bufring will not be drained by the hypervisor, 6705 * if the primary channel is revoked. 6706 */ 6707 while (!vmbus_chan_rx_empty(chan) || 6708 (!vmbus_chan_is_revoked(sc->hn_prichan) && 6709 !vmbus_chan_tx_empty(chan))) 6710 pause("waitch", 1); 6711 vmbus_chan_intr_drain(chan); 6712 } 6713 6714 static void 6715 hn_disable_rx(struct hn_softc *sc) 6716 { 6717 6718 /* 6719 * Disable RX by clearing RX filter forcefully. 6720 */ 6721 sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE; 6722 hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */ 6723 6724 /* 6725 * Give RNDIS enough time to flush all pending data packets. 6726 */ 6727 pause("waitrx", (200 * hz) / 1000); 6728 } 6729 6730 /* 6731 * NOTE: 6732 * RX/TX _must_ have been suspended/disabled, before this function 6733 * is called. 6734 */ 6735 static void 6736 hn_drain_rxtx(struct hn_softc *sc, int nchan) 6737 { 6738 struct vmbus_channel **subch = NULL; 6739 int nsubch; 6740 6741 /* 6742 * Drain RX/TX bufrings and interrupts. 
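* Sub-channels are drained before the primary channel, matching the order in which the channels are detached.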
6743 */ 6744 nsubch = nchan - 1; 6745 if (nsubch > 0) 6746 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 6747 6748 if (subch != NULL) { 6749 int i; 6750 6751 for (i = 0; i < nsubch; ++i) 6752 hn_chan_drain(sc, subch[i]); 6753 } 6754 hn_chan_drain(sc, sc->hn_prichan); 6755 6756 if (subch != NULL) 6757 vmbus_subchan_rel(subch, nsubch); 6758 } 6759 6760 static void 6761 hn_suspend_data(struct hn_softc *sc) 6762 { 6763 struct hn_tx_ring *txr; 6764 int i; 6765 6766 HN_LOCK_ASSERT(sc); 6767 6768 /* 6769 * Suspend TX. 6770 */ 6771 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 6772 txr = &sc->hn_tx_ring[i]; 6773 6774 mtx_lock(&txr->hn_tx_lock); 6775 txr->hn_suspended = 1; 6776 mtx_unlock(&txr->hn_tx_lock); 6777 /* No one is able to send more packets now. */ 6778 6779 /* 6780 * Wait for all pending sends to finish. 6781 * 6782 * NOTE: 6783 * We will _not_ receive all pending send-done, if the 6784 * primary channel is revoked. 6785 */ 6786 while (hn_tx_ring_pending(txr) && 6787 !vmbus_chan_is_revoked(sc->hn_prichan)) 6788 pause("hnwtx", 1 /* 1 tick */); 6789 } 6790 6791 /* 6792 * Disable RX. 6793 */ 6794 hn_disable_rx(sc); 6795 6796 /* 6797 * Drain RX/TX. 6798 */ 6799 hn_drain_rxtx(sc, sc->hn_rx_ring_inuse); 6800 6801 /* 6802 * Drain any pending TX tasks. 6803 * 6804 * NOTE: 6805 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX 6806 * tasks will have to be drained _after_ the above hn_drain_rxtx(). 6807 */ 6808 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 6809 txr = &sc->hn_tx_ring[i]; 6810 6811 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); 6812 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); 6813 } 6814 } 6815 6816 static void 6817 hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused) 6818 { 6819 6820 ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL; 6821 } 6822 6823 static void 6824 hn_suspend_mgmt(struct hn_softc *sc) 6825 { 6826 struct task task; 6827 6828 HN_LOCK_ASSERT(sc); 6829 6830 /* 6831 * Make sure that hn_mgmt_taskq0 can no longer be accessed 6832 * through hn_mgmt_taskq. 6833 */ 6834 TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc); 6835 vmbus_chan_run_task(sc->hn_prichan, &task); 6836 6837 /* 6838 * Make sure that all pending management tasks are completed. 6839 */ 6840 taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init); 6841 taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status); 6842 taskqueue_drain_all(sc->hn_mgmt_taskq0); 6843 } 6844 6845 static void 6846 hn_suspend(struct hn_softc *sc) 6847 { 6848 6849 /* Disable polling. */ 6850 hn_polling(sc, 0); 6851 6852 /* 6853 * If the non-transparent mode VF is activated, the synthetic 6854 * device is receiving packets, so the data path of the 6855 * synthetic device must be suspended. 6856 */ 6857 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || 6858 (sc->hn_flags & HN_FLAG_RXVF)) 6859 hn_suspend_data(sc); 6860 hn_suspend_mgmt(sc); 6861 } 6862 6863 static void 6864 hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt) 6865 { 6866 int i; 6867 6868 KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt, 6869 ("invalid TX ring count %d", tx_ring_cnt)); 6870 6871 for (i = 0; i < tx_ring_cnt; ++i) { 6872 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 6873 6874 mtx_lock(&txr->hn_tx_lock); 6875 txr->hn_suspended = 0; 6876 mtx_unlock(&txr->hn_tx_lock); 6877 } 6878 } 6879 6880 static void 6881 hn_resume_data(struct hn_softc *sc) 6882 { 6883 int i; 6884 6885 HN_LOCK_ASSERT(sc); 6886 6887 /* 6888 * Re-enable RX.
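* hn_rxfilter_config() below reprograms the RNDIS RX filter that hn_disable_rx() cleared when the data path was suspended.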
6889 */ 6890 hn_rxfilter_config(sc); 6891 6892 /* 6893 * Make sure to clear suspend status on "all" TX rings, 6894 * since hn_tx_ring_inuse can be changed after 6895 * hn_suspend_data(). 6896 */ 6897 hn_resume_tx(sc, sc->hn_tx_ring_cnt); 6898 6899 #ifdef HN_IFSTART_SUPPORT 6900 if (!hn_use_if_start) 6901 #endif 6902 { 6903 /* 6904 * Flush unused drbrs, since hn_tx_ring_inuse may be 6905 * reduced. 6906 */ 6907 for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i) 6908 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 6909 } 6910 6911 /* 6912 * Kick start TX. 6913 */ 6914 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 6915 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 6916 6917 /* 6918 * Use txeof task, so that any pending oactive can be 6919 * cleared properly. 6920 */ 6921 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 6922 } 6923 } 6924 6925 static void 6926 hn_resume_mgmt(struct hn_softc *sc) 6927 { 6928 6929 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 6930 6931 /* 6932 * Kick off network change detection, if it was pending. 6933 * If no network change was pending, start link status 6934 * checks, which are more lightweight than network change 6935 * detection. 6936 */ 6937 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 6938 hn_change_network(sc); 6939 else 6940 hn_update_link_status(sc); 6941 } 6942 6943 static void 6944 hn_resume(struct hn_softc *sc) 6945 { 6946 6947 /* 6948 * If the non-transparent mode VF is activated, the synthetic 6949 * device has to receive packets, so the data path of the 6950 * synthetic device must be resumed. 6951 */ 6952 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || 6953 (sc->hn_flags & HN_FLAG_RXVF)) 6954 hn_resume_data(sc); 6955 6956 /* 6957 * Don't resume link status change if VF is attached/activated. 6958 * - In the non-transparent VF mode, the synthetic device marks 6959 * link down until the VF is deactivated; i.e. VF is down. 6960 * - In transparent VF mode, VF's media status is used until 6961 * the VF is detached. 6962 */ 6963 if ((sc->hn_flags & HN_FLAG_RXVF) == 0 && 6964 !(hn_xpnt_vf && sc->hn_vf_ifp != NULL)) 6965 hn_resume_mgmt(sc); 6966 6967 /* 6968 * Re-enable polling if this interface is running and 6969 * the polling is requested. 6970 */ 6971 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0) 6972 hn_polling(sc, sc->hn_pollhz); 6973 } 6974 6975 static void 6976 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) 6977 { 6978 const struct rndis_status_msg *msg; 6979 int ofs; 6980 6981 if (dlen < sizeof(*msg)) { 6982 if_printf(sc->hn_ifp, "invalid RNDIS status\n"); 6983 return; 6984 } 6985 msg = data; 6986 6987 switch (msg->rm_status) { 6988 case RNDIS_STATUS_MEDIA_CONNECT: 6989 case RNDIS_STATUS_MEDIA_DISCONNECT: 6990 hn_update_link_status(sc); 6991 break; 6992 6993 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: 6994 case RNDIS_STATUS_LINK_SPEED_CHANGE: 6995 /* Not really useful; ignore.
*/ 6996 break; 6997 6998 case RNDIS_STATUS_NETWORK_CHANGE: 6999 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); 7000 if (dlen < ofs + msg->rm_stbuflen || 7001 msg->rm_stbuflen < sizeof(uint32_t)) { 7002 if_printf(sc->hn_ifp, "network changed\n"); 7003 } else { 7004 uint32_t change; 7005 7006 memcpy(&change, ((const uint8_t *)msg) + ofs, 7007 sizeof(change)); 7008 if_printf(sc->hn_ifp, "network changed, change %u\n", 7009 change); 7010 } 7011 hn_change_network(sc); 7012 break; 7013 7014 default: 7015 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", 7016 msg->rm_status); 7017 break; 7018 } 7019 } 7020 7021 static int 7022 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) 7023 { 7024 const struct rndis_pktinfo *pi = info_data; 7025 uint32_t mask = 0; 7026 7027 while (info_dlen != 0) { 7028 const void *data; 7029 uint32_t dlen; 7030 7031 if (__predict_false(info_dlen < sizeof(*pi))) 7032 return (EINVAL); 7033 if (__predict_false(info_dlen < pi->rm_size)) 7034 return (EINVAL); 7035 info_dlen -= pi->rm_size; 7036 7037 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) 7038 return (EINVAL); 7039 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) 7040 return (EINVAL); 7041 dlen = pi->rm_size - pi->rm_pktinfooffset; 7042 data = pi->rm_data; 7043 7044 switch (pi->rm_type) { 7045 case NDIS_PKTINFO_TYPE_VLAN: 7046 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) 7047 return (EINVAL); 7048 info->vlan_info = *((const uint32_t *)data); 7049 mask |= HN_RXINFO_VLAN; 7050 break; 7051 7052 case NDIS_PKTINFO_TYPE_CSUM: 7053 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) 7054 return (EINVAL); 7055 info->csum_info = *((const uint32_t *)data); 7056 mask |= HN_RXINFO_CSUM; 7057 break; 7058 7059 case HN_NDIS_PKTINFO_TYPE_HASHVAL: 7060 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) 7061 return (EINVAL); 7062 info->hash_value = *((const uint32_t *)data); 7063 mask |= HN_RXINFO_HASHVAL; 7064 break; 7065 7066 case HN_NDIS_PKTINFO_TYPE_HASHINF: 7067 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) 7068 return (EINVAL); 7069 info->hash_info = *((const uint32_t *)data); 7070 mask |= HN_RXINFO_HASHINF; 7071 break; 7072 7073 default: 7074 goto next; 7075 } 7076 7077 if (mask == HN_RXINFO_ALL) { 7078 /* All found; done */ 7079 break; 7080 } 7081 next: 7082 pi = (const struct rndis_pktinfo *) 7083 ((const uint8_t *)pi + pi->rm_size); 7084 } 7085 7086 /* 7087 * Final fixup. 7088 * - If there is no hash value, invalidate the hash info. 7089 */ 7090 if ((mask & HN_RXINFO_HASHVAL) == 0) 7091 info->hash_info = HN_NDIS_HASH_INFO_INVALID; 7092 return (0); 7093 } 7094 7095 static __inline bool 7096 hn_rndis_check_overlap(int off, int len, int check_off, int check_len) 7097 { 7098 7099 if (off < check_off) { 7100 if (__predict_true(off + len <= check_off)) 7101 return (false); 7102 } else if (off > check_off) { 7103 if (__predict_true(check_off + check_len <= off)) 7104 return (false); 7105 } 7106 return (true); 7107 } 7108 7109 static void 7110 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) 7111 { 7112 const struct rndis_packet_msg *pkt; 7113 struct hn_rxinfo info; 7114 int data_off, pktinfo_off, data_len, pktinfo_len; 7115 7116 /* 7117 * Check length. 
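* The buffer must hold at least an RNDIS packet header, must not be shorter than the length the message claims, and the claimed data/oob/pktinfo lengths must all fit within that message length.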
7118 */ 7119 if (__predict_false(dlen < sizeof(*pkt))) { 7120 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n"); 7121 return; 7122 } 7123 pkt = data; 7124 7125 if (__predict_false(dlen < pkt->rm_len)) { 7126 if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, " 7127 "dlen %d, msglen %u\n", dlen, pkt->rm_len); 7128 return; 7129 } 7130 if (__predict_false(pkt->rm_len < 7131 pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) { 7132 if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, " 7133 "msglen %u, data %u, oob %u, pktinfo %u\n", 7134 pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen, 7135 pkt->rm_pktinfolen); 7136 return; 7137 } 7138 if (__predict_false(pkt->rm_datalen == 0)) { 7139 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n"); 7140 return; 7141 } 7142 7143 /* 7144 * Check offsets. 7145 */ 7146 #define IS_OFFSET_INVALID(ofs) \ 7147 ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \ 7148 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)) 7149 7150 /* XXX Hyper-V does not meet data offset alignment requirement */ 7151 if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) { 7152 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7153 "data offset %u\n", pkt->rm_dataoffset); 7154 return; 7155 } 7156 if (__predict_false(pkt->rm_oobdataoffset > 0 && 7157 IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) { 7158 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7159 "oob offset %u\n", pkt->rm_oobdataoffset); 7160 return; 7161 } 7162 if (__predict_true(pkt->rm_pktinfooffset > 0) && 7163 __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) { 7164 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7165 "pktinfo offset %u\n", pkt->rm_pktinfooffset); 7166 return; 7167 } 7168 7169 #undef IS_OFFSET_INVALID 7170 7171 data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset); 7172 data_len = pkt->rm_datalen; 7173 pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset); 7174 pktinfo_len = pkt->rm_pktinfolen; 7175 7176 /* 7177 * Check OOB coverage. 7178 */ 7179 if (__predict_false(pkt->rm_oobdatalen != 0)) { 7180 int oob_off, oob_len; 7181 7182 if_printf(rxr->hn_ifp, "got oobdata\n"); 7183 oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); 7184 oob_len = pkt->rm_oobdatalen; 7185 7186 if (__predict_false(oob_off + oob_len > pkt->rm_len)) { 7187 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7188 "oob overflow, msglen %u, oob abs %d len %d\n", 7189 pkt->rm_len, oob_off, oob_len); 7190 return; 7191 } 7192 7193 /* 7194 * Check against data. 7195 */ 7196 if (hn_rndis_check_overlap(oob_off, oob_len, 7197 data_off, data_len)) { 7198 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7199 "oob overlaps data, oob abs %d len %d, " 7200 "data abs %d len %d\n", 7201 oob_off, oob_len, data_off, data_len); 7202 return; 7203 } 7204 7205 /* 7206 * Check against pktinfo. 7207 */ 7208 if (pktinfo_len != 0 && 7209 hn_rndis_check_overlap(oob_off, oob_len, 7210 pktinfo_off, pktinfo_len)) { 7211 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7212 "oob overlaps pktinfo, oob abs %d len %d, " 7213 "pktinfo abs %d len %d\n", 7214 oob_off, oob_len, pktinfo_off, pktinfo_len); 7215 return; 7216 } 7217 } 7218 7219 /* 7220 * Check per-packet-info coverage and find useful per-packet-info.
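* The pktinfo region must lie within the message and must not overlap the data region; hn_rndis_rxinfo() then extracts the VLAN, checksum and hash metadata, if present.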
7221 */ 7222 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; 7223 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; 7224 info.hash_info = HN_NDIS_HASH_INFO_INVALID; 7225 if (__predict_true(pktinfo_len != 0)) { 7226 bool overlap; 7227 int error; 7228 7229 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { 7230 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7231 "pktinfo overflow, msglen %u, " 7232 "pktinfo abs %d len %d\n", 7233 pkt->rm_len, pktinfo_off, pktinfo_len); 7234 return; 7235 } 7236 7237 /* 7238 * Check packet info coverage. 7239 */ 7240 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, 7241 data_off, data_len); 7242 if (__predict_false(overlap)) { 7243 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7244 "pktinfo overlap data, pktinfo abs %d len %d, " 7245 "data abs %d len %d\n", 7246 pktinfo_off, pktinfo_len, data_off, data_len); 7247 return; 7248 } 7249 7250 /* 7251 * Find useful per-packet-info. 7252 */ 7253 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, 7254 pktinfo_len, &info); 7255 if (__predict_false(error)) { 7256 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " 7257 "pktinfo\n"); 7258 return; 7259 } 7260 } 7261 7262 if (__predict_false(data_off + data_len > pkt->rm_len)) { 7263 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7264 "data overflow, msglen %u, data abs %d len %d\n", 7265 pkt->rm_len, data_off, data_len); 7266 return; 7267 } 7268 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); 7269 } 7270 7271 static __inline void 7272 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) 7273 { 7274 const struct rndis_msghdr *hdr; 7275 7276 if (__predict_false(dlen < sizeof(*hdr))) { 7277 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); 7278 return; 7279 } 7280 hdr = data; 7281 7282 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { 7283 /* Hot data path. */ 7284 hn_rndis_rx_data(rxr, data, dlen); 7285 /* Done! */ 7286 return; 7287 } 7288 7289 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) 7290 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); 7291 else 7292 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); 7293 } 7294 7295 static void 7296 hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) 7297 { 7298 const struct hn_nvs_hdr *hdr; 7299 7300 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { 7301 if_printf(sc->hn_ifp, "invalid nvs notify\n"); 7302 return; 7303 } 7304 hdr = VMBUS_CHANPKT_CONST_DATA(pkt); 7305 7306 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { 7307 /* Useless; ignore */ 7308 return; 7309 } 7310 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); 7311 } 7312 7313 static void 7314 hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, 7315 const struct vmbus_chanpkt_hdr *pkt) 7316 { 7317 struct hn_nvs_sendctx *sndc; 7318 7319 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; 7320 sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), 7321 VMBUS_CHANPKT_DATALEN(pkt)); 7322 /* 7323 * NOTE: 7324 * 'sndc' CAN NOT be accessed anymore, since it can be freed by 7325 * its callback. 
7326 */ 7327 } 7328 7329 static void 7330 hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 7331 const struct vmbus_chanpkt_hdr *pkthdr) 7332 { 7333 const struct vmbus_chanpkt_rxbuf *pkt; 7334 const struct hn_nvs_hdr *nvs_hdr; 7335 int count, i, hlen; 7336 7337 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { 7338 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); 7339 return; 7340 } 7341 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); 7342 7343 /* Make sure that this is a RNDIS message. */ 7344 if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { 7345 if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", 7346 nvs_hdr->nvs_type); 7347 return; 7348 } 7349 7350 hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); 7351 if (__predict_false(hlen < sizeof(*pkt))) { 7352 if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); 7353 return; 7354 } 7355 pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; 7356 7357 if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { 7358 if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", 7359 pkt->cp_rxbuf_id); 7360 return; 7361 } 7362 7363 count = pkt->cp_rxbuf_cnt; 7364 if (__predict_false(hlen < 7365 __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { 7366 if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); 7367 return; 7368 } 7369 7370 /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ 7371 for (i = 0; i < count; ++i) { 7372 int ofs, len; 7373 7374 ofs = pkt->cp_rxbuf[i].rb_ofs; 7375 len = pkt->cp_rxbuf[i].rb_len; 7376 if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { 7377 if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " 7378 "ofs %d, len %d\n", i, ofs, len); 7379 continue; 7380 } 7381 hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); 7382 } 7383 7384 /* 7385 * Ack the consumed RXBUF associated w/ this channel packet, 7386 * so that this RXBUF can be recycled by the hypervisor. 7387 */ 7388 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); 7389 } 7390 7391 static void 7392 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 7393 uint64_t tid) 7394 { 7395 struct hn_nvs_rndis_ack ack; 7396 int retries, error; 7397 7398 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; 7399 ack.nvs_status = HN_NVS_STATUS_OK; 7400 7401 retries = 0; 7402 again: 7403 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, 7404 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); 7405 if (__predict_false(error == EAGAIN)) { 7406 /* 7407 * NOTE: 7408 * This should _not_ happen in real world, since the 7409 * consumption of the TX bufring from the TX path is 7410 * controlled. 7411 */ 7412 if (rxr->hn_ack_failed == 0) 7413 if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); 7414 rxr->hn_ack_failed++; 7415 retries++; 7416 if (retries < 10) { 7417 DELAY(100); 7418 goto again; 7419 } 7420 /* RXBUF leaks! */ 7421 if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); 7422 } 7423 } 7424 7425 static void 7426 hn_chan_callback(struct vmbus_channel *chan, void *xrxr) 7427 { 7428 struct hn_rx_ring *rxr = xrxr; 7429 struct hn_softc *sc = rxr->hn_ifp->if_softc; 7430 7431 for (;;) { 7432 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; 7433 int error, pktlen; 7434 7435 pktlen = rxr->hn_pktbuf_len; 7436 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); 7437 if (__predict_false(error == ENOBUFS)) { 7438 void *nbuf; 7439 int nlen; 7440 7441 /* 7442 * Expand channel packet buffer. 7443 * 7444 * XXX 7445 * Use M_WAITOK here, since allocation failure 7446 * is fatal. 
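* The buffer size is doubled until the pending packet fits, so repeated expansion is rare.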
7447 */ 7448 nlen = rxr->hn_pktbuf_len * 2; 7449 while (nlen < pktlen) 7450 nlen *= 2; 7451 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); 7452 7453 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", 7454 rxr->hn_pktbuf_len, nlen); 7455 7456 free(rxr->hn_pktbuf, M_DEVBUF); 7457 rxr->hn_pktbuf = nbuf; 7458 rxr->hn_pktbuf_len = nlen; 7459 /* Retry! */ 7460 continue; 7461 } else if (__predict_false(error == EAGAIN)) { 7462 /* No more channel packets; done! */ 7463 break; 7464 } 7465 KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error)); 7466 7467 switch (pkt->cph_type) { 7468 case VMBUS_CHANPKT_TYPE_COMP: 7469 hn_nvs_handle_comp(sc, chan, pkt); 7470 break; 7471 7472 case VMBUS_CHANPKT_TYPE_RXBUF: 7473 hn_nvs_handle_rxbuf(rxr, chan, pkt); 7474 break; 7475 7476 case VMBUS_CHANPKT_TYPE_INBAND: 7477 hn_nvs_handle_notify(sc, pkt); 7478 break; 7479 7480 default: 7481 if_printf(rxr->hn_ifp, "unknown chan pkt %u\n", 7482 pkt->cph_type); 7483 break; 7484 } 7485 } 7486 hn_chan_rollup(rxr, rxr->hn_txr); 7487 } 7488 7489 static void 7490 hn_sysinit(void *arg __unused) 7491 { 7492 int i; 7493 7494 hn_udpcs_fixup = counter_u64_alloc(M_WAITOK); 7495 7496 #ifdef HN_IFSTART_SUPPORT 7497 /* 7498 * Don't use ifnet.if_start if transparent VF mode is requested; 7499 * mainly due to the IFF_DRV_OACTIVE flag. 7500 */ 7501 if (hn_xpnt_vf && hn_use_if_start) { 7502 hn_use_if_start = 0; 7503 printf("hn: transparent VF mode, if_transmit will be used " 7504 "instead of if_start\n"); 7505 } 7506 #endif 7507 if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) { 7508 printf("hn: invalid transparent VF attach " 7509 "wait timeout %d, reset to %d\n", 7510 hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN); 7511 hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; 7512 } 7513 7514 /* 7515 * Initialize VF map. 7516 */ 7517 rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE); 7518 hn_vfmap_size = HN_VFMAP_SIZE_DEF; 7519 hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF, 7520 M_WAITOK | M_ZERO); 7521 7522 /* 7523 * Fix the # of TX taskqueues. 7524 */ 7525 if (hn_tx_taskq_cnt <= 0) 7526 hn_tx_taskq_cnt = 1; 7527 else if (hn_tx_taskq_cnt > mp_ncpus) 7528 hn_tx_taskq_cnt = mp_ncpus; 7529 7530 /* 7531 * Fix the TX taskqueue mode. 7532 */ 7533 switch (hn_tx_taskq_mode) { 7534 case HN_TX_TASKQ_M_INDEP: 7535 case HN_TX_TASKQ_M_GLOBAL: 7536 case HN_TX_TASKQ_M_EVTTQ: 7537 break; 7538 default: 7539 hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; 7540 break; 7541 } 7542 7543 if (vm_guest != VM_GUEST_HV) 7544 return; 7545 7546 if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL) 7547 return; 7548 7549 hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 7550 M_DEVBUF, M_WAITOK); 7551 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 7552 hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK, 7553 taskqueue_thread_enqueue, &hn_tx_taskque[i]); 7554 taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET, 7555 "hn tx%d", i); 7556 } 7557 } 7558 SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL); 7559 7560 static void 7561 hn_sysuninit(void *arg __unused) 7562 { 7563 7564 if (hn_tx_taskque != NULL) { 7565 int i; 7566 7567 for (i = 0; i < hn_tx_taskq_cnt; ++i) 7568 taskqueue_free(hn_tx_taskque[i]); 7569 free(hn_tx_taskque, M_DEVBUF); 7570 } 7571 7572 if (hn_vfmap != NULL) 7573 free(hn_vfmap, M_DEVBUF); 7574 rm_destroy(&hn_vfmap_lock); 7575 7576 counter_u64_free(hn_udpcs_fixup); 7577 } 7578 SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL); 7579