1 /*- 2 * Copyright (c) 2010-2012 Citrix Inc. 3 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. 4 * Copyright (c) 2012 NetApp Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 2004-2006 Kip Macy 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 
53 */ 54 55 #include <sys/cdefs.h> 56 __FBSDID("$FreeBSD$"); 57 58 #include "opt_hn.h" 59 #include "opt_inet6.h" 60 #include "opt_inet.h" 61 #include "opt_rss.h" 62 63 #include <sys/param.h> 64 #include <sys/systm.h> 65 #include <sys/bus.h> 66 #include <sys/counter.h> 67 #include <sys/kernel.h> 68 #include <sys/limits.h> 69 #include <sys/malloc.h> 70 #include <sys/mbuf.h> 71 #include <sys/module.h> 72 #include <sys/queue.h> 73 #include <sys/lock.h> 74 #include <sys/rmlock.h> 75 #include <sys/sbuf.h> 76 #include <sys/smp.h> 77 #include <sys/socket.h> 78 #include <sys/sockio.h> 79 #include <sys/sx.h> 80 #include <sys/sysctl.h> 81 #include <sys/taskqueue.h> 82 #include <sys/buf_ring.h> 83 #include <sys/eventhandler.h> 84 85 #include <machine/atomic.h> 86 #include <machine/in_cksum.h> 87 88 #include <net/bpf.h> 89 #include <net/ethernet.h> 90 #include <net/if.h> 91 #include <net/if_dl.h> 92 #include <net/if_media.h> 93 #include <net/if_types.h> 94 #include <net/if_var.h> 95 #include <net/rndis.h> 96 #ifdef RSS 97 #include <net/rss_config.h> 98 #endif 99 100 #include <netinet/in_systm.h> 101 #include <netinet/in.h> 102 #include <netinet/ip.h> 103 #include <netinet/ip6.h> 104 #include <netinet/tcp.h> 105 #include <netinet/tcp_lro.h> 106 #include <netinet/udp.h> 107 108 #include <dev/hyperv/include/hyperv.h> 109 #include <dev/hyperv/include/hyperv_busdma.h> 110 #include <dev/hyperv/include/vmbus.h> 111 #include <dev/hyperv/include/vmbus_xact.h> 112 113 #include <dev/hyperv/netvsc/ndis.h> 114 #include <dev/hyperv/netvsc/if_hnreg.h> 115 #include <dev/hyperv/netvsc/if_hnvar.h> 116 #include <dev/hyperv/netvsc/hn_nvs.h> 117 #include <dev/hyperv/netvsc/hn_rndis.h> 118 119 #include "vmbus_if.h" 120 121 #define HN_IFSTART_SUPPORT 122 123 #define HN_RING_CNT_DEF_MAX 8 124 125 #define HN_VFMAP_SIZE_DEF 8 126 127 #define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */ 128 129 /* YYY should get it from the underlying channel */ 130 #define HN_TX_DESC_CNT 512 131 132 #define HN_RNDIS_PKT_LEN \ 133 (sizeof(struct rndis_packet_msg) + \ 134 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ 135 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ 136 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ 137 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) 138 #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE 139 #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE 140 141 #define HN_TX_DATA_BOUNDARY PAGE_SIZE 142 #define HN_TX_DATA_MAXSIZE IP_MAXPACKET 143 #define HN_TX_DATA_SEGSIZE PAGE_SIZE 144 /* -1 for RNDIS packet message */ 145 #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) 146 147 #define HN_DIRECT_TX_SIZE_DEF 128 148 149 #define HN_EARLY_TXEOF_THRESH 8 150 151 #define HN_PKTBUF_LEN_DEF (16 * 1024) 152 153 #define HN_LROENT_CNT_DEF 128 154 155 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) 156 #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) 157 /* YYY 2*MTU is a bit rough, but should be good enough. 
*/ 158 #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) 159 160 #define HN_LRO_ACKCNT_DEF 1 161 162 #define HN_LOCK_INIT(sc) \ 163 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) 164 #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) 165 #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) 166 #define HN_LOCK(sc) \ 167 do { \ 168 while (sx_try_xlock(&(sc)->hn_lock) == 0) \ 169 DELAY(1000); \ 170 } while (0) 171 #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) 172 173 #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) 174 #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) 175 #define HN_CSUM_IP_HWASSIST(sc) \ 176 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) 177 #define HN_CSUM_IP6_HWASSIST(sc) \ 178 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) 179 180 #define HN_PKTSIZE_MIN(align) \ 181 roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ 182 HN_RNDIS_PKT_LEN, (align)) 183 #define HN_PKTSIZE(m, align) \ 184 roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) 185 186 #ifdef RSS 187 #define HN_RING_IDX2CPU(sc, idx) rss_getcpu((idx) % rss_getnumbuckets()) 188 #else 189 #define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus) 190 #endif 191 192 struct hn_txdesc { 193 #ifndef HN_USE_TXDESC_BUFRING 194 SLIST_ENTRY(hn_txdesc) link; 195 #endif 196 STAILQ_ENTRY(hn_txdesc) agg_link; 197 198 /* Aggregated txdescs, in sending order. */ 199 STAILQ_HEAD(, hn_txdesc) agg_list; 200 201 /* The oldest packet, if transmission aggregation happens. */ 202 struct mbuf *m; 203 struct hn_tx_ring *txr; 204 int refs; 205 uint32_t flags; /* HN_TXD_FLAG_ */ 206 struct hn_nvs_sendctx send_ctx; 207 uint32_t chim_index; 208 int chim_size; 209 210 bus_dmamap_t data_dmap; 211 212 bus_addr_t rndis_pkt_paddr; 213 struct rndis_packet_msg *rndis_pkt; 214 bus_dmamap_t rndis_pkt_dmap; 215 }; 216 217 #define HN_TXD_FLAG_ONLIST 0x0001 218 #define HN_TXD_FLAG_DMAMAP 0x0002 219 #define HN_TXD_FLAG_ONAGG 0x0004 220 221 struct hn_rxinfo { 222 uint32_t vlan_info; 223 uint32_t csum_info; 224 uint32_t hash_info; 225 uint32_t hash_value; 226 }; 227 228 struct hn_rxvf_setarg { 229 struct hn_rx_ring *rxr; 230 struct ifnet *vf_ifp; 231 }; 232 233 #define HN_RXINFO_VLAN 0x0001 234 #define HN_RXINFO_CSUM 0x0002 235 #define HN_RXINFO_HASHINF 0x0004 236 #define HN_RXINFO_HASHVAL 0x0008 237 #define HN_RXINFO_ALL \ 238 (HN_RXINFO_VLAN | \ 239 HN_RXINFO_CSUM | \ 240 HN_RXINFO_HASHINF | \ 241 HN_RXINFO_HASHVAL) 242 243 #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff 244 #define HN_NDIS_RXCSUM_INFO_INVALID 0 245 #define HN_NDIS_HASH_INFO_INVALID 0 246 247 static int hn_probe(device_t); 248 static int hn_attach(device_t); 249 static int hn_detach(device_t); 250 static int hn_shutdown(device_t); 251 static void hn_chan_callback(struct vmbus_channel *, 252 void *); 253 254 static void hn_init(void *); 255 static int hn_ioctl(struct ifnet *, u_long, caddr_t); 256 #ifdef HN_IFSTART_SUPPORT 257 static void hn_start(struct ifnet *); 258 #endif 259 static int hn_transmit(struct ifnet *, struct mbuf *); 260 static void hn_xmit_qflush(struct ifnet *); 261 static int hn_ifmedia_upd(struct ifnet *); 262 static void hn_ifmedia_sts(struct ifnet *, 263 struct ifmediareq *); 264 265 static void hn_ifnet_event(void *, struct ifnet *, int); 266 static void hn_ifaddr_event(void *, struct ifnet *); 267 static void hn_ifnet_attevent(void *, struct ifnet *); 268 static void hn_ifnet_detevent(void *, struct ifnet *); 269 static void hn_ifnet_lnkevent(void *, struct ifnet *, int); 270 
271 static bool hn_ismyvf(const struct hn_softc *, 272 const struct ifnet *); 273 static void hn_rxvf_change(struct hn_softc *, 274 struct ifnet *, bool); 275 static void hn_rxvf_set(struct hn_softc *, struct ifnet *); 276 static void hn_rxvf_set_task(void *, int); 277 static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *); 278 static int hn_xpnt_vf_iocsetflags(struct hn_softc *); 279 static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, 280 struct ifreq *); 281 static void hn_xpnt_vf_saveifflags(struct hn_softc *); 282 static bool hn_xpnt_vf_isready(struct hn_softc *); 283 static void hn_xpnt_vf_setready(struct hn_softc *); 284 static void hn_xpnt_vf_init_taskfunc(void *, int); 285 static void hn_xpnt_vf_init(struct hn_softc *); 286 static void hn_xpnt_vf_setenable(struct hn_softc *); 287 static void hn_xpnt_vf_setdisable(struct hn_softc *, bool); 288 static void hn_vf_rss_fixup(struct hn_softc *, bool); 289 static void hn_vf_rss_restore(struct hn_softc *); 290 291 static int hn_rndis_rxinfo(const void *, int, 292 struct hn_rxinfo *); 293 static void hn_rndis_rx_data(struct hn_rx_ring *, 294 const void *, int); 295 static void hn_rndis_rx_status(struct hn_softc *, 296 const void *, int); 297 static void hn_rndis_init_fixat(struct hn_softc *, int); 298 299 static void hn_nvs_handle_notify(struct hn_softc *, 300 const struct vmbus_chanpkt_hdr *); 301 static void hn_nvs_handle_comp(struct hn_softc *, 302 struct vmbus_channel *, 303 const struct vmbus_chanpkt_hdr *); 304 static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, 305 struct vmbus_channel *, 306 const struct vmbus_chanpkt_hdr *); 307 static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, 308 struct vmbus_channel *, uint64_t); 309 310 #if __FreeBSD_version >= 1100099 311 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); 312 static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); 313 #endif 314 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); 315 static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); 316 #if __FreeBSD_version < 1100095 317 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); 318 #else 319 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); 320 #endif 321 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 322 static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 323 static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); 324 static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); 325 static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); 326 static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); 327 static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); 328 #ifndef RSS 329 static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); 330 static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); 331 #endif 332 static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); 333 static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS); 334 static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS); 335 static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); 336 static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); 337 static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); 338 static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); 339 static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS); 340 static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS); 341 static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS); 342 static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS); 343 static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS); 344 static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS); 345 static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS); 346 347 static void hn_stop(struct hn_softc *, 
bool); 348 static void hn_init_locked(struct hn_softc *); 349 static int hn_chan_attach(struct hn_softc *, 350 struct vmbus_channel *); 351 static void hn_chan_detach(struct hn_softc *, 352 struct vmbus_channel *); 353 static int hn_attach_subchans(struct hn_softc *); 354 static void hn_detach_allchans(struct hn_softc *); 355 static void hn_chan_rollup(struct hn_rx_ring *, 356 struct hn_tx_ring *); 357 static void hn_set_ring_inuse(struct hn_softc *, int); 358 static int hn_synth_attach(struct hn_softc *, int); 359 static void hn_synth_detach(struct hn_softc *); 360 static int hn_synth_alloc_subchans(struct hn_softc *, 361 int *); 362 static bool hn_synth_attachable(const struct hn_softc *); 363 static void hn_suspend(struct hn_softc *); 364 static void hn_suspend_data(struct hn_softc *); 365 static void hn_suspend_mgmt(struct hn_softc *); 366 static void hn_resume(struct hn_softc *); 367 static void hn_resume_data(struct hn_softc *); 368 static void hn_resume_mgmt(struct hn_softc *); 369 static void hn_suspend_mgmt_taskfunc(void *, int); 370 static void hn_chan_drain(struct hn_softc *, 371 struct vmbus_channel *); 372 static void hn_disable_rx(struct hn_softc *); 373 static void hn_drain_rxtx(struct hn_softc *, int); 374 static void hn_polling(struct hn_softc *, u_int); 375 static void hn_chan_polling(struct vmbus_channel *, u_int); 376 static void hn_mtu_change_fixup(struct hn_softc *); 377 378 static void hn_update_link_status(struct hn_softc *); 379 static void hn_change_network(struct hn_softc *); 380 static void hn_link_taskfunc(void *, int); 381 static void hn_netchg_init_taskfunc(void *, int); 382 static void hn_netchg_status_taskfunc(void *, int); 383 static void hn_link_status(struct hn_softc *); 384 385 static int hn_create_rx_data(struct hn_softc *, int); 386 static void hn_destroy_rx_data(struct hn_softc *); 387 static int hn_check_iplen(const struct mbuf *, int); 388 static void hn_rxpkt_proto(const struct mbuf *, int *, int *); 389 static int hn_set_rxfilter(struct hn_softc *, uint32_t); 390 static int hn_rxfilter_config(struct hn_softc *); 391 static int hn_rss_reconfig(struct hn_softc *); 392 static void hn_rss_ind_fixup(struct hn_softc *); 393 static void hn_rss_mbuf_hash(struct hn_softc *, uint32_t); 394 static int hn_rxpkt(struct hn_rx_ring *, const void *, 395 int, const struct hn_rxinfo *); 396 static uint32_t hn_rss_type_fromndis(uint32_t); 397 static uint32_t hn_rss_type_tondis(uint32_t); 398 399 static int hn_tx_ring_create(struct hn_softc *, int); 400 static void hn_tx_ring_destroy(struct hn_tx_ring *); 401 static int hn_create_tx_data(struct hn_softc *, int); 402 static void hn_fixup_tx_data(struct hn_softc *); 403 static void hn_fixup_rx_data(struct hn_softc *); 404 static void hn_destroy_tx_data(struct hn_softc *); 405 static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); 406 static void hn_txdesc_gc(struct hn_tx_ring *, 407 struct hn_txdesc *); 408 static int hn_encap(struct ifnet *, struct hn_tx_ring *, 409 struct hn_txdesc *, struct mbuf **); 410 static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, 411 struct hn_txdesc *); 412 static void hn_set_chim_size(struct hn_softc *, int); 413 static void hn_set_tso_maxsize(struct hn_softc *, int, int); 414 static bool hn_tx_ring_pending(struct hn_tx_ring *); 415 static void hn_tx_ring_qflush(struct hn_tx_ring *); 416 static void hn_resume_tx(struct hn_softc *, int); 417 static void hn_set_txagg(struct hn_softc *); 418 static void *hn_try_txagg(struct ifnet *, 419 struct hn_tx_ring *, struct hn_txdesc *, 420 
	    int);
static int hn_get_txswq_depth(const struct hn_tx_ring *);
static void hn_txpkt_done(struct hn_nvs_sendctx *,
	    struct hn_softc *, struct vmbus_channel *,
	    const void *, int);
static int hn_txpkt_sglist(struct hn_tx_ring *,
	    struct hn_txdesc *);
static int hn_txpkt_chim(struct hn_tx_ring *,
	    struct hn_txdesc *);
static int hn_xmit(struct hn_tx_ring *, int);
static void hn_xmit_taskfunc(void *, int);
static void hn_xmit_txeof(struct hn_tx_ring *);
static void hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int hn_start_locked(struct hn_tx_ring *, int);
static void hn_start_taskfunc(void *, int);
static void hn_start_txeof(struct hn_tx_ring *);
static void hn_start_txeof_taskfunc(void *, int);
#endif

SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust tcp segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust tcp segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust udp datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust udp datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust ip packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust ip packet verification on host side, "
    "when csum info is missing (global setting)");

/*
 * Offload UDP/IPv4 checksum.
 */
static int hn_enable_udp4cs = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN,
    &hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum");

/*
 * Offload UDP/IPv6 checksum.
 */
static int hn_enable_udp6cs = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN,
    &hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum");

/* Stats. */
static counter_u64_t hn_udpcs_fixup;
SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW,
    &hn_udpcs_fixup, "# of UDP checksum fixup");

/*
 * See hn_set_hlen().
 *
 * This value is for Azure. For Hyper-V, set this above
 * 65536 to disable UDP datagram checksum fixup.
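 * The threshold is exposed right below as the hw.hn.udpcs_fixup_mtu
 * loader tunable and sysctl.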
 */
static int hn_udpcs_fixup_mtu = 1420;
SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN,
    &hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold");

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");

#define HN_TX_TASKQ_M_INDEP	0
#define HN_TX_TASKQ_M_GLOBAL	1
#define HN_TX_TASKQ_M_EVTTQ	2

static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");

/* VF list */
SYSCTL_PROC(_hw_hn,
    OID_AUTO, vflist, CTLFLAG_RD | CTLTYPE_STRING,
    0, 0, hn_vflist_sysctl, "A", "VF list");

/* VF mapping */
SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING,
    0, 0, hn_vfmap_sysctl, "A", "VF mapping");

/* Transparent VF */
static int hn_xpnt_vf = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN,
    &hn_xpnt_vf, 0, "Transparent VF mode");

/* Accurate BPF support for Transparent VF */
static int hn_xpnt_vf_accbpf = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN,
    &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF");

/* Extra wait for transparent VF attach routine; unit: seconds. */
static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN,
    &hn_xpnt_vf_attwait, 0,
    "Extra wait for transparent VF attach routine; unit: seconds");

static u_int hn_cpu_index;		/* next CPU for channel */
static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */

static struct rmlock hn_vfmap_lock;
static int hn_vfmap_size;
static struct ifnet **hn_vfmap;

#ifndef RSS
static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
#endif	/* !RSS */

static const struct hyperv_guid hn_guid = {
	.hv_guid = {
	    0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46,
	    0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e }
};

static device_method_t hn_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		hn_probe),
	DEVMETHOD(device_attach,	hn_attach),
	DEVMETHOD(device_detach,	hn_detach),
	DEVMETHOD(device_shutdown,	hn_shutdown),
	DEVMETHOD_END
};

static driver_t hn_driver = {
	"hn",
	hn_methods,
	sizeof(struct hn_softc)
};

static devclass_t hn_devclass;

DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);

#if __FreeBSD_version >= 1100099
static void
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
	int i;

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
		sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif

static int
hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size == 0, ("invalid rndis sglist txd"));
	return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
	    &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
}

static int
hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
	struct hn_nvs_rndis rndis;

	KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size > 0, ("invalid rndis chim txd"));

	rndis.nvs_type = HN_NVS_TYPE_RNDIS;
	rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
	rndis.nvs_chim_idx = txd->chim_index;
	rndis.nvs_chim_sz = txd->chim_size;

	return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
	    &rndis, sizeof(rndis), &txd->send_ctx));
}

static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
{
	int i, bmap_cnt = sc->hn_chim_bmap_cnt;
	u_long *bmap = sc->hn_chim_bmap;
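	/* Scan the chimney bitmap below for a free slot and claim it atomically. */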
uint32_t ret = HN_NVS_CHIM_IDX_INVALID; 691 692 for (i = 0; i < bmap_cnt; ++i) { 693 int idx; 694 695 idx = ffsl(~bmap[i]); 696 if (idx == 0) 697 continue; 698 699 --idx; /* ffsl is 1-based */ 700 KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, 701 ("invalid i %d and idx %d", i, idx)); 702 703 if (atomic_testandset_long(&bmap[i], idx)) 704 continue; 705 706 ret = i * LONG_BIT + idx; 707 break; 708 } 709 return (ret); 710 } 711 712 static __inline void 713 hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) 714 { 715 u_long mask; 716 uint32_t idx; 717 718 idx = chim_idx / LONG_BIT; 719 KASSERT(idx < sc->hn_chim_bmap_cnt, 720 ("invalid chimney index 0x%x", chim_idx)); 721 722 mask = 1UL << (chim_idx % LONG_BIT); 723 KASSERT(sc->hn_chim_bmap[idx] & mask, 724 ("index bitmap 0x%lx, chimney index %u, " 725 "bitmap idx %d, bitmask 0x%lx", 726 sc->hn_chim_bmap[idx], chim_idx, idx, mask)); 727 728 atomic_clear_long(&sc->hn_chim_bmap[idx], mask); 729 } 730 731 #if defined(INET6) || defined(INET) 732 733 #define PULLUP_HDR(m, len) \ 734 do { \ 735 if (__predict_false((m)->m_len < (len))) { \ 736 (m) = m_pullup((m), (len)); \ 737 if ((m) == NULL) \ 738 return (NULL); \ 739 } \ 740 } while (0) 741 742 /* 743 * NOTE: If this function failed, the m_head would be freed. 744 */ 745 static __inline struct mbuf * 746 hn_tso_fixup(struct mbuf *m_head) 747 { 748 struct ether_vlan_header *evl; 749 struct tcphdr *th; 750 int ehlen; 751 752 KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); 753 754 PULLUP_HDR(m_head, sizeof(*evl)); 755 evl = mtod(m_head, struct ether_vlan_header *); 756 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 757 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 758 else 759 ehlen = ETHER_HDR_LEN; 760 m_head->m_pkthdr.l2hlen = ehlen; 761 762 #ifdef INET 763 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 764 struct ip *ip; 765 int iphlen; 766 767 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 768 ip = mtodo(m_head, ehlen); 769 iphlen = ip->ip_hl << 2; 770 m_head->m_pkthdr.l3hlen = iphlen; 771 772 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 773 th = mtodo(m_head, ehlen + iphlen); 774 775 ip->ip_len = 0; 776 ip->ip_sum = 0; 777 th->th_sum = in_pseudo(ip->ip_src.s_addr, 778 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 779 } 780 #endif 781 #if defined(INET6) && defined(INET) 782 else 783 #endif 784 #ifdef INET6 785 { 786 struct ip6_hdr *ip6; 787 788 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 789 ip6 = mtodo(m_head, ehlen); 790 if (ip6->ip6_nxt != IPPROTO_TCP) { 791 m_freem(m_head); 792 return (NULL); 793 } 794 m_head->m_pkthdr.l3hlen = sizeof(*ip6); 795 796 PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); 797 th = mtodo(m_head, ehlen + sizeof(*ip6)); 798 799 ip6->ip6_plen = 0; 800 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 801 } 802 #endif 803 return (m_head); 804 } 805 806 /* 807 * NOTE: If this function failed, the m_head would be freed. 
 */
static __inline struct mbuf *
hn_set_hlen(struct mbuf *m_head)
{
	const struct ether_vlan_header *evl;
	int ehlen;

	PULLUP_HDR(m_head, sizeof(*evl));
	evl = mtod(m_head, const struct ether_vlan_header *);
	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehlen = ETHER_HDR_LEN;
	m_head->m_pkthdr.l2hlen = ehlen;

#ifdef INET
	if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) {
		const struct ip *ip;
		int iphlen;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
		ip = mtodo(m_head, ehlen);
		iphlen = ip->ip_hl << 2;
		m_head->m_pkthdr.l3hlen = iphlen;

		/*
		 * UDP checksum offload does not work in Azure if the
		 * following conditions are met:
		 * - sizeof(IP hdr + UDP hdr + payload) > 1420.
		 * - IP_DF is not set in the IP hdr.
		 *
		 * Fall back to software checksum for these UDP datagrams.
		 */
		if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) &&
		    m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen &&
		    (ntohs(ip->ip_off) & IP_DF) == 0) {
			uint16_t off = ehlen + iphlen;

			counter_u64_add(hn_udpcs_fixup, 1);
			PULLUP_HDR(m_head, off + sizeof(struct udphdr));
			*(uint16_t *)(m_head->m_data + off +
			    m_head->m_pkthdr.csum_data) = in_cksum_skip(
			    m_head, m_head->m_pkthdr.len, off);
			m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP;
		}
	}
#endif
#if defined(INET6) && defined(INET)
	else
#endif
#ifdef INET6
	{
		const struct ip6_hdr *ip6;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
		ip6 = mtodo(m_head, ehlen);
		if (ip6->ip6_nxt != IPPROTO_TCP &&
		    ip6->ip6_nxt != IPPROTO_UDP) {
			m_freem(m_head);
			return (NULL);
		}
		m_head->m_pkthdr.l3hlen = sizeof(*ip6);
	}
#endif
	return (m_head);
}

/*
 * NOTE: If this function failed, the m_head would be freed.
 */
static __inline struct mbuf *
hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn)
{
	const struct tcphdr *th;
	int ehlen, iphlen;

	*tcpsyn = 0;
	ehlen = m_head->m_pkthdr.l2hlen;
	iphlen = m_head->m_pkthdr.l3hlen;

	PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
	th = mtodo(m_head, ehlen + iphlen);
	if (th->th_flags & TH_SYN)
		*tcpsyn = 1;
	return (m_head);
}

#undef PULLUP_HDR

#endif	/* INET6 || INET */

static int
hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
{
	int error = 0;

	HN_LOCK_ASSERT(sc);

	if (sc->hn_rx_filter != filter) {
		error = hn_rndis_set_rxfilter(sc, filter);
		if (!error)
			sc->hn_rx_filter = filter;
	}
	return (error);
}

static int
hn_rxfilter_config(struct hn_softc *sc)
{
	struct ifnet *ifp = sc->hn_ifp;
	uint32_t filter;

	HN_LOCK_ASSERT(sc);

	/*
	 * If the non-transparent mode VF is activated, we don't know how
	 * its RX filter is configured, so stick the synthetic device in
	 * the promiscuous mode.
926 */ 927 if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) { 928 filter = NDIS_PACKET_TYPE_PROMISCUOUS; 929 } else { 930 filter = NDIS_PACKET_TYPE_DIRECTED; 931 if (ifp->if_flags & IFF_BROADCAST) 932 filter |= NDIS_PACKET_TYPE_BROADCAST; 933 /* TODO: support multicast list */ 934 if ((ifp->if_flags & IFF_ALLMULTI) || 935 !CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) 936 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; 937 } 938 return (hn_set_rxfilter(sc, filter)); 939 } 940 941 static void 942 hn_set_txagg(struct hn_softc *sc) 943 { 944 uint32_t size, pkts; 945 int i; 946 947 /* 948 * Setup aggregation size. 949 */ 950 if (sc->hn_agg_size < 0) 951 size = UINT32_MAX; 952 else 953 size = sc->hn_agg_size; 954 955 if (sc->hn_rndis_agg_size < size) 956 size = sc->hn_rndis_agg_size; 957 958 /* NOTE: We only aggregate packets using chimney sending buffers. */ 959 if (size > (uint32_t)sc->hn_chim_szmax) 960 size = sc->hn_chim_szmax; 961 962 if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) { 963 /* Disable */ 964 size = 0; 965 pkts = 0; 966 goto done; 967 } 968 969 /* NOTE: Type of the per TX ring setting is 'int'. */ 970 if (size > INT_MAX) 971 size = INT_MAX; 972 973 /* 974 * Setup aggregation packet count. 975 */ 976 if (sc->hn_agg_pkts < 0) 977 pkts = UINT32_MAX; 978 else 979 pkts = sc->hn_agg_pkts; 980 981 if (sc->hn_rndis_agg_pkts < pkts) 982 pkts = sc->hn_rndis_agg_pkts; 983 984 if (pkts <= 1) { 985 /* Disable */ 986 size = 0; 987 pkts = 0; 988 goto done; 989 } 990 991 /* NOTE: Type of the per TX ring setting is 'short'. */ 992 if (pkts > SHRT_MAX) 993 pkts = SHRT_MAX; 994 995 done: 996 /* NOTE: Type of the per TX ring setting is 'short'. */ 997 if (sc->hn_rndis_agg_align > SHRT_MAX) { 998 /* Disable */ 999 size = 0; 1000 pkts = 0; 1001 } 1002 1003 if (bootverbose) { 1004 if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", 1005 size, pkts, sc->hn_rndis_agg_align); 1006 } 1007 1008 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 1009 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 1010 1011 mtx_lock(&txr->hn_tx_lock); 1012 txr->hn_agg_szmax = size; 1013 txr->hn_agg_pktmax = pkts; 1014 txr->hn_agg_align = sc->hn_rndis_agg_align; 1015 mtx_unlock(&txr->hn_tx_lock); 1016 } 1017 } 1018 1019 static int 1020 hn_get_txswq_depth(const struct hn_tx_ring *txr) 1021 { 1022 1023 KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); 1024 if (hn_tx_swq_depth < txr->hn_txdesc_cnt) 1025 return txr->hn_txdesc_cnt; 1026 return hn_tx_swq_depth; 1027 } 1028 1029 static int 1030 hn_rss_reconfig(struct hn_softc *sc) 1031 { 1032 int error; 1033 1034 HN_LOCK_ASSERT(sc); 1035 1036 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1037 return (ENXIO); 1038 1039 /* 1040 * Disable RSS first. 1041 * 1042 * NOTE: 1043 * Direct reconfiguration by setting the UNCHG flags does 1044 * _not_ work properly. 1045 */ 1046 if (bootverbose) 1047 if_printf(sc->hn_ifp, "disable RSS\n"); 1048 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); 1049 if (error) { 1050 if_printf(sc->hn_ifp, "RSS disable failed\n"); 1051 return (error); 1052 } 1053 1054 /* 1055 * Reenable the RSS w/ the updated RSS key or indirect 1056 * table. 
1057 */ 1058 if (bootverbose) 1059 if_printf(sc->hn_ifp, "reconfig RSS\n"); 1060 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 1061 if (error) { 1062 if_printf(sc->hn_ifp, "RSS reconfig failed\n"); 1063 return (error); 1064 } 1065 return (0); 1066 } 1067 1068 static void 1069 hn_rss_ind_fixup(struct hn_softc *sc) 1070 { 1071 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 1072 int i, nchan; 1073 1074 nchan = sc->hn_rx_ring_inuse; 1075 KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); 1076 1077 /* 1078 * Check indirect table to make sure that all channels in it 1079 * can be used. 1080 */ 1081 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 1082 if (rss->rss_ind[i] >= nchan) { 1083 if_printf(sc->hn_ifp, 1084 "RSS indirect table %d fixup: %u -> %d\n", 1085 i, rss->rss_ind[i], nchan - 1); 1086 rss->rss_ind[i] = nchan - 1; 1087 } 1088 } 1089 } 1090 1091 static int 1092 hn_ifmedia_upd(struct ifnet *ifp __unused) 1093 { 1094 1095 return EOPNOTSUPP; 1096 } 1097 1098 static void 1099 hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 1100 { 1101 struct hn_softc *sc = ifp->if_softc; 1102 1103 ifmr->ifm_status = IFM_AVALID; 1104 ifmr->ifm_active = IFM_ETHER; 1105 1106 if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { 1107 ifmr->ifm_active |= IFM_NONE; 1108 return; 1109 } 1110 ifmr->ifm_status |= IFM_ACTIVE; 1111 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 1112 } 1113 1114 static void 1115 hn_rxvf_set_task(void *xarg, int pending __unused) 1116 { 1117 struct hn_rxvf_setarg *arg = xarg; 1118 1119 arg->rxr->hn_rxvf_ifp = arg->vf_ifp; 1120 } 1121 1122 static void 1123 hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp) 1124 { 1125 struct hn_rx_ring *rxr; 1126 struct hn_rxvf_setarg arg; 1127 struct task task; 1128 int i; 1129 1130 HN_LOCK_ASSERT(sc); 1131 1132 TASK_INIT(&task, 0, hn_rxvf_set_task, &arg); 1133 1134 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 1135 rxr = &sc->hn_rx_ring[i]; 1136 1137 if (i < sc->hn_rx_ring_inuse) { 1138 arg.rxr = rxr; 1139 arg.vf_ifp = vf_ifp; 1140 vmbus_chan_run_task(rxr->hn_chan, &task); 1141 } else { 1142 rxr->hn_rxvf_ifp = vf_ifp; 1143 } 1144 } 1145 } 1146 1147 static bool 1148 hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp) 1149 { 1150 const struct ifnet *hn_ifp; 1151 1152 hn_ifp = sc->hn_ifp; 1153 1154 if (ifp == hn_ifp) 1155 return (false); 1156 1157 if (ifp->if_alloctype != IFT_ETHER) 1158 return (false); 1159 1160 /* Ignore lagg/vlan interfaces */ 1161 if (strcmp(ifp->if_dname, "lagg") == 0 || 1162 strcmp(ifp->if_dname, "vlan") == 0) 1163 return (false); 1164 1165 /* 1166 * During detach events ifp->if_addr might be NULL. 
1167 * Make sure the bcmp() below doesn't panic on that: 1168 */ 1169 if (ifp->if_addr == NULL || hn_ifp->if_addr == NULL) 1170 return (false); 1171 1172 if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0) 1173 return (false); 1174 1175 return (true); 1176 } 1177 1178 static void 1179 hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf) 1180 { 1181 struct ifnet *hn_ifp; 1182 1183 HN_LOCK(sc); 1184 1185 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1186 goto out; 1187 1188 if (!hn_ismyvf(sc, ifp)) 1189 goto out; 1190 hn_ifp = sc->hn_ifp; 1191 1192 if (rxvf) { 1193 if (sc->hn_flags & HN_FLAG_RXVF) 1194 goto out; 1195 1196 sc->hn_flags |= HN_FLAG_RXVF; 1197 hn_rxfilter_config(sc); 1198 } else { 1199 if (!(sc->hn_flags & HN_FLAG_RXVF)) 1200 goto out; 1201 1202 sc->hn_flags &= ~HN_FLAG_RXVF; 1203 if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 1204 hn_rxfilter_config(sc); 1205 else 1206 hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE); 1207 } 1208 1209 hn_nvs_set_datapath(sc, 1210 rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH); 1211 1212 hn_rxvf_set(sc, rxvf ? ifp : NULL); 1213 1214 if (rxvf) { 1215 hn_vf_rss_fixup(sc, true); 1216 hn_suspend_mgmt(sc); 1217 sc->hn_link_flags &= 1218 ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG); 1219 if_link_state_change(hn_ifp, LINK_STATE_DOWN); 1220 } else { 1221 hn_vf_rss_restore(sc); 1222 hn_resume_mgmt(sc); 1223 } 1224 1225 devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname, 1226 rxvf ? "VF_UP" : "VF_DOWN", NULL); 1227 1228 if (bootverbose) { 1229 if_printf(hn_ifp, "datapath is switched %s %s\n", 1230 rxvf ? "to" : "from", ifp->if_xname); 1231 } 1232 out: 1233 HN_UNLOCK(sc); 1234 } 1235 1236 static void 1237 hn_ifnet_event(void *arg, struct ifnet *ifp, int event) 1238 { 1239 1240 if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN) 1241 return; 1242 hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP); 1243 } 1244 1245 static void 1246 hn_ifaddr_event(void *arg, struct ifnet *ifp) 1247 { 1248 1249 hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP); 1250 } 1251 1252 static int 1253 hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr) 1254 { 1255 struct ifnet *ifp, *vf_ifp; 1256 uint64_t tmp; 1257 int error; 1258 1259 HN_LOCK_ASSERT(sc); 1260 ifp = sc->hn_ifp; 1261 vf_ifp = sc->hn_vf_ifp; 1262 1263 /* 1264 * Fix up requested capabilities w/ supported capabilities, 1265 * since the supported capabilities could have been changed. 1266 */ 1267 ifr->ifr_reqcap &= ifp->if_capabilities; 1268 /* Pass SIOCSIFCAP to VF. */ 1269 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr); 1270 1271 /* 1272 * NOTE: 1273 * The error will be propagated to the callers, however, it 1274 * is _not_ useful here. 1275 */ 1276 1277 /* 1278 * Merge VF's enabled capabilities. 
	 */
	ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities;

	tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc);
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= tmp;
	else
		ifp->if_hwassist &= ~tmp;

	tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc);
	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
		ifp->if_hwassist |= tmp;
	else
		ifp->if_hwassist &= ~tmp;

	tmp = vf_ifp->if_hwassist & CSUM_IP_TSO;
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= tmp;
	else
		ifp->if_hwassist &= ~tmp;

	tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO;
	if (ifp->if_capenable & IFCAP_TSO6)
		ifp->if_hwassist |= tmp;
	else
		ifp->if_hwassist &= ~tmp;

	return (error);
}

static int
hn_xpnt_vf_iocsetflags(struct hn_softc *sc)
{
	struct ifnet *vf_ifp;
	struct ifreq ifr;

	HN_LOCK_ASSERT(sc);
	vf_ifp = sc->hn_vf_ifp;

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
	ifr.ifr_flags = vf_ifp->if_flags & 0xffff;
	ifr.ifr_flagshigh = vf_ifp->if_flags >> 16;
	return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
}

static void
hn_xpnt_vf_saveifflags(struct hn_softc *sc)
{
	struct ifnet *ifp = sc->hn_ifp;
	int allmulti = 0;

	HN_LOCK_ASSERT(sc);

	/* XXX vlan(4) style mcast addr maintenance */
	if (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs))
		allmulti = IFF_ALLMULTI;

	/* Always set the VF's if_flags */
	sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti;
}

static void
hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m)
{
	struct rm_priotracker pt;
	struct ifnet *hn_ifp = NULL;
	struct mbuf *mn;

	/*
	 * XXX racy, if hn(4) ever detached.
	 */
	rm_rlock(&hn_vfmap_lock, &pt);
	if (vf_ifp->if_index < hn_vfmap_size)
		hn_ifp = hn_vfmap[vf_ifp->if_index];
	rm_runlock(&hn_vfmap_lock, &pt);

	if (hn_ifp != NULL) {
		for (mn = m; mn != NULL; mn = mn->m_nextpkt) {
			/*
			 * Allow tapping on the VF.
			 */
			ETHER_BPF_MTAP(vf_ifp, mn);

			/*
			 * Update VF stats.
			 */
			if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) {
				if_inc_counter(vf_ifp, IFCOUNTER_IBYTES,
				    mn->m_pkthdr.len);
			}
			/*
			 * XXX IFCOUNTER_IMCAST
			 * This stat updating is kinda invasive, since it
			 * requires two checks on the mbuf: the length check
			 * and the ethernet header check. As of this writing,
			 * all multicast packets go directly to hn(4), which
			 * makes imcast stat updating in the VF a try in vain.
			 */

			/*
			 * Fix up rcvif and increase hn(4)'s ipackets.
			 */
			mn->m_pkthdr.rcvif = hn_ifp;
			if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
		}
		/*
		 * Go through hn(4)'s if_input.
		 */
		hn_ifp->if_input(hn_ifp, m);
	} else {
		/*
		 * In the middle of the transition; free this
		 * mbuf chain.
1393 */ 1394 while (m != NULL) { 1395 mn = m->m_nextpkt; 1396 m->m_nextpkt = NULL; 1397 m_freem(m); 1398 m = mn; 1399 } 1400 } 1401 } 1402 1403 static void 1404 hn_mtu_change_fixup(struct hn_softc *sc) 1405 { 1406 struct ifnet *ifp; 1407 1408 HN_LOCK_ASSERT(sc); 1409 ifp = sc->hn_ifp; 1410 1411 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 1412 #if __FreeBSD_version >= 1100099 1413 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) 1414 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 1415 #endif 1416 } 1417 1418 static uint32_t 1419 hn_rss_type_fromndis(uint32_t rss_hash) 1420 { 1421 uint32_t types = 0; 1422 1423 if (rss_hash & NDIS_HASH_IPV4) 1424 types |= RSS_TYPE_IPV4; 1425 if (rss_hash & NDIS_HASH_TCP_IPV4) 1426 types |= RSS_TYPE_TCP_IPV4; 1427 if (rss_hash & NDIS_HASH_IPV6) 1428 types |= RSS_TYPE_IPV6; 1429 if (rss_hash & NDIS_HASH_IPV6_EX) 1430 types |= RSS_TYPE_IPV6_EX; 1431 if (rss_hash & NDIS_HASH_TCP_IPV6) 1432 types |= RSS_TYPE_TCP_IPV6; 1433 if (rss_hash & NDIS_HASH_TCP_IPV6_EX) 1434 types |= RSS_TYPE_TCP_IPV6_EX; 1435 if (rss_hash & NDIS_HASH_UDP_IPV4_X) 1436 types |= RSS_TYPE_UDP_IPV4; 1437 return (types); 1438 } 1439 1440 static uint32_t 1441 hn_rss_type_tondis(uint32_t types) 1442 { 1443 uint32_t rss_hash = 0; 1444 1445 KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0, 1446 ("UDP6 and UDP6EX are not supported")); 1447 1448 if (types & RSS_TYPE_IPV4) 1449 rss_hash |= NDIS_HASH_IPV4; 1450 if (types & RSS_TYPE_TCP_IPV4) 1451 rss_hash |= NDIS_HASH_TCP_IPV4; 1452 if (types & RSS_TYPE_IPV6) 1453 rss_hash |= NDIS_HASH_IPV6; 1454 if (types & RSS_TYPE_IPV6_EX) 1455 rss_hash |= NDIS_HASH_IPV6_EX; 1456 if (types & RSS_TYPE_TCP_IPV6) 1457 rss_hash |= NDIS_HASH_TCP_IPV6; 1458 if (types & RSS_TYPE_TCP_IPV6_EX) 1459 rss_hash |= NDIS_HASH_TCP_IPV6_EX; 1460 if (types & RSS_TYPE_UDP_IPV4) 1461 rss_hash |= NDIS_HASH_UDP_IPV4_X; 1462 return (rss_hash); 1463 } 1464 1465 static void 1466 hn_rss_mbuf_hash(struct hn_softc *sc, uint32_t mbuf_hash) 1467 { 1468 int i; 1469 1470 HN_LOCK_ASSERT(sc); 1471 1472 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1473 sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash; 1474 } 1475 1476 static void 1477 hn_vf_rss_fixup(struct hn_softc *sc, bool reconf) 1478 { 1479 struct ifnet *ifp, *vf_ifp; 1480 struct ifrsshash ifrh; 1481 struct ifrsskey ifrk; 1482 int error; 1483 uint32_t my_types, diff_types, mbuf_types = 0; 1484 1485 HN_LOCK_ASSERT(sc); 1486 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1487 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1488 1489 if (sc->hn_rx_ring_inuse == 1) { 1490 /* No RSS on synthetic parts; done. */ 1491 return; 1492 } 1493 if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) { 1494 /* Synthetic parts do not support Toeplitz; done. */ 1495 return; 1496 } 1497 1498 ifp = sc->hn_ifp; 1499 vf_ifp = sc->hn_vf_ifp; 1500 1501 /* 1502 * Extract VF's RSS key. Only 40 bytes key for Toeplitz is 1503 * supported. 
	 */
	memset(&ifrk, 0, sizeof(ifrk));
	strlcpy(ifrk.ifrk_name, vf_ifp->if_xname, sizeof(ifrk.ifrk_name));
	error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSKEY, (caddr_t)&ifrk);
	if (error) {
		if_printf(ifp, "%s SIOCGIFRSSKEY failed: %d\n",
		    vf_ifp->if_xname, error);
		goto done;
	}
	if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) {
		if_printf(ifp, "%s RSS function %u is not Toeplitz\n",
		    vf_ifp->if_xname, ifrk.ifrk_func);
		goto done;
	}
	if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) {
		if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n",
		    vf_ifp->if_xname, ifrk.ifrk_keylen);
		goto done;
	}

	/*
	 * Extract VF's RSS hash. Only Toeplitz is supported.
	 */
	memset(&ifrh, 0, sizeof(ifrh));
	strlcpy(ifrh.ifrh_name, vf_ifp->if_xname, sizeof(ifrh.ifrh_name));
	error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSHASH, (caddr_t)&ifrh);
	if (error) {
		if_printf(ifp, "%s SIOCGIFRSSHASH failed: %d\n",
		    vf_ifp->if_xname, error);
		goto done;
	}
	if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) {
		if_printf(ifp, "%s RSS function %u is not Toeplitz\n",
		    vf_ifp->if_xname, ifrh.ifrh_func);
		goto done;
	}

	my_types = hn_rss_type_fromndis(sc->hn_rss_hcap);
	if ((ifrh.ifrh_types & my_types) == 0) {
		/* This disables RSS; ignore it then */
		if_printf(ifp, "%s intersection of RSS types failed. "
		    "VF %#x, mine %#x\n", vf_ifp->if_xname,
		    ifrh.ifrh_types, my_types);
		goto done;
	}

	diff_types = my_types ^ ifrh.ifrh_types;
	my_types &= ifrh.ifrh_types;
	mbuf_types = my_types;

	/*
	 * Detect RSS hash value/type conflicts.
	 *
	 * NOTE:
	 * We don't disable the hash type, but stop delivering the hash
	 * value/type through mbufs on the RX path.
	 *
	 * XXX If HN_CAP_UDPHASH is set in hn_caps, then UDP 4-tuple
	 * hash is delivered with type of TCP_IPV4. This means if
	 * UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at
	 * least to hn_mbuf_hash. However, given that _all_ of the
	 * NICs implement TCP_IPV4, this will _not_ impose any issues
	 * here.
	 */
	if ((my_types & RSS_TYPE_IPV4) &&
	    (diff_types & ifrh.ifrh_types &
	     (RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) {
		/* Conflict; disable IPV4 hash type/value delivery. */
		if_printf(ifp, "disable IPV4 mbuf hash delivery\n");
		mbuf_types &= ~RSS_TYPE_IPV4;
	}
	if ((my_types & RSS_TYPE_IPV6) &&
	    (diff_types & ifrh.ifrh_types &
	     (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
	      RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
	      RSS_TYPE_IPV6_EX))) {
		/* Conflict; disable IPV6 hash type/value delivery. */
		if_printf(ifp, "disable IPV6 mbuf hash delivery\n");
		mbuf_types &= ~RSS_TYPE_IPV6;
	}
	if ((my_types & RSS_TYPE_IPV6_EX) &&
	    (diff_types & ifrh.ifrh_types &
	     (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
	      RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
	      RSS_TYPE_IPV6))) {
		/* Conflict; disable IPV6_EX hash type/value delivery. */
		if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n");
		mbuf_types &= ~RSS_TYPE_IPV6_EX;
	}
	if ((my_types & RSS_TYPE_TCP_IPV6) &&
	    (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) {
		/* Conflict; disable TCP_IPV6 hash type/value delivery.
*/ 1596 if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n"); 1597 mbuf_types &= ~RSS_TYPE_TCP_IPV6; 1598 } 1599 if ((my_types & RSS_TYPE_TCP_IPV6_EX) && 1600 (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) { 1601 /* Conflict; disable TCP_IPV6_EX hash type/value delivery. */ 1602 if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n"); 1603 mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX; 1604 } 1605 if ((my_types & RSS_TYPE_UDP_IPV6) && 1606 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) { 1607 /* Conflict; disable UDP_IPV6 hash type/value delivery. */ 1608 if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n"); 1609 mbuf_types &= ~RSS_TYPE_UDP_IPV6; 1610 } 1611 if ((my_types & RSS_TYPE_UDP_IPV6_EX) && 1612 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) { 1613 /* Conflict; disable UDP_IPV6_EX hash type/value delivery. */ 1614 if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n"); 1615 mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX; 1616 } 1617 1618 /* 1619 * Indirect table does not matter. 1620 */ 1621 1622 sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) | 1623 hn_rss_type_tondis(my_types); 1624 memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key)); 1625 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 1626 1627 if (reconf) { 1628 error = hn_rss_reconfig(sc); 1629 if (error) { 1630 /* XXX roll-back? */ 1631 if_printf(ifp, "hn_rss_reconfig failed: %d\n", error); 1632 /* XXX keep going. */ 1633 } 1634 } 1635 done: 1636 /* Hash deliverability for mbufs. */ 1637 hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types)); 1638 } 1639 1640 static void 1641 hn_vf_rss_restore(struct hn_softc *sc) 1642 { 1643 1644 HN_LOCK_ASSERT(sc); 1645 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1646 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1647 1648 if (sc->hn_rx_ring_inuse == 1) 1649 goto done; 1650 1651 /* 1652 * Restore hash types. Key does _not_ matter. 1653 */ 1654 if (sc->hn_rss_hash != sc->hn_rss_hcap) { 1655 int error; 1656 1657 sc->hn_rss_hash = sc->hn_rss_hcap; 1658 error = hn_rss_reconfig(sc); 1659 if (error) { 1660 if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n", 1661 error); 1662 /* XXX keep going. */ 1663 } 1664 } 1665 done: 1666 /* Hash deliverability for mbufs. */ 1667 hn_rss_mbuf_hash(sc, NDIS_HASH_ALL); 1668 } 1669 1670 static void 1671 hn_xpnt_vf_setready(struct hn_softc *sc) 1672 { 1673 struct ifnet *ifp, *vf_ifp; 1674 struct ifreq ifr; 1675 1676 HN_LOCK_ASSERT(sc); 1677 ifp = sc->hn_ifp; 1678 vf_ifp = sc->hn_vf_ifp; 1679 1680 /* 1681 * Mark the VF ready. 1682 */ 1683 sc->hn_vf_rdytick = 0; 1684 1685 /* 1686 * Save information for restoration. 1687 */ 1688 sc->hn_saved_caps = ifp->if_capabilities; 1689 sc->hn_saved_tsomax = ifp->if_hw_tsomax; 1690 sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount; 1691 sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize; 1692 1693 /* 1694 * Intersect supported/enabled capabilities. 1695 * 1696 * NOTE: 1697 * if_hwassist is not changed here. 1698 */ 1699 ifp->if_capabilities &= vf_ifp->if_capabilities; 1700 ifp->if_capenable &= ifp->if_capabilities; 1701 1702 /* 1703 * Fix TSO settings. 1704 */ 1705 if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax) 1706 ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax; 1707 if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount) 1708 ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount; 1709 if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize) 1710 ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize; 1711 1712 /* 1713 * Change VF's enabled capabilities. 
1714 */ 1715 memset(&ifr, 0, sizeof(ifr)); 1716 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1717 ifr.ifr_reqcap = ifp->if_capenable; 1718 hn_xpnt_vf_iocsetcaps(sc, &ifr); 1719 1720 if (ifp->if_mtu != ETHERMTU) { 1721 int error; 1722 1723 /* 1724 * Change VF's MTU. 1725 */ 1726 memset(&ifr, 0, sizeof(ifr)); 1727 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1728 ifr.ifr_mtu = ifp->if_mtu; 1729 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr); 1730 if (error) { 1731 if_printf(ifp, "%s SIOCSIFMTU %u failed\n", 1732 vf_ifp->if_xname, ifp->if_mtu); 1733 if (ifp->if_mtu > ETHERMTU) { 1734 if_printf(ifp, "change MTU to %d\n", ETHERMTU); 1735 1736 /* 1737 * XXX 1738 * No need to adjust the synthetic parts' MTU; 1739 * failure of the adjustment will cause us 1740 * infinite headache. 1741 */ 1742 ifp->if_mtu = ETHERMTU; 1743 hn_mtu_change_fixup(sc); 1744 } 1745 } 1746 } 1747 } 1748 1749 static bool 1750 hn_xpnt_vf_isready(struct hn_softc *sc) 1751 { 1752 1753 HN_LOCK_ASSERT(sc); 1754 1755 if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL) 1756 return (false); 1757 1758 if (sc->hn_vf_rdytick == 0) 1759 return (true); 1760 1761 if (sc->hn_vf_rdytick > ticks) 1762 return (false); 1763 1764 /* Mark VF as ready. */ 1765 hn_xpnt_vf_setready(sc); 1766 return (true); 1767 } 1768 1769 static void 1770 hn_xpnt_vf_setenable(struct hn_softc *sc) 1771 { 1772 int i; 1773 1774 HN_LOCK_ASSERT(sc); 1775 1776 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1777 rm_wlock(&sc->hn_vf_lock); 1778 sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED; 1779 rm_wunlock(&sc->hn_vf_lock); 1780 1781 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1782 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF; 1783 } 1784 1785 static void 1786 hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf) 1787 { 1788 int i; 1789 1790 HN_LOCK_ASSERT(sc); 1791 1792 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1793 rm_wlock(&sc->hn_vf_lock); 1794 sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED; 1795 if (clear_vf) 1796 sc->hn_vf_ifp = NULL; 1797 rm_wunlock(&sc->hn_vf_lock); 1798 1799 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1800 sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF; 1801 } 1802 1803 static void 1804 hn_xpnt_vf_init(struct hn_softc *sc) 1805 { 1806 int error; 1807 1808 HN_LOCK_ASSERT(sc); 1809 1810 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1811 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1812 1813 if (bootverbose) { 1814 if_printf(sc->hn_ifp, "try bringing up %s\n", 1815 sc->hn_vf_ifp->if_xname); 1816 } 1817 1818 /* 1819 * Bring the VF up. 1820 */ 1821 hn_xpnt_vf_saveifflags(sc); 1822 sc->hn_vf_ifp->if_flags |= IFF_UP; 1823 error = hn_xpnt_vf_iocsetflags(sc); 1824 if (error) { 1825 if_printf(sc->hn_ifp, "bringing up %s failed: %d\n", 1826 sc->hn_vf_ifp->if_xname, error); 1827 return; 1828 } 1829 1830 /* 1831 * NOTE: 1832 * Datapath setting must happen _after_ bringing the VF up. 1833 */ 1834 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 1835 1836 /* 1837 * NOTE: 1838 * Fixup RSS related bits _after_ the VF is brought up, since 1839 * many VFs generate RSS key during it's initialization. 1840 */ 1841 hn_vf_rss_fixup(sc, true); 1842 1843 /* Mark transparent mode VF as enabled. 
*/ 1844 hn_xpnt_vf_setenable(sc); 1845 } 1846 1847 static void 1848 hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused) 1849 { 1850 struct hn_softc *sc = xsc; 1851 1852 HN_LOCK(sc); 1853 1854 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1855 goto done; 1856 if (sc->hn_vf_ifp == NULL) 1857 goto done; 1858 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 1859 goto done; 1860 1861 if (sc->hn_vf_rdytick != 0) { 1862 /* Mark VF as ready. */ 1863 hn_xpnt_vf_setready(sc); 1864 } 1865 1866 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) { 1867 /* 1868 * Delayed VF initialization. 1869 */ 1870 if (bootverbose) { 1871 if_printf(sc->hn_ifp, "delayed initialize %s\n", 1872 sc->hn_vf_ifp->if_xname); 1873 } 1874 hn_xpnt_vf_init(sc); 1875 } 1876 done: 1877 HN_UNLOCK(sc); 1878 } 1879 1880 static void 1881 hn_ifnet_attevent(void *xsc, struct ifnet *ifp) 1882 { 1883 struct hn_softc *sc = xsc; 1884 1885 HN_LOCK(sc); 1886 1887 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1888 goto done; 1889 1890 if (!hn_ismyvf(sc, ifp)) 1891 goto done; 1892 1893 if (sc->hn_vf_ifp != NULL) { 1894 if_printf(sc->hn_ifp, "%s was attached as VF\n", 1895 sc->hn_vf_ifp->if_xname); 1896 goto done; 1897 } 1898 1899 if (hn_xpnt_vf && ifp->if_start != NULL) { 1900 /* 1901 * ifnet.if_start is _not_ supported by transparent 1902 * mode VF; mainly due to the IFF_DRV_OACTIVE flag. 1903 */ 1904 if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported " 1905 "in transparent VF mode.\n", ifp->if_xname); 1906 goto done; 1907 } 1908 1909 rm_wlock(&hn_vfmap_lock); 1910 1911 if (ifp->if_index >= hn_vfmap_size) { 1912 struct ifnet **newmap; 1913 int newsize; 1914 1915 newsize = ifp->if_index + HN_VFMAP_SIZE_DEF; 1916 newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF, 1917 M_WAITOK | M_ZERO); 1918 1919 memcpy(newmap, hn_vfmap, 1920 sizeof(struct ifnet *) * hn_vfmap_size); 1921 free(hn_vfmap, M_DEVBUF); 1922 hn_vfmap = newmap; 1923 hn_vfmap_size = newsize; 1924 } 1925 KASSERT(hn_vfmap[ifp->if_index] == NULL, 1926 ("%s: ifindex %d was mapped to %s", 1927 ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname)); 1928 hn_vfmap[ifp->if_index] = sc->hn_ifp; 1929 1930 rm_wunlock(&hn_vfmap_lock); 1931 1932 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1933 rm_wlock(&sc->hn_vf_lock); 1934 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1935 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1936 sc->hn_vf_ifp = ifp; 1937 rm_wunlock(&sc->hn_vf_lock); 1938 1939 if (hn_xpnt_vf) { 1940 int wait_ticks; 1941 1942 /* 1943 * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp. 1944 * Save vf_ifp's current if_input for later restoration. 1945 */ 1946 sc->hn_vf_input = ifp->if_input; 1947 ifp->if_input = hn_xpnt_vf_input; 1948 1949 /* 1950 * Stop link status management; use the VF's. 1951 */ 1952 hn_suspend_mgmt(sc); 1953 1954 /* 1955 * Give VF sometime to complete its attach routing. 1956 */ 1957 wait_ticks = hn_xpnt_vf_attwait * hz; 1958 sc->hn_vf_rdytick = ticks + wait_ticks; 1959 1960 taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init, 1961 wait_ticks); 1962 } 1963 done: 1964 HN_UNLOCK(sc); 1965 } 1966 1967 static void 1968 hn_ifnet_detevent(void *xsc, struct ifnet *ifp) 1969 { 1970 struct hn_softc *sc = xsc; 1971 1972 HN_LOCK(sc); 1973 1974 if (sc->hn_vf_ifp == NULL) 1975 goto done; 1976 1977 if (!hn_ismyvf(sc, ifp)) 1978 goto done; 1979 1980 if (hn_xpnt_vf) { 1981 /* 1982 * Make sure that the delayed initialization is not running. 
1983 * 1984 * NOTE: 1985 * - This lock _must_ be released, since the hn_vf_init task 1986 * will try holding this lock. 1987 * - It is safe to release this lock here, since the 1988 * hn_ifnet_attevent() is interlocked by the hn_vf_ifp. 1989 * 1990 * XXX racy, if hn(4) ever detached. 1991 */ 1992 HN_UNLOCK(sc); 1993 taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init); 1994 HN_LOCK(sc); 1995 1996 KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved", 1997 sc->hn_ifp->if_xname)); 1998 ifp->if_input = sc->hn_vf_input; 1999 sc->hn_vf_input = NULL; 2000 2001 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) && 2002 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) 2003 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 2004 2005 if (sc->hn_vf_rdytick == 0) { 2006 /* 2007 * The VF was ready; restore some settings. 2008 */ 2009 sc->hn_ifp->if_capabilities = sc->hn_saved_caps; 2010 /* 2011 * NOTE: 2012 * There is _no_ need to fixup if_capenable and 2013 * if_hwassist, since the if_capabilities before 2014 * restoration was an intersection of the VF's 2015 * if_capabilites and the synthetic device's 2016 * if_capabilites. 2017 */ 2018 sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax; 2019 sc->hn_ifp->if_hw_tsomaxsegcount = 2020 sc->hn_saved_tsosegcnt; 2021 sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz; 2022 } 2023 2024 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2025 /* 2026 * Restore RSS settings. 2027 */ 2028 hn_vf_rss_restore(sc); 2029 2030 /* 2031 * Resume link status management, which was suspended 2032 * by hn_ifnet_attevent(). 2033 */ 2034 hn_resume_mgmt(sc); 2035 } 2036 } 2037 2038 /* Mark transparent mode VF as disabled. */ 2039 hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */); 2040 2041 rm_wlock(&hn_vfmap_lock); 2042 2043 KASSERT(ifp->if_index < hn_vfmap_size, 2044 ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size)); 2045 if (hn_vfmap[ifp->if_index] != NULL) { 2046 KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp, 2047 ("%s: ifindex %d was mapped to %s", 2048 ifp->if_xname, ifp->if_index, 2049 hn_vfmap[ifp->if_index]->if_xname)); 2050 hn_vfmap[ifp->if_index] = NULL; 2051 } 2052 2053 rm_wunlock(&hn_vfmap_lock); 2054 done: 2055 HN_UNLOCK(sc); 2056 } 2057 2058 static void 2059 hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state) 2060 { 2061 struct hn_softc *sc = xsc; 2062 2063 if (sc->hn_vf_ifp == ifp) 2064 if_link_state_change(sc->hn_ifp, link_state); 2065 } 2066 2067 static int 2068 hn_probe(device_t dev) 2069 { 2070 2071 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) { 2072 device_set_desc(dev, "Hyper-V Network Interface"); 2073 return BUS_PROBE_DEFAULT; 2074 } 2075 return ENXIO; 2076 } 2077 2078 static int 2079 hn_attach(device_t dev) 2080 { 2081 struct hn_softc *sc = device_get_softc(dev); 2082 struct sysctl_oid_list *child; 2083 struct sysctl_ctx_list *ctx; 2084 uint8_t eaddr[ETHER_ADDR_LEN]; 2085 struct ifnet *ifp = NULL; 2086 int error, ring_cnt, tx_ring_cnt; 2087 uint32_t mtu; 2088 2089 sc->hn_dev = dev; 2090 sc->hn_prichan = vmbus_get_channel(dev); 2091 HN_LOCK_INIT(sc); 2092 rm_init(&sc->hn_vf_lock, "hnvf"); 2093 if (hn_xpnt_vf && hn_xpnt_vf_accbpf) 2094 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 2095 2096 /* 2097 * Initialize these tunables once. 2098 */ 2099 sc->hn_agg_size = hn_tx_agg_size; 2100 sc->hn_agg_pkts = hn_tx_agg_pkts; 2101 2102 /* 2103 * Setup taskqueue for transmission. 
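 *
 * Modes handled below:
 * - HN_TX_TASKQ_M_INDEP: create hn_tx_taskq_cnt taskqueues private
 *   to this device.
 * - HN_TX_TASKQ_M_GLOBAL: share the global hn_tx_taskque array.
 * - otherwise sc->hn_tx_taskqs stays NULL and each TX ring
 *   presumably falls back to its channel's taskqueue.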
2104 */ 2105 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) { 2106 int i; 2107 2108 sc->hn_tx_taskqs = 2109 malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 2110 M_DEVBUF, M_WAITOK); 2111 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 2112 sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx", 2113 M_WAITOK, taskqueue_thread_enqueue, 2114 &sc->hn_tx_taskqs[i]); 2115 taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET, 2116 "%s tx%d", device_get_nameunit(dev), i); 2117 } 2118 } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) { 2119 sc->hn_tx_taskqs = hn_tx_taskque; 2120 } 2121 2122 /* 2123 * Setup taskqueue for mangement tasks, e.g. link status. 2124 */ 2125 sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, 2126 taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); 2127 taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", 2128 device_get_nameunit(dev)); 2129 TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); 2130 TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); 2131 TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, 2132 hn_netchg_status_taskfunc, sc); 2133 2134 if (hn_xpnt_vf) { 2135 /* 2136 * Setup taskqueue for VF tasks, e.g. delayed VF bringing up. 2137 */ 2138 sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK, 2139 taskqueue_thread_enqueue, &sc->hn_vf_taskq); 2140 taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf", 2141 device_get_nameunit(dev)); 2142 TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0, 2143 hn_xpnt_vf_init_taskfunc, sc); 2144 } 2145 2146 /* 2147 * Allocate ifnet and setup its name earlier, so that if_printf 2148 * can be used by functions, which will be called after 2149 * ether_ifattach(). 2150 */ 2151 ifp = sc->hn_ifp = if_alloc(IFT_ETHER); 2152 ifp->if_softc = sc; 2153 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 2154 2155 /* 2156 * Initialize ifmedia earlier so that it can be unconditionally 2157 * destroyed, if error happened later on. 2158 */ 2159 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 2160 2161 /* 2162 * Figure out the # of RX rings (ring_cnt) and the # of TX rings 2163 * to use (tx_ring_cnt). 2164 * 2165 * NOTE: 2166 * The # of RX rings to use is same as the # of channels to use. 2167 */ 2168 ring_cnt = hn_chan_cnt; 2169 if (ring_cnt <= 0) { 2170 /* Default */ 2171 ring_cnt = mp_ncpus; 2172 if (ring_cnt > HN_RING_CNT_DEF_MAX) 2173 ring_cnt = HN_RING_CNT_DEF_MAX; 2174 } else if (ring_cnt > mp_ncpus) { 2175 ring_cnt = mp_ncpus; 2176 } 2177 #ifdef RSS 2178 if (ring_cnt > rss_getnumbuckets()) 2179 ring_cnt = rss_getnumbuckets(); 2180 #endif 2181 2182 tx_ring_cnt = hn_tx_ring_cnt; 2183 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) 2184 tx_ring_cnt = ring_cnt; 2185 #ifdef HN_IFSTART_SUPPORT 2186 if (hn_use_if_start) { 2187 /* ifnet.if_start only needs one TX ring. */ 2188 tx_ring_cnt = 1; 2189 } 2190 #endif 2191 2192 /* 2193 * Set the leader CPU for channels. 2194 */ 2195 sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; 2196 2197 /* 2198 * Create enough TX/RX rings, even if only limited number of 2199 * channels can be allocated. 2200 */ 2201 error = hn_create_tx_data(sc, tx_ring_cnt); 2202 if (error) 2203 goto failed; 2204 error = hn_create_rx_data(sc, ring_cnt); 2205 if (error) 2206 goto failed; 2207 2208 /* 2209 * Create transaction context for NVS and RNDIS transactions. 
2210 */ 2211 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), 2212 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); 2213 if (sc->hn_xact == NULL) { 2214 error = ENXIO; 2215 goto failed; 2216 } 2217 2218 /* 2219 * Install orphan handler for the revocation of this device's 2220 * primary channel. 2221 * 2222 * NOTE: 2223 * The processing order is critical here: 2224 * Install the orphan handler, _before_ testing whether this 2225 * device's primary channel has been revoked or not. 2226 */ 2227 vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); 2228 if (vmbus_chan_is_revoked(sc->hn_prichan)) { 2229 error = ENXIO; 2230 goto failed; 2231 } 2232 2233 /* 2234 * Attach the synthetic parts, i.e. NVS and RNDIS. 2235 */ 2236 error = hn_synth_attach(sc, ETHERMTU); 2237 if (error) 2238 goto failed; 2239 2240 error = hn_rndis_get_eaddr(sc, eaddr); 2241 if (error) 2242 goto failed; 2243 2244 error = hn_rndis_get_mtu(sc, &mtu); 2245 if (error) 2246 mtu = ETHERMTU; 2247 else if (bootverbose) 2248 device_printf(dev, "RNDIS mtu %u\n", mtu); 2249 2250 #if __FreeBSD_version >= 1100099 2251 if (sc->hn_rx_ring_inuse > 1) { 2252 /* 2253 * Reduce TCP segment aggregation limit for multiple 2254 * RX rings to increase ACK timeliness. 2255 */ 2256 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); 2257 } 2258 #endif 2259 2260 /* 2261 * Fixup TX/RX stuffs after synthetic parts are attached. 2262 */ 2263 hn_fixup_tx_data(sc); 2264 hn_fixup_rx_data(sc); 2265 2266 ctx = device_get_sysctl_ctx(dev); 2267 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 2268 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, 2269 &sc->hn_nvs_ver, 0, "NVS version"); 2270 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", 2271 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2272 hn_ndis_version_sysctl, "A", "NDIS version"); 2273 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", 2274 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2275 hn_caps_sysctl, "A", "capabilities"); 2276 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", 2277 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2278 hn_hwassist_sysctl, "A", "hwassist"); 2279 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", 2280 CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); 2281 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", 2282 CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, 2283 "max # of TSO segments"); 2284 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", 2285 CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0, 2286 "max size of TSO segment"); 2287 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", 2288 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2289 hn_rxfilter_sysctl, "A", "rxfilter"); 2290 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", 2291 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2292 hn_rss_hash_sysctl, "A", "RSS hash"); 2293 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap", 2294 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2295 hn_rss_hcap_sysctl, "A", "RSS hash capabilities"); 2296 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash", 2297 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2298 hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs"); 2299 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", 2300 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); 2301 #ifndef RSS 2302 /* 2303 * Don't allow RSS key/indirect table changes, if RSS is defined. 
2304 */ 2305 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", 2306 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2307 hn_rss_key_sysctl, "IU", "RSS key"); 2308 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", 2309 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2310 hn_rss_ind_sysctl, "IU", "RSS indirect table"); 2311 #endif 2312 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", 2313 CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, 2314 "RNDIS offered packet transmission aggregation size limit"); 2315 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", 2316 CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, 2317 "RNDIS offered packet transmission aggregation count limit"); 2318 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", 2319 CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, 2320 "RNDIS packet transmission aggregation alignment"); 2321 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", 2322 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2323 hn_txagg_size_sysctl, "I", 2324 "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); 2325 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", 2326 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2327 hn_txagg_pkts_sysctl, "I", 2328 "Packet transmission aggregation packets, " 2329 "0 -- disable, -1 -- auto"); 2330 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling", 2331 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2332 hn_polling_sysctl, "I", 2333 "Polling frequency: [100,1000000], 0 disable polling"); 2334 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", 2335 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2336 hn_vf_sysctl, "A", "Virtual Function's name"); 2337 if (!hn_xpnt_vf) { 2338 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", 2339 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2340 hn_rxvf_sysctl, "A", "activated Virtual Function's name"); 2341 } else { 2342 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", 2343 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2344 hn_xpnt_vf_enabled_sysctl, "I", 2345 "Transparent VF enabled"); 2346 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", 2347 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2348 hn_xpnt_vf_accbpf_sysctl, "I", 2349 "Accurate BPF for transparent VF"); 2350 } 2351 2352 /* 2353 * Setup the ifmedia, which has been initialized earlier. 2354 */ 2355 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 2356 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 2357 /* XXX ifmedia_set really should do this for us */ 2358 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 2359 2360 /* 2361 * Setup the ifnet for this interface. 2362 */ 2363 2364 ifp->if_baudrate = IF_Gbps(10); 2365 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | 2366 IFF_NEEDSEPOCH; 2367 ifp->if_ioctl = hn_ioctl; 2368 ifp->if_init = hn_init; 2369 #ifdef HN_IFSTART_SUPPORT 2370 if (hn_use_if_start) { 2371 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); 2372 2373 ifp->if_start = hn_start; 2374 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); 2375 ifp->if_snd.ifq_drv_maxlen = qdepth - 1; 2376 IFQ_SET_READY(&ifp->if_snd); 2377 } else 2378 #endif 2379 { 2380 ifp->if_transmit = hn_transmit; 2381 ifp->if_qflush = hn_xmit_qflush; 2382 } 2383 2384 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; 2385 #ifdef foo 2386 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2387 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; 2388 #endif 2389 if (sc->hn_caps & HN_CAP_VLAN) { 2390 /* XXX not sure about VLAN_MTU. 
*/ 2391 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; 2392 } 2393 2394 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; 2395 if (ifp->if_hwassist & HN_CSUM_IP_MASK) 2396 ifp->if_capabilities |= IFCAP_TXCSUM; 2397 if (ifp->if_hwassist & HN_CSUM_IP6_MASK) 2398 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; 2399 if (sc->hn_caps & HN_CAP_TSO4) { 2400 ifp->if_capabilities |= IFCAP_TSO4; 2401 ifp->if_hwassist |= CSUM_IP_TSO; 2402 } 2403 if (sc->hn_caps & HN_CAP_TSO6) { 2404 ifp->if_capabilities |= IFCAP_TSO6; 2405 ifp->if_hwassist |= CSUM_IP6_TSO; 2406 } 2407 2408 /* Enable all available capabilities by default. */ 2409 ifp->if_capenable = ifp->if_capabilities; 2410 2411 /* 2412 * Disable IPv6 TSO and TXCSUM by default, they still can 2413 * be enabled through SIOCSIFCAP. 2414 */ 2415 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); 2416 ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); 2417 2418 if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { 2419 /* 2420 * Lock hn_set_tso_maxsize() to simplify its 2421 * internal logic. 2422 */ 2423 HN_LOCK(sc); 2424 hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); 2425 HN_UNLOCK(sc); 2426 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; 2427 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2428 } 2429 2430 ether_ifattach(ifp, eaddr); 2431 2432 if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { 2433 if_printf(ifp, "TSO segcnt %u segsz %u\n", 2434 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); 2435 } 2436 if (mtu < ETHERMTU) { 2437 if_printf(ifp, "fixup mtu %u -> %u\n", ifp->if_mtu, mtu); 2438 ifp->if_mtu = mtu; 2439 } 2440 2441 /* Inform the upper layer about the long frame support. */ 2442 ifp->if_hdrlen = sizeof(struct ether_vlan_header); 2443 2444 /* 2445 * Kick off link status check. 2446 */ 2447 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 2448 hn_update_link_status(sc); 2449 2450 if (!hn_xpnt_vf) { 2451 sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, 2452 hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); 2453 sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, 2454 hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); 2455 } else { 2456 sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, 2457 hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); 2458 } 2459 2460 /* 2461 * NOTE: 2462 * Subscribe ether_ifattach event, instead of ifnet_arrival event, 2463 * since interface's LLADDR is needed; interface LLADDR is not 2464 * available when ifnet_arrival event is triggered. 2465 */ 2466 sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event, 2467 hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY); 2468 sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event, 2469 hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY); 2470 2471 return (0); 2472 failed: 2473 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) 2474 hn_synth_detach(sc); 2475 hn_detach(dev); 2476 return (error); 2477 } 2478 2479 static int 2480 hn_detach(device_t dev) 2481 { 2482 struct hn_softc *sc = device_get_softc(dev); 2483 struct ifnet *ifp = sc->hn_ifp, *vf_ifp; 2484 2485 if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { 2486 /* 2487 * In case that the vmbus missed the orphan handler 2488 * installation. 
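 *
 * Orphaning the xact context presumably aborts any transaction
 * still waiting for a response on the revoked primary channel, so
 * the teardown below cannot block forever.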
2489 */ 2490 vmbus_xact_ctx_orphan(sc->hn_xact); 2491 } 2492 2493 if (sc->hn_ifaddr_evthand != NULL) 2494 EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand); 2495 if (sc->hn_ifnet_evthand != NULL) 2496 EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand); 2497 if (sc->hn_ifnet_atthand != NULL) { 2498 EVENTHANDLER_DEREGISTER(ether_ifattach_event, 2499 sc->hn_ifnet_atthand); 2500 } 2501 if (sc->hn_ifnet_dethand != NULL) { 2502 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 2503 sc->hn_ifnet_dethand); 2504 } 2505 if (sc->hn_ifnet_lnkhand != NULL) 2506 EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand); 2507 2508 vf_ifp = sc->hn_vf_ifp; 2509 __compiler_membar(); 2510 if (vf_ifp != NULL) 2511 hn_ifnet_detevent(sc, vf_ifp); 2512 2513 if (device_is_attached(dev)) { 2514 HN_LOCK(sc); 2515 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2516 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2517 hn_stop(sc, true); 2518 /* 2519 * NOTE: 2520 * hn_stop() only suspends data, so managment 2521 * stuffs have to be suspended manually here. 2522 */ 2523 hn_suspend_mgmt(sc); 2524 hn_synth_detach(sc); 2525 } 2526 HN_UNLOCK(sc); 2527 ether_ifdetach(ifp); 2528 } 2529 2530 ifmedia_removeall(&sc->hn_media); 2531 hn_destroy_rx_data(sc); 2532 hn_destroy_tx_data(sc); 2533 2534 if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) { 2535 int i; 2536 2537 for (i = 0; i < hn_tx_taskq_cnt; ++i) 2538 taskqueue_free(sc->hn_tx_taskqs[i]); 2539 free(sc->hn_tx_taskqs, M_DEVBUF); 2540 } 2541 taskqueue_free(sc->hn_mgmt_taskq0); 2542 if (sc->hn_vf_taskq != NULL) 2543 taskqueue_free(sc->hn_vf_taskq); 2544 2545 if (sc->hn_xact != NULL) { 2546 /* 2547 * Uninstall the orphan handler _before_ the xact is 2548 * destructed. 2549 */ 2550 vmbus_chan_unset_orphan(sc->hn_prichan); 2551 vmbus_xact_ctx_destroy(sc->hn_xact); 2552 } 2553 2554 if_free(ifp); 2555 2556 HN_LOCK_DESTROY(sc); 2557 rm_destroy(&sc->hn_vf_lock); 2558 return (0); 2559 } 2560 2561 static int 2562 hn_shutdown(device_t dev) 2563 { 2564 2565 return (0); 2566 } 2567 2568 static void 2569 hn_link_status(struct hn_softc *sc) 2570 { 2571 uint32_t link_status; 2572 int error; 2573 2574 error = hn_rndis_get_linkstatus(sc, &link_status); 2575 if (error) { 2576 /* XXX what to do? */ 2577 return; 2578 } 2579 2580 if (link_status == NDIS_MEDIA_STATE_CONNECTED) 2581 sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; 2582 else 2583 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2584 if_link_state_change(sc->hn_ifp, 2585 (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? 2586 LINK_STATE_UP : LINK_STATE_DOWN); 2587 } 2588 2589 static void 2590 hn_link_taskfunc(void *xsc, int pending __unused) 2591 { 2592 struct hn_softc *sc = xsc; 2593 2594 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 2595 return; 2596 hn_link_status(sc); 2597 } 2598 2599 static void 2600 hn_netchg_init_taskfunc(void *xsc, int pending __unused) 2601 { 2602 struct hn_softc *sc = xsc; 2603 2604 /* Prevent any link status checks from running. */ 2605 sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; 2606 2607 /* 2608 * Fake up a [link down --> link up] state change; 5 seconds 2609 * delay is used, which closely simulates miibus reaction 2610 * upon link down event. 
2611 */ 2612 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2613 if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); 2614 taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, 2615 &sc->hn_netchg_status, 5 * hz); 2616 } 2617 2618 static void 2619 hn_netchg_status_taskfunc(void *xsc, int pending __unused) 2620 { 2621 struct hn_softc *sc = xsc; 2622 2623 /* Re-allow link status checks. */ 2624 sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; 2625 hn_link_status(sc); 2626 } 2627 2628 static void 2629 hn_update_link_status(struct hn_softc *sc) 2630 { 2631 2632 if (sc->hn_mgmt_taskq != NULL) 2633 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); 2634 } 2635 2636 static void 2637 hn_change_network(struct hn_softc *sc) 2638 { 2639 2640 if (sc->hn_mgmt_taskq != NULL) 2641 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); 2642 } 2643 2644 static __inline int 2645 hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, 2646 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) 2647 { 2648 struct mbuf *m = *m_head; 2649 int error; 2650 2651 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); 2652 2653 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, 2654 m, segs, nsegs, BUS_DMA_NOWAIT); 2655 if (error == EFBIG) { 2656 struct mbuf *m_new; 2657 2658 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); 2659 if (m_new == NULL) 2660 return ENOBUFS; 2661 else 2662 *m_head = m = m_new; 2663 txr->hn_tx_collapsed++; 2664 2665 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, 2666 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); 2667 } 2668 if (!error) { 2669 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, 2670 BUS_DMASYNC_PREWRITE); 2671 txd->flags |= HN_TXD_FLAG_DMAMAP; 2672 } 2673 return error; 2674 } 2675 2676 static __inline int 2677 hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) 2678 { 2679 2680 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, 2681 ("put an onlist txd %#x", txd->flags)); 2682 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2683 ("put an onagg txd %#x", txd->flags)); 2684 2685 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2686 if (atomic_fetchadd_int(&txd->refs, -1) != 1) 2687 return 0; 2688 2689 if (!STAILQ_EMPTY(&txd->agg_list)) { 2690 struct hn_txdesc *tmp_txd; 2691 2692 while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { 2693 int freed; 2694 2695 KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), 2696 ("resursive aggregation on aggregated txdesc")); 2697 KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), 2698 ("not aggregated txdesc")); 2699 KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2700 ("aggregated txdesc uses dmamap")); 2701 KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2702 ("aggregated txdesc consumes " 2703 "chimney sending buffer")); 2704 KASSERT(tmp_txd->chim_size == 0, 2705 ("aggregated txdesc has non-zero " 2706 "chimney sending size")); 2707 2708 STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); 2709 tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; 2710 freed = hn_txdesc_put(txr, tmp_txd); 2711 KASSERT(freed, ("failed to free aggregated txdesc")); 2712 } 2713 } 2714 2715 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 2716 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2717 ("chim txd uses dmamap")); 2718 hn_chim_free(txr->hn_sc, txd->chim_index); 2719 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2720 txd->chim_size = 0; 2721 } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { 2722 bus_dmamap_sync(txr->hn_tx_data_dtag, 2723 txd->data_dmap, BUS_DMASYNC_POSTWRITE); 2724 
bus_dmamap_unload(txr->hn_tx_data_dtag, 2725 txd->data_dmap); 2726 txd->flags &= ~HN_TXD_FLAG_DMAMAP; 2727 } 2728 2729 if (txd->m != NULL) { 2730 m_freem(txd->m); 2731 txd->m = NULL; 2732 } 2733 2734 txd->flags |= HN_TXD_FLAG_ONLIST; 2735 #ifndef HN_USE_TXDESC_BUFRING 2736 mtx_lock_spin(&txr->hn_txlist_spin); 2737 KASSERT(txr->hn_txdesc_avail >= 0 && 2738 txr->hn_txdesc_avail < txr->hn_txdesc_cnt, 2739 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); 2740 txr->hn_txdesc_avail++; 2741 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 2742 mtx_unlock_spin(&txr->hn_txlist_spin); 2743 #else /* HN_USE_TXDESC_BUFRING */ 2744 #ifdef HN_DEBUG 2745 atomic_add_int(&txr->hn_txdesc_avail, 1); 2746 #endif 2747 buf_ring_enqueue(txr->hn_txdesc_br, txd); 2748 #endif /* !HN_USE_TXDESC_BUFRING */ 2749 2750 return 1; 2751 } 2752 2753 static __inline struct hn_txdesc * 2754 hn_txdesc_get(struct hn_tx_ring *txr) 2755 { 2756 struct hn_txdesc *txd; 2757 2758 #ifndef HN_USE_TXDESC_BUFRING 2759 mtx_lock_spin(&txr->hn_txlist_spin); 2760 txd = SLIST_FIRST(&txr->hn_txlist); 2761 if (txd != NULL) { 2762 KASSERT(txr->hn_txdesc_avail > 0, 2763 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); 2764 txr->hn_txdesc_avail--; 2765 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 2766 } 2767 mtx_unlock_spin(&txr->hn_txlist_spin); 2768 #else 2769 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); 2770 #endif 2771 2772 if (txd != NULL) { 2773 #ifdef HN_USE_TXDESC_BUFRING 2774 #ifdef HN_DEBUG 2775 atomic_subtract_int(&txr->hn_txdesc_avail, 1); 2776 #endif 2777 #endif /* HN_USE_TXDESC_BUFRING */ 2778 KASSERT(txd->m == NULL && txd->refs == 0 && 2779 STAILQ_EMPTY(&txd->agg_list) && 2780 txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 2781 txd->chim_size == 0 && 2782 (txd->flags & HN_TXD_FLAG_ONLIST) && 2783 (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && 2784 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); 2785 txd->flags &= ~HN_TXD_FLAG_ONLIST; 2786 txd->refs = 1; 2787 } 2788 return txd; 2789 } 2790 2791 static __inline void 2792 hn_txdesc_hold(struct hn_txdesc *txd) 2793 { 2794 2795 /* 0->1 transition will never work */ 2796 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2797 atomic_add_int(&txd->refs, 1); 2798 } 2799 2800 static __inline void 2801 hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) 2802 { 2803 2804 KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2805 ("recursive aggregation on aggregating txdesc")); 2806 2807 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2808 ("already aggregated")); 2809 KASSERT(STAILQ_EMPTY(&txd->agg_list), 2810 ("recursive aggregation on to-be-aggregated txdesc")); 2811 2812 txd->flags |= HN_TXD_FLAG_ONAGG; 2813 STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); 2814 } 2815 2816 static bool 2817 hn_tx_ring_pending(struct hn_tx_ring *txr) 2818 { 2819 bool pending = false; 2820 2821 #ifndef HN_USE_TXDESC_BUFRING 2822 mtx_lock_spin(&txr->hn_txlist_spin); 2823 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) 2824 pending = true; 2825 mtx_unlock_spin(&txr->hn_txlist_spin); 2826 #else 2827 if (!buf_ring_full(txr->hn_txdesc_br)) 2828 pending = true; 2829 #endif 2830 return (pending); 2831 } 2832 2833 static __inline void 2834 hn_txeof(struct hn_tx_ring *txr) 2835 { 2836 txr->hn_has_txeof = 0; 2837 txr->hn_txeof(txr); 2838 } 2839 2840 static void 2841 hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, 2842 struct vmbus_channel *chan, const void *data __unused, int dlen __unused) 2843 { 2844 struct hn_txdesc *txd = sndc->hn_cbarg; 2845 struct 
hn_tx_ring *txr; 2846 2847 txr = txd->txr; 2848 KASSERT(txr->hn_chan == chan, 2849 ("channel mismatch, on chan%u, should be chan%u", 2850 vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); 2851 2852 txr->hn_has_txeof = 1; 2853 hn_txdesc_put(txr, txd); 2854 2855 ++txr->hn_txdone_cnt; 2856 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { 2857 txr->hn_txdone_cnt = 0; 2858 if (txr->hn_oactive) 2859 hn_txeof(txr); 2860 } 2861 } 2862 2863 static void 2864 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 2865 { 2866 #if defined(INET) || defined(INET6) 2867 tcp_lro_flush_all(&rxr->hn_lro); 2868 #endif 2869 2870 /* 2871 * NOTE: 2872 * 'txr' could be NULL, if multiple channels and 2873 * ifnet.if_start method are enabled. 2874 */ 2875 if (txr == NULL || !txr->hn_has_txeof) 2876 return; 2877 2878 txr->hn_txdone_cnt = 0; 2879 hn_txeof(txr); 2880 } 2881 2882 static __inline uint32_t 2883 hn_rndis_pktmsg_offset(uint32_t ofs) 2884 { 2885 2886 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 2887 ("invalid RNDIS packet msg offset %u", ofs)); 2888 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 2889 } 2890 2891 static __inline void * 2892 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 2893 size_t pi_dlen, uint32_t pi_type) 2894 { 2895 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 2896 struct rndis_pktinfo *pi; 2897 2898 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 2899 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 2900 2901 /* 2902 * Per-packet-info does not move; it only grows. 2903 * 2904 * NOTE: 2905 * rm_pktinfooffset in this phase counts from the beginning 2906 * of rndis_packet_msg. 2907 */ 2908 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 2909 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 2910 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 2911 pkt->rm_pktinfolen); 2912 pkt->rm_pktinfolen += pi_size; 2913 2914 pi->rm_size = pi_size; 2915 pi->rm_type = pi_type; 2916 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 2917 2918 return (pi->rm_data); 2919 } 2920 2921 static __inline int 2922 hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) 2923 { 2924 struct hn_txdesc *txd; 2925 struct mbuf *m; 2926 int error, pkts; 2927 2928 txd = txr->hn_agg_txd; 2929 KASSERT(txd != NULL, ("no aggregate txdesc")); 2930 2931 /* 2932 * Since hn_txpkt() will reset this temporary stat, save 2933 * it now, so that oerrors can be updated properly, if 2934 * hn_txpkt() ever fails. 2935 */ 2936 pkts = txr->hn_stat_pkts; 2937 2938 /* 2939 * Since txd's mbuf will _not_ be freed upon hn_txpkt() 2940 * failure, save it for later freeing, if hn_txpkt() ever 2941 * fails. 2942 */ 2943 m = txd->m; 2944 error = hn_txpkt(ifp, txr, txd); 2945 if (__predict_false(error)) { 2946 /* txd is freed, but m is not. */ 2947 m_freem(m); 2948 2949 txr->hn_flush_failed++; 2950 if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); 2951 } 2952 2953 /* Reset all aggregation states. 
*/ 2954 txr->hn_agg_txd = NULL; 2955 txr->hn_agg_szleft = 0; 2956 txr->hn_agg_pktleft = 0; 2957 txr->hn_agg_prevpkt = NULL; 2958 2959 return (error); 2960 } 2961 2962 static void * 2963 hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2964 int pktsize) 2965 { 2966 void *chim; 2967 2968 if (txr->hn_agg_txd != NULL) { 2969 if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { 2970 struct hn_txdesc *agg_txd = txr->hn_agg_txd; 2971 struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; 2972 int olen; 2973 2974 /* 2975 * Update the previous RNDIS packet's total length, 2976 * it can be increased due to the mandatory alignment 2977 * padding for this RNDIS packet. And update the 2978 * aggregating txdesc's chimney sending buffer size 2979 * accordingly. 2980 * 2981 * XXX 2982 * Zero-out the padding, as required by the RNDIS spec. 2983 */ 2984 olen = pkt->rm_len; 2985 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 2986 agg_txd->chim_size += pkt->rm_len - olen; 2987 2988 /* Link this txdesc to the parent. */ 2989 hn_txdesc_agg(agg_txd, txd); 2990 2991 chim = (uint8_t *)pkt + pkt->rm_len; 2992 /* Save the current packet for later fixup. */ 2993 txr->hn_agg_prevpkt = chim; 2994 2995 txr->hn_agg_pktleft--; 2996 txr->hn_agg_szleft -= pktsize; 2997 if (txr->hn_agg_szleft <= 2998 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 2999 /* 3000 * Probably can't aggregate more packets, 3001 * flush this aggregating txdesc proactively. 3002 */ 3003 txr->hn_agg_pktleft = 0; 3004 } 3005 /* Done! */ 3006 return (chim); 3007 } 3008 hn_flush_txagg(ifp, txr); 3009 } 3010 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 3011 3012 txr->hn_tx_chimney_tried++; 3013 txd->chim_index = hn_chim_alloc(txr->hn_sc); 3014 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 3015 return (NULL); 3016 txr->hn_tx_chimney++; 3017 3018 chim = txr->hn_sc->hn_chim + 3019 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 3020 3021 if (txr->hn_agg_pktmax > 1 && 3022 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 3023 txr->hn_agg_txd = txd; 3024 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 3025 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 3026 txr->hn_agg_prevpkt = chim; 3027 } 3028 return (chim); 3029 } 3030 3031 /* 3032 * NOTE: 3033 * If this function fails, then both txd and m_head0 will be freed. 
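 *
 * hn_encap() picks one of two send methods: if the RNDIS packet
 * message plus data fits under the chimney size, it is copied into
 * the chimney sending buffer (possibly aggregated with other
 * packets); otherwise the mbuf is DMA-loaded and sent as a gather
 * list of guest physical ranges.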
3034 */ 3035 static int 3036 hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 3037 struct mbuf **m_head0) 3038 { 3039 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 3040 int error, nsegs, i; 3041 struct mbuf *m_head = *m_head0; 3042 struct rndis_packet_msg *pkt; 3043 uint32_t *pi_data; 3044 void *chim = NULL; 3045 int pkt_hlen, pkt_size; 3046 3047 pkt = txd->rndis_pkt; 3048 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 3049 if (pkt_size < txr->hn_chim_size) { 3050 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 3051 if (chim != NULL) 3052 pkt = chim; 3053 } else { 3054 if (txr->hn_agg_txd != NULL) 3055 hn_flush_txagg(ifp, txr); 3056 } 3057 3058 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 3059 pkt->rm_len = m_head->m_pkthdr.len; 3060 pkt->rm_dataoffset = 0; 3061 pkt->rm_datalen = m_head->m_pkthdr.len; 3062 pkt->rm_oobdataoffset = 0; 3063 pkt->rm_oobdatalen = 0; 3064 pkt->rm_oobdataelements = 0; 3065 pkt->rm_pktinfooffset = sizeof(*pkt); 3066 pkt->rm_pktinfolen = 0; 3067 pkt->rm_vchandle = 0; 3068 pkt->rm_reserved = 0; 3069 3070 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 3071 /* 3072 * Set the hash value for this packet, so that the host could 3073 * dispatch the TX done event for this packet back to this TX 3074 * ring's channel. 3075 */ 3076 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3077 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 3078 *pi_data = txr->hn_tx_idx; 3079 } 3080 3081 if (m_head->m_flags & M_VLANTAG) { 3082 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3083 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 3084 *pi_data = NDIS_VLAN_INFO_MAKE( 3085 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 3086 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 3087 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 3088 } 3089 3090 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3091 #if defined(INET6) || defined(INET) 3092 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3093 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 3094 #ifdef INET 3095 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 3096 *pi_data = NDIS_LSO2_INFO_MAKEIPV4( 3097 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3098 m_head->m_pkthdr.tso_segsz); 3099 } 3100 #endif 3101 #if defined(INET6) && defined(INET) 3102 else 3103 #endif 3104 #ifdef INET6 3105 { 3106 *pi_data = NDIS_LSO2_INFO_MAKEIPV6( 3107 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3108 m_head->m_pkthdr.tso_segsz); 3109 } 3110 #endif 3111 #endif /* INET6 || INET */ 3112 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 3113 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3114 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 3115 if (m_head->m_pkthdr.csum_flags & 3116 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 3117 *pi_data = NDIS_TXCSUM_INFO_IPV6; 3118 } else { 3119 *pi_data = NDIS_TXCSUM_INFO_IPV4; 3120 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3121 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 3122 } 3123 3124 if (m_head->m_pkthdr.csum_flags & 3125 (CSUM_IP_TCP | CSUM_IP6_TCP)) { 3126 *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS( 3127 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3128 } else if (m_head->m_pkthdr.csum_flags & 3129 (CSUM_IP_UDP | CSUM_IP6_UDP)) { 3130 *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS( 3131 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3132 } 3133 } 3134 3135 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 3136 /* Fixup RNDIS packet message total length */ 3137 pkt->rm_len += pkt_hlen; 3138 /* Convert RNDIS packet message offsets */ 3139 pkt->rm_dataoffset = 
hn_rndis_pktmsg_offset(pkt_hlen); 3140 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 3141 3142 /* 3143 * Fast path: Chimney sending. 3144 */ 3145 if (chim != NULL) { 3146 struct hn_txdesc *tgt_txd = txd; 3147 3148 if (txr->hn_agg_txd != NULL) { 3149 tgt_txd = txr->hn_agg_txd; 3150 #ifdef INVARIANTS 3151 *m_head0 = NULL; 3152 #endif 3153 } 3154 3155 KASSERT(pkt == chim, 3156 ("RNDIS pkt not in chimney sending buffer")); 3157 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 3158 ("chimney sending buffer is not used")); 3159 tgt_txd->chim_size += pkt->rm_len; 3160 3161 m_copydata(m_head, 0, m_head->m_pkthdr.len, 3162 ((uint8_t *)chim) + pkt_hlen); 3163 3164 txr->hn_gpa_cnt = 0; 3165 txr->hn_sendpkt = hn_txpkt_chim; 3166 goto done; 3167 } 3168 3169 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 3170 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 3171 ("chimney buffer is used")); 3172 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 3173 3174 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 3175 if (__predict_false(error)) { 3176 int freed; 3177 3178 /* 3179 * This mbuf is not linked w/ the txd yet, so free it now. 3180 */ 3181 m_freem(m_head); 3182 *m_head0 = NULL; 3183 3184 freed = hn_txdesc_put(txr, txd); 3185 KASSERT(freed != 0, 3186 ("fail to free txd upon txdma error")); 3187 3188 txr->hn_txdma_failed++; 3189 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 3190 return error; 3191 } 3192 *m_head0 = m_head; 3193 3194 /* +1 RNDIS packet message */ 3195 txr->hn_gpa_cnt = nsegs + 1; 3196 3197 /* send packet with page buffer */ 3198 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 3199 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 3200 txr->hn_gpa[0].gpa_len = pkt_hlen; 3201 3202 /* 3203 * Fill the page buffers with mbuf info after the page 3204 * buffer for RNDIS packet message. 3205 */ 3206 for (i = 0; i < nsegs; ++i) { 3207 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 3208 3209 gpa->gpa_page = atop(segs[i].ds_addr); 3210 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 3211 gpa->gpa_len = segs[i].ds_len; 3212 } 3213 3214 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 3215 txd->chim_size = 0; 3216 txr->hn_sendpkt = hn_txpkt_sglist; 3217 done: 3218 txd->m = m_head; 3219 3220 /* Set the completion routine */ 3221 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 3222 3223 /* Update temporary stats for later use. */ 3224 txr->hn_stat_pkts++; 3225 txr->hn_stat_size += m_head->m_pkthdr.len; 3226 if (m_head->m_flags & M_MCAST) 3227 txr->hn_stat_mcasts++; 3228 3229 return 0; 3230 } 3231 3232 /* 3233 * NOTE: 3234 * If this function fails, then txd will be freed, but the mbuf 3235 * associated w/ the txd will _not_ be freed. 3236 */ 3237 static int 3238 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 3239 { 3240 int error, send_failed = 0, has_bpf; 3241 3242 again: 3243 has_bpf = bpf_peers_present(ifp->if_bpf); 3244 if (has_bpf) { 3245 /* 3246 * Make sure that this txd and any aggregated txds are not 3247 * freed before ETHER_BPF_MTAP. 
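 *
 * Without this extra reference the TX completion (hn_txpkt_done())
 * could run as soon as the send is posted and release the txd,
 * and its mbuf, under us.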
3248 */ 3249 hn_txdesc_hold(txd); 3250 } 3251 error = txr->hn_sendpkt(txr, txd); 3252 if (!error) { 3253 if (has_bpf) { 3254 const struct hn_txdesc *tmp_txd; 3255 3256 ETHER_BPF_MTAP(ifp, txd->m); 3257 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 3258 ETHER_BPF_MTAP(ifp, tmp_txd->m); 3259 } 3260 3261 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 3262 #ifdef HN_IFSTART_SUPPORT 3263 if (!hn_use_if_start) 3264 #endif 3265 { 3266 if_inc_counter(ifp, IFCOUNTER_OBYTES, 3267 txr->hn_stat_size); 3268 if (txr->hn_stat_mcasts != 0) { 3269 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 3270 txr->hn_stat_mcasts); 3271 } 3272 } 3273 txr->hn_pkts += txr->hn_stat_pkts; 3274 txr->hn_sends++; 3275 } 3276 if (has_bpf) 3277 hn_txdesc_put(txr, txd); 3278 3279 if (__predict_false(error)) { 3280 int freed; 3281 3282 /* 3283 * This should "really rarely" happen. 3284 * 3285 * XXX Too many RX to be acked or too many sideband 3286 * commands to run? Ask netvsc_channel_rollup() 3287 * to kick start later. 3288 */ 3289 txr->hn_has_txeof = 1; 3290 if (!send_failed) { 3291 txr->hn_send_failed++; 3292 send_failed = 1; 3293 /* 3294 * Try sending again after set hn_has_txeof; 3295 * in case that we missed the last 3296 * netvsc_channel_rollup(). 3297 */ 3298 goto again; 3299 } 3300 if_printf(ifp, "send failed\n"); 3301 3302 /* 3303 * Caller will perform further processing on the 3304 * associated mbuf, so don't free it in hn_txdesc_put(); 3305 * only unload it from the DMA map in hn_txdesc_put(), 3306 * if it was loaded. 3307 */ 3308 txd->m = NULL; 3309 freed = hn_txdesc_put(txr, txd); 3310 KASSERT(freed != 0, 3311 ("fail to free txd upon send error")); 3312 3313 txr->hn_send_failed++; 3314 } 3315 3316 /* Reset temporary stats, after this sending is done. */ 3317 txr->hn_stat_size = 0; 3318 txr->hn_stat_pkts = 0; 3319 txr->hn_stat_mcasts = 0; 3320 3321 return (error); 3322 } 3323 3324 /* 3325 * Append the specified data to the indicated mbuf chain, 3326 * Extend the mbuf chain if the new data does not fit in 3327 * existing space. 3328 * 3329 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 3330 * There should be an equivalent in the kernel mbuf code, 3331 * but there does not appear to be one yet. 3332 * 3333 * Differs from m_append() in that additional mbufs are 3334 * allocated with cluster size MJUMPAGESIZE, and filled 3335 * accordingly. 3336 * 3337 * Return 1 if able to complete the job; otherwise 0. 3338 */ 3339 static int 3340 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 3341 { 3342 struct mbuf *m, *n; 3343 int remainder, space; 3344 3345 for (m = m0; m->m_next != NULL; m = m->m_next) 3346 ; 3347 remainder = len; 3348 space = M_TRAILINGSPACE(m); 3349 if (space > 0) { 3350 /* 3351 * Copy into available space. 3352 */ 3353 if (space > remainder) 3354 space = remainder; 3355 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 3356 m->m_len += space; 3357 cp += space; 3358 remainder -= space; 3359 } 3360 while (remainder > 0) { 3361 /* 3362 * Allocate a new mbuf; could check space 3363 * and allocate a cluster instead. 
3364 */ 3365 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 3366 if (n == NULL) 3367 break; 3368 n->m_len = min(MJUMPAGESIZE, remainder); 3369 bcopy(cp, mtod(n, caddr_t), n->m_len); 3370 cp += n->m_len; 3371 remainder -= n->m_len; 3372 m->m_next = n; 3373 m = n; 3374 } 3375 if (m0->m_flags & M_PKTHDR) 3376 m0->m_pkthdr.len += len - remainder; 3377 3378 return (remainder == 0); 3379 } 3380 3381 #if defined(INET) || defined(INET6) 3382 static __inline int 3383 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 3384 { 3385 #if __FreeBSD_version >= 1100095 3386 if (hn_lro_mbufq_depth) { 3387 tcp_lro_queue_mbuf(lc, m); 3388 return 0; 3389 } 3390 #endif 3391 return tcp_lro_rx(lc, m, 0); 3392 } 3393 #endif 3394 3395 static int 3396 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 3397 const struct hn_rxinfo *info) 3398 { 3399 struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; 3400 struct mbuf *m_new; 3401 int size, do_lro = 0, do_csum = 1, is_vf = 0; 3402 int hash_type = M_HASHTYPE_NONE; 3403 int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE; 3404 3405 ifp = hn_ifp; 3406 if (rxr->hn_rxvf_ifp != NULL) { 3407 /* 3408 * Non-transparent mode VF; pretend this packet is from 3409 * the VF. 3410 */ 3411 ifp = rxr->hn_rxvf_ifp; 3412 is_vf = 1; 3413 } else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) { 3414 /* Transparent mode VF. */ 3415 is_vf = 1; 3416 } 3417 3418 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 3419 /* 3420 * NOTE: 3421 * See the NOTE of hn_rndis_init_fixat(). This 3422 * function can be reached, immediately after the 3423 * RNDIS is initialized but before the ifnet is 3424 * setup on the hn_attach() path; drop the unexpected 3425 * packets. 3426 */ 3427 return (0); 3428 } 3429 3430 if (__predict_false(dlen < ETHER_HDR_LEN)) { 3431 if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); 3432 return (0); 3433 } 3434 3435 if (dlen <= MHLEN) { 3436 m_new = m_gethdr(M_NOWAIT, MT_DATA); 3437 if (m_new == NULL) { 3438 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3439 return (0); 3440 } 3441 memcpy(mtod(m_new, void *), data, dlen); 3442 m_new->m_pkthdr.len = m_new->m_len = dlen; 3443 rxr->hn_small_pkts++; 3444 } else { 3445 /* 3446 * Get an mbuf with a cluster. For packets 2K or less, 3447 * get a standard 2K cluster. For anything larger, get a 3448 * 4K cluster. Any buffers larger than 4K can cause problems 3449 * if looped around to the Hyper-V TX channel, so avoid them. 
3450 */ 3451 size = MCLBYTES; 3452 if (dlen > MCLBYTES) { 3453 /* 4096 */ 3454 size = MJUMPAGESIZE; 3455 } 3456 3457 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 3458 if (m_new == NULL) { 3459 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3460 return (0); 3461 } 3462 3463 hv_m_append(m_new, dlen, data); 3464 } 3465 m_new->m_pkthdr.rcvif = ifp; 3466 3467 if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) 3468 do_csum = 0; 3469 3470 /* receive side checksum offload */ 3471 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 3472 /* IP csum offload */ 3473 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 3474 m_new->m_pkthdr.csum_flags |= 3475 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3476 rxr->hn_csum_ip++; 3477 } 3478 3479 /* TCP/UDP csum offload */ 3480 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 3481 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 3482 m_new->m_pkthdr.csum_flags |= 3483 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3484 m_new->m_pkthdr.csum_data = 0xffff; 3485 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 3486 rxr->hn_csum_tcp++; 3487 else 3488 rxr->hn_csum_udp++; 3489 } 3490 3491 /* 3492 * XXX 3493 * As of this write (Oct 28th, 2016), host side will turn 3494 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 3495 * the do_lro setting here is actually _not_ accurate. We 3496 * depend on the RSS hash type check to reset do_lro. 3497 */ 3498 if ((info->csum_info & 3499 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 3500 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 3501 do_lro = 1; 3502 } else { 3503 hn_rxpkt_proto(m_new, &l3proto, &l4proto); 3504 if (l3proto == ETHERTYPE_IP) { 3505 if (l4proto == IPPROTO_TCP) { 3506 if (do_csum && 3507 (rxr->hn_trust_hcsum & 3508 HN_TRUST_HCSUM_TCP)) { 3509 rxr->hn_csum_trusted++; 3510 m_new->m_pkthdr.csum_flags |= 3511 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3512 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3513 m_new->m_pkthdr.csum_data = 0xffff; 3514 } 3515 do_lro = 1; 3516 } else if (l4proto == IPPROTO_UDP) { 3517 if (do_csum && 3518 (rxr->hn_trust_hcsum & 3519 HN_TRUST_HCSUM_UDP)) { 3520 rxr->hn_csum_trusted++; 3521 m_new->m_pkthdr.csum_flags |= 3522 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3523 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3524 m_new->m_pkthdr.csum_data = 0xffff; 3525 } 3526 } else if (l4proto != IPPROTO_DONE && do_csum && 3527 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 3528 rxr->hn_csum_trusted++; 3529 m_new->m_pkthdr.csum_flags |= 3530 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3531 } 3532 } 3533 } 3534 3535 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 3536 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 3537 NDIS_VLAN_INFO_ID(info->vlan_info), 3538 NDIS_VLAN_INFO_PRI(info->vlan_info), 3539 NDIS_VLAN_INFO_CFI(info->vlan_info)); 3540 m_new->m_flags |= M_VLANTAG; 3541 } 3542 3543 /* 3544 * If VF is activated (tranparent/non-transparent mode does not 3545 * matter here). 3546 * 3547 * - Disable LRO 3548 * 3549 * hn(4) will only receive broadcast packets, multicast packets, 3550 * TCP SYN and SYN|ACK (in Azure), LRO is useless for these 3551 * packet types. 3552 * 3553 * For non-transparent, we definitely _cannot_ enable LRO at 3554 * all, since the LRO flush will use hn(4) as the receiving 3555 * interface; i.e. hn_ifp->if_input(hn_ifp, m). 3556 */ 3557 if (is_vf) 3558 do_lro = 0; 3559 3560 /* 3561 * If VF is activated (tranparent/non-transparent mode does not 3562 * matter here), do _not_ mess with unsupported hash types or 3563 * functions. 
3564 */ 3565 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 3566 rxr->hn_rss_pkts++; 3567 m_new->m_pkthdr.flowid = info->hash_value; 3568 if (!is_vf) 3569 hash_type = M_HASHTYPE_OPAQUE_HASH; 3570 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 3571 NDIS_HASH_FUNCTION_TOEPLITZ) { 3572 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK & 3573 rxr->hn_mbuf_hash); 3574 3575 /* 3576 * NOTE: 3577 * do_lro is resetted, if the hash types are not TCP 3578 * related. See the comment in the above csum_flags 3579 * setup section. 3580 */ 3581 switch (type) { 3582 case NDIS_HASH_IPV4: 3583 hash_type = M_HASHTYPE_RSS_IPV4; 3584 do_lro = 0; 3585 break; 3586 3587 case NDIS_HASH_TCP_IPV4: 3588 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 3589 if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) { 3590 int def_htype = M_HASHTYPE_OPAQUE_HASH; 3591 3592 if (is_vf) 3593 def_htype = M_HASHTYPE_NONE; 3594 3595 /* 3596 * UDP 4-tuple hash is delivered as 3597 * TCP 4-tuple hash. 3598 */ 3599 if (l3proto == ETHERTYPE_MAX) { 3600 hn_rxpkt_proto(m_new, 3601 &l3proto, &l4proto); 3602 } 3603 if (l3proto == ETHERTYPE_IP) { 3604 if (l4proto == IPPROTO_UDP && 3605 (rxr->hn_mbuf_hash & 3606 NDIS_HASH_UDP_IPV4_X)) { 3607 hash_type = 3608 M_HASHTYPE_RSS_UDP_IPV4; 3609 do_lro = 0; 3610 } else if (l4proto != 3611 IPPROTO_TCP) { 3612 hash_type = def_htype; 3613 do_lro = 0; 3614 } 3615 } else { 3616 hash_type = def_htype; 3617 do_lro = 0; 3618 } 3619 } 3620 break; 3621 3622 case NDIS_HASH_IPV6: 3623 hash_type = M_HASHTYPE_RSS_IPV6; 3624 do_lro = 0; 3625 break; 3626 3627 case NDIS_HASH_IPV6_EX: 3628 hash_type = M_HASHTYPE_RSS_IPV6_EX; 3629 do_lro = 0; 3630 break; 3631 3632 case NDIS_HASH_TCP_IPV6: 3633 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 3634 break; 3635 3636 case NDIS_HASH_TCP_IPV6_EX: 3637 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 3638 break; 3639 } 3640 } 3641 } else if (!is_vf) { 3642 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 3643 hash_type = M_HASHTYPE_OPAQUE; 3644 } 3645 M_HASHTYPE_SET(m_new, hash_type); 3646 3647 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 3648 if (hn_ifp != ifp) { 3649 const struct ether_header *eh; 3650 3651 /* 3652 * Non-transparent mode VF is activated. 3653 */ 3654 3655 /* 3656 * Allow tapping on hn(4). 3657 */ 3658 ETHER_BPF_MTAP(hn_ifp, m_new); 3659 3660 /* 3661 * Update hn(4)'s stats. 3662 */ 3663 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 3664 if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len); 3665 /* Checked at the beginning of this function. */ 3666 KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame")); 3667 eh = mtod(m_new, struct ether_header *); 3668 if (ETHER_IS_MULTICAST(eh->ether_dhost)) 3669 if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1); 3670 } 3671 rxr->hn_pkts++; 3672 3673 if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) { 3674 #if defined(INET) || defined(INET6) 3675 struct lro_ctrl *lro = &rxr->hn_lro; 3676 3677 if (lro->lro_cnt) { 3678 rxr->hn_lro_tried++; 3679 if (hn_lro_rx(lro, m_new) == 0) { 3680 /* DONE! 
 */
				return 0;
			}
		}
#endif
	}
	ifp->if_input(ifp, m_new);

	return (0);
}

static int
hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct hn_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data, ifr_vf;
	struct ifnet *vf_ifp;
	int mask, error = 0;
	struct ifrsskey *ifrk;
	struct ifrsshash *ifrh;
	uint32_t mtu;

	switch (cmd) {
	case SIOCSIFMTU:
		if (ifr->ifr_mtu > HN_MTU_MAX) {
			error = EINVAL;
			break;
		}

		HN_LOCK(sc);

		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
			HN_UNLOCK(sc);
			break;
		}

		if ((sc->hn_caps & HN_CAP_MTU) == 0) {
			/* Can't change MTU */
			HN_UNLOCK(sc);
			error = EOPNOTSUPP;
			break;
		}

		if (ifp->if_mtu == ifr->ifr_mtu) {
			HN_UNLOCK(sc);
			break;
		}

		if (hn_xpnt_vf_isready(sc)) {
			vf_ifp = sc->hn_vf_ifp;
			ifr_vf = *ifr;
			strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname,
			    sizeof(ifr_vf.ifr_name));
			error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU,
			    (caddr_t)&ifr_vf);
			if (error) {
				HN_UNLOCK(sc);
				if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n",
				    vf_ifp->if_xname, ifr->ifr_mtu, error);
				break;
			}
		}

		/*
		 * Suspend this interface before the synthetic parts
		 * are ripped.
		 */
		hn_suspend(sc);

		/*
		 * Detach the synthetic parts, i.e. NVS and RNDIS.
		 */
		hn_synth_detach(sc);

		/*
		 * Reattach the synthetic parts, i.e. NVS and RNDIS,
		 * with the new MTU setting.
		 */
		error = hn_synth_attach(sc, ifr->ifr_mtu);
		if (error) {
			HN_UNLOCK(sc);
			break;
		}

		error = hn_rndis_get_mtu(sc, &mtu);
		if (error)
			mtu = ifr->ifr_mtu;
		else if (bootverbose)
			if_printf(ifp, "RNDIS mtu %u\n", mtu);

		/*
		 * Commit the requested MTU, after the synthetic parts
		 * have been successfully attached.
		 */
		if (mtu >= ifr->ifr_mtu) {
			mtu = ifr->ifr_mtu;
		} else {
			if_printf(ifp, "fixup mtu %d -> %u\n",
			    ifr->ifr_mtu, mtu);
		}
		ifp->if_mtu = mtu;

		/*
		 * Synthetic parts' reattach may change the chimney
		 * sending size; update it.
		 */
		if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
			hn_set_chim_size(sc, sc->hn_chim_szmax);

		/*
		 * Make sure that various parameters based on MTU are
		 * still valid, after the MTU change.
		 */
		hn_mtu_change_fixup(sc);

		/*
		 * All done!  Resume the interface now.
		 */
		hn_resume(sc);

		if ((sc->hn_flags & HN_FLAG_RXVF) ||
		    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
			/*
			 * Since we have reattached the NVS part,
			 * change the datapath to VF again, in case
			 * it was lost after the NVS was detached.
			 */
			hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
		}

		HN_UNLOCK(sc);
		break;

	case SIOCSIFFLAGS:
		HN_LOCK(sc);

		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
			HN_UNLOCK(sc);
			break;
		}

		if (hn_xpnt_vf_isready(sc))
			hn_xpnt_vf_saveifflags(sc);

		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				/*
				 * Caller might hold a mutex, e.g.
				 * bpf; use busy-wait for the RNDIS
				 * reply.
3830 */ 3831 HN_NO_SLEEPING(sc); 3832 hn_rxfilter_config(sc); 3833 HN_SLEEPING_OK(sc); 3834 3835 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 3836 error = hn_xpnt_vf_iocsetflags(sc); 3837 } else { 3838 hn_init_locked(sc); 3839 } 3840 } else { 3841 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3842 hn_stop(sc, false); 3843 } 3844 sc->hn_if_flags = ifp->if_flags; 3845 3846 HN_UNLOCK(sc); 3847 break; 3848 3849 case SIOCSIFCAP: 3850 HN_LOCK(sc); 3851 3852 if (hn_xpnt_vf_isready(sc)) { 3853 ifr_vf = *ifr; 3854 strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname, 3855 sizeof(ifr_vf.ifr_name)); 3856 error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf); 3857 HN_UNLOCK(sc); 3858 break; 3859 } 3860 3861 /* 3862 * Fix up requested capabilities w/ supported capabilities, 3863 * since the supported capabilities could have been changed. 3864 */ 3865 mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ 3866 ifp->if_capenable; 3867 3868 if (mask & IFCAP_TXCSUM) { 3869 ifp->if_capenable ^= IFCAP_TXCSUM; 3870 if (ifp->if_capenable & IFCAP_TXCSUM) 3871 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 3872 else 3873 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 3874 } 3875 if (mask & IFCAP_TXCSUM_IPV6) { 3876 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 3877 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 3878 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 3879 else 3880 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 3881 } 3882 3883 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 3884 if (mask & IFCAP_RXCSUM) 3885 ifp->if_capenable ^= IFCAP_RXCSUM; 3886 #ifdef foo 3887 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 3888 if (mask & IFCAP_RXCSUM_IPV6) 3889 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 3890 #endif 3891 3892 if (mask & IFCAP_LRO) 3893 ifp->if_capenable ^= IFCAP_LRO; 3894 3895 if (mask & IFCAP_TSO4) { 3896 ifp->if_capenable ^= IFCAP_TSO4; 3897 if (ifp->if_capenable & IFCAP_TSO4) 3898 ifp->if_hwassist |= CSUM_IP_TSO; 3899 else 3900 ifp->if_hwassist &= ~CSUM_IP_TSO; 3901 } 3902 if (mask & IFCAP_TSO6) { 3903 ifp->if_capenable ^= IFCAP_TSO6; 3904 if (ifp->if_capenable & IFCAP_TSO6) 3905 ifp->if_hwassist |= CSUM_IP6_TSO; 3906 else 3907 ifp->if_hwassist &= ~CSUM_IP6_TSO; 3908 } 3909 3910 HN_UNLOCK(sc); 3911 break; 3912 3913 case SIOCADDMULTI: 3914 case SIOCDELMULTI: 3915 HN_LOCK(sc); 3916 3917 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3918 HN_UNLOCK(sc); 3919 break; 3920 } 3921 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3922 /* 3923 * Multicast uses mutex; use busy-wait for 3924 * the RNDIS reply. 3925 */ 3926 HN_NO_SLEEPING(sc); 3927 hn_rxfilter_config(sc); 3928 HN_SLEEPING_OK(sc); 3929 } 3930 3931 /* XXX vlan(4) style mcast addr maintenance */ 3932 if (hn_xpnt_vf_isready(sc)) { 3933 int old_if_flags; 3934 3935 old_if_flags = sc->hn_vf_ifp->if_flags; 3936 hn_xpnt_vf_saveifflags(sc); 3937 3938 if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && 3939 ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & 3940 IFF_ALLMULTI)) 3941 error = hn_xpnt_vf_iocsetflags(sc); 3942 } 3943 3944 HN_UNLOCK(sc); 3945 break; 3946 3947 case SIOCSIFMEDIA: 3948 case SIOCGIFMEDIA: 3949 HN_LOCK(sc); 3950 if (hn_xpnt_vf_isready(sc)) { 3951 /* 3952 * SIOCGIFMEDIA expects ifmediareq, so don't 3953 * create and pass ifr_vf to the VF here; just 3954 * replace the ifr_name. 3955 */ 3956 vf_ifp = sc->hn_vf_ifp; 3957 strlcpy(ifr->ifr_name, vf_ifp->if_xname, 3958 sizeof(ifr->ifr_name)); 3959 error = vf_ifp->if_ioctl(vf_ifp, cmd, data); 3960 /* Restore the ifr_name. 
*/ 3961 strlcpy(ifr->ifr_name, ifp->if_xname, 3962 sizeof(ifr->ifr_name)); 3963 HN_UNLOCK(sc); 3964 break; 3965 } 3966 HN_UNLOCK(sc); 3967 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 3968 break; 3969 3970 case SIOCGIFRSSHASH: 3971 ifrh = (struct ifrsshash *)data; 3972 HN_LOCK(sc); 3973 if (sc->hn_rx_ring_inuse == 1) { 3974 HN_UNLOCK(sc); 3975 ifrh->ifrh_func = RSS_FUNC_NONE; 3976 ifrh->ifrh_types = 0; 3977 break; 3978 } 3979 3980 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 3981 ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; 3982 else 3983 ifrh->ifrh_func = RSS_FUNC_PRIVATE; 3984 ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash); 3985 HN_UNLOCK(sc); 3986 break; 3987 3988 case SIOCGIFRSSKEY: 3989 ifrk = (struct ifrsskey *)data; 3990 HN_LOCK(sc); 3991 if (sc->hn_rx_ring_inuse == 1) { 3992 HN_UNLOCK(sc); 3993 ifrk->ifrk_func = RSS_FUNC_NONE; 3994 ifrk->ifrk_keylen = 0; 3995 break; 3996 } 3997 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 3998 ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; 3999 else 4000 ifrk->ifrk_func = RSS_FUNC_PRIVATE; 4001 ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ; 4002 memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key, 4003 NDIS_HASH_KEYSIZE_TOEPLITZ); 4004 HN_UNLOCK(sc); 4005 break; 4006 4007 default: 4008 error = ether_ioctl(ifp, cmd, data); 4009 break; 4010 } 4011 return (error); 4012 } 4013 4014 static void 4015 hn_stop(struct hn_softc *sc, bool detaching) 4016 { 4017 struct ifnet *ifp = sc->hn_ifp; 4018 int i; 4019 4020 HN_LOCK_ASSERT(sc); 4021 4022 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 4023 ("synthetic parts were not attached")); 4024 4025 /* Clear RUNNING bit ASAP. */ 4026 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4027 4028 /* Disable polling. */ 4029 hn_polling(sc, 0); 4030 4031 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 4032 KASSERT(sc->hn_vf_ifp != NULL, 4033 ("%s: VF is not attached", ifp->if_xname)); 4034 4035 /* Mark transparent mode VF as disabled. */ 4036 hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */); 4037 4038 /* 4039 * NOTE: 4040 * Datapath setting must happen _before_ bringing 4041 * the VF down. 4042 */ 4043 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 4044 4045 /* 4046 * Bring the VF down. 4047 */ 4048 hn_xpnt_vf_saveifflags(sc); 4049 sc->hn_vf_ifp->if_flags &= ~IFF_UP; 4050 hn_xpnt_vf_iocsetflags(sc); 4051 } 4052 4053 /* Suspend data transfers. */ 4054 hn_suspend_data(sc); 4055 4056 /* Clear OACTIVE bit. */ 4057 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4058 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4059 sc->hn_tx_ring[i].hn_oactive = 0; 4060 4061 /* 4062 * If the non-transparent mode VF is active, make sure 4063 * that the RX filter still allows packet reception. 4064 */ 4065 if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) 4066 hn_rxfilter_config(sc); 4067 } 4068 4069 static void 4070 hn_init_locked(struct hn_softc *sc) 4071 { 4072 struct ifnet *ifp = sc->hn_ifp; 4073 int i; 4074 4075 HN_LOCK_ASSERT(sc); 4076 4077 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 4078 return; 4079 4080 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 4081 return; 4082 4083 /* Configure RX filter */ 4084 hn_rxfilter_config(sc); 4085 4086 /* Clear OACTIVE bit. */ 4087 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4088 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4089 sc->hn_tx_ring[i].hn_oactive = 0; 4090 4091 /* Clear TX 'suspended' bit. */ 4092 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 4093 4094 if (hn_xpnt_vf_isready(sc)) { 4095 /* Initialize transparent VF. 
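 * (Done only once the synthetic parts are attached and this interface
 * is being brought up; hn_xpnt_vf_init() is expected to bring the VF's
 * configuration in sync and switch the datapath over to it.)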
*/ 4096 hn_xpnt_vf_init(sc); 4097 } 4098 4099 /* Everything is ready; unleash! */ 4100 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4101 4102 /* Re-enable polling if requested. */ 4103 if (sc->hn_pollhz > 0) 4104 hn_polling(sc, sc->hn_pollhz); 4105 } 4106 4107 static void 4108 hn_init(void *xsc) 4109 { 4110 struct hn_softc *sc = xsc; 4111 4112 HN_LOCK(sc); 4113 hn_init_locked(sc); 4114 HN_UNLOCK(sc); 4115 } 4116 4117 #if __FreeBSD_version >= 1100099 4118 4119 static int 4120 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 4121 { 4122 struct hn_softc *sc = arg1; 4123 unsigned int lenlim; 4124 int error; 4125 4126 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 4127 error = sysctl_handle_int(oidp, &lenlim, 0, req); 4128 if (error || req->newptr == NULL) 4129 return error; 4130 4131 HN_LOCK(sc); 4132 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 4133 lenlim > TCP_LRO_LENGTH_MAX) { 4134 HN_UNLOCK(sc); 4135 return EINVAL; 4136 } 4137 hn_set_lro_lenlim(sc, lenlim); 4138 HN_UNLOCK(sc); 4139 4140 return 0; 4141 } 4142 4143 static int 4144 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 4145 { 4146 struct hn_softc *sc = arg1; 4147 int ackcnt, error, i; 4148 4149 /* 4150 * lro_ackcnt_lim is append count limit, 4151 * +1 to turn it into aggregation limit. 4152 */ 4153 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 4154 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 4155 if (error || req->newptr == NULL) 4156 return error; 4157 4158 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 4159 return EINVAL; 4160 4161 /* 4162 * Convert aggregation limit back to append 4163 * count limit. 4164 */ 4165 --ackcnt; 4166 HN_LOCK(sc); 4167 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 4168 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 4169 HN_UNLOCK(sc); 4170 return 0; 4171 } 4172 4173 #endif 4174 4175 static int 4176 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 4177 { 4178 struct hn_softc *sc = arg1; 4179 int hcsum = arg2; 4180 int on, error, i; 4181 4182 on = 0; 4183 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 4184 on = 1; 4185 4186 error = sysctl_handle_int(oidp, &on, 0, req); 4187 if (error || req->newptr == NULL) 4188 return error; 4189 4190 HN_LOCK(sc); 4191 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4192 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4193 4194 if (on) 4195 rxr->hn_trust_hcsum |= hcsum; 4196 else 4197 rxr->hn_trust_hcsum &= ~hcsum; 4198 } 4199 HN_UNLOCK(sc); 4200 return 0; 4201 } 4202 4203 static int 4204 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 4205 { 4206 struct hn_softc *sc = arg1; 4207 int chim_size, error; 4208 4209 chim_size = sc->hn_tx_ring[0].hn_chim_size; 4210 error = sysctl_handle_int(oidp, &chim_size, 0, req); 4211 if (error || req->newptr == NULL) 4212 return error; 4213 4214 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 4215 return EINVAL; 4216 4217 HN_LOCK(sc); 4218 hn_set_chim_size(sc, chim_size); 4219 HN_UNLOCK(sc); 4220 return 0; 4221 } 4222 4223 #if __FreeBSD_version < 1100095 4224 static int 4225 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 4226 { 4227 struct hn_softc *sc = arg1; 4228 int ofs = arg2, i, error; 4229 struct hn_rx_ring *rxr; 4230 uint64_t stat; 4231 4232 stat = 0; 4233 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4234 rxr = &sc->hn_rx_ring[i]; 4235 stat += *((int *)((uint8_t *)rxr + ofs)); 4236 } 4237 4238 error = sysctl_handle_64(oidp, &stat, 0, req); 4239 if (error || req->newptr == NULL) 4240 return error; 4241 4242 /* Zero out this stat. 
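 * The counters are cleared only when the node is written to
 * (req->newptr != NULL); a plain read just reports the sum over all RX
 * rings.  arg2 carries a byte offset into struct hn_rx_ring, so one
 * handler serves every per-ring statistic; registration looks like,
 * e.g.:
 *
 *	SYSCTL_ADD_PROC(..., __offsetof(struct hn_rx_ring, hn_lro_tried),
 *	    hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");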
*/ 4243 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4244 rxr = &sc->hn_rx_ring[i]; 4245 *((int *)((uint8_t *)rxr + ofs)) = 0; 4246 } 4247 return 0; 4248 } 4249 #else 4250 static int 4251 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 4252 { 4253 struct hn_softc *sc = arg1; 4254 int ofs = arg2, i, error; 4255 struct hn_rx_ring *rxr; 4256 uint64_t stat; 4257 4258 stat = 0; 4259 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4260 rxr = &sc->hn_rx_ring[i]; 4261 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 4262 } 4263 4264 error = sysctl_handle_64(oidp, &stat, 0, req); 4265 if (error || req->newptr == NULL) 4266 return error; 4267 4268 /* Zero out this stat. */ 4269 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4270 rxr = &sc->hn_rx_ring[i]; 4271 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 4272 } 4273 return 0; 4274 } 4275 4276 #endif 4277 4278 static int 4279 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4280 { 4281 struct hn_softc *sc = arg1; 4282 int ofs = arg2, i, error; 4283 struct hn_rx_ring *rxr; 4284 u_long stat; 4285 4286 stat = 0; 4287 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4288 rxr = &sc->hn_rx_ring[i]; 4289 stat += *((u_long *)((uint8_t *)rxr + ofs)); 4290 } 4291 4292 error = sysctl_handle_long(oidp, &stat, 0, req); 4293 if (error || req->newptr == NULL) 4294 return error; 4295 4296 /* Zero out this stat. */ 4297 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4298 rxr = &sc->hn_rx_ring[i]; 4299 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 4300 } 4301 return 0; 4302 } 4303 4304 static int 4305 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4306 { 4307 struct hn_softc *sc = arg1; 4308 int ofs = arg2, i, error; 4309 struct hn_tx_ring *txr; 4310 u_long stat; 4311 4312 stat = 0; 4313 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4314 txr = &sc->hn_tx_ring[i]; 4315 stat += *((u_long *)((uint8_t *)txr + ofs)); 4316 } 4317 4318 error = sysctl_handle_long(oidp, &stat, 0, req); 4319 if (error || req->newptr == NULL) 4320 return error; 4321 4322 /* Zero out this stat. 
*/ 4323 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4324 txr = &sc->hn_tx_ring[i]; 4325 *((u_long *)((uint8_t *)txr + ofs)) = 0; 4326 } 4327 return 0; 4328 } 4329 4330 static int 4331 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 4332 { 4333 struct hn_softc *sc = arg1; 4334 int ofs = arg2, i, error, conf; 4335 struct hn_tx_ring *txr; 4336 4337 txr = &sc->hn_tx_ring[0]; 4338 conf = *((int *)((uint8_t *)txr + ofs)); 4339 4340 error = sysctl_handle_int(oidp, &conf, 0, req); 4341 if (error || req->newptr == NULL) 4342 return error; 4343 4344 HN_LOCK(sc); 4345 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4346 txr = &sc->hn_tx_ring[i]; 4347 *((int *)((uint8_t *)txr + ofs)) = conf; 4348 } 4349 HN_UNLOCK(sc); 4350 4351 return 0; 4352 } 4353 4354 static int 4355 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 4356 { 4357 struct hn_softc *sc = arg1; 4358 int error, size; 4359 4360 size = sc->hn_agg_size; 4361 error = sysctl_handle_int(oidp, &size, 0, req); 4362 if (error || req->newptr == NULL) 4363 return (error); 4364 4365 HN_LOCK(sc); 4366 sc->hn_agg_size = size; 4367 hn_set_txagg(sc); 4368 HN_UNLOCK(sc); 4369 4370 return (0); 4371 } 4372 4373 static int 4374 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 4375 { 4376 struct hn_softc *sc = arg1; 4377 int error, pkts; 4378 4379 pkts = sc->hn_agg_pkts; 4380 error = sysctl_handle_int(oidp, &pkts, 0, req); 4381 if (error || req->newptr == NULL) 4382 return (error); 4383 4384 HN_LOCK(sc); 4385 sc->hn_agg_pkts = pkts; 4386 hn_set_txagg(sc); 4387 HN_UNLOCK(sc); 4388 4389 return (0); 4390 } 4391 4392 static int 4393 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 4394 { 4395 struct hn_softc *sc = arg1; 4396 int pkts; 4397 4398 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 4399 return (sysctl_handle_int(oidp, &pkts, 0, req)); 4400 } 4401 4402 static int 4403 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 4404 { 4405 struct hn_softc *sc = arg1; 4406 int align; 4407 4408 align = sc->hn_tx_ring[0].hn_agg_align; 4409 return (sysctl_handle_int(oidp, &align, 0, req)); 4410 } 4411 4412 static void 4413 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) 4414 { 4415 if (pollhz == 0) 4416 vmbus_chan_poll_disable(chan); 4417 else 4418 vmbus_chan_poll_enable(chan, pollhz); 4419 } 4420 4421 static void 4422 hn_polling(struct hn_softc *sc, u_int pollhz) 4423 { 4424 int nsubch = sc->hn_rx_ring_inuse - 1; 4425 4426 HN_LOCK_ASSERT(sc); 4427 4428 if (nsubch > 0) { 4429 struct vmbus_channel **subch; 4430 int i; 4431 4432 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 4433 for (i = 0; i < nsubch; ++i) 4434 hn_chan_polling(subch[i], pollhz); 4435 vmbus_subchan_rel(subch, nsubch); 4436 } 4437 hn_chan_polling(sc->hn_prichan, pollhz); 4438 } 4439 4440 static int 4441 hn_polling_sysctl(SYSCTL_HANDLER_ARGS) 4442 { 4443 struct hn_softc *sc = arg1; 4444 int pollhz, error; 4445 4446 pollhz = sc->hn_pollhz; 4447 error = sysctl_handle_int(oidp, &pollhz, 0, req); 4448 if (error || req->newptr == NULL) 4449 return (error); 4450 4451 if (pollhz != 0 && 4452 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) 4453 return (EINVAL); 4454 4455 HN_LOCK(sc); 4456 if (sc->hn_pollhz != pollhz) { 4457 sc->hn_pollhz = pollhz; 4458 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && 4459 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 4460 hn_polling(sc, sc->hn_pollhz); 4461 } 4462 HN_UNLOCK(sc); 4463 4464 return (0); 4465 } 4466 4467 static int 4468 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 4469 { 4470 struct hn_softc *sc = arg1; 4471 char verstr[16]; 4472 4473 snprintf(verstr, sizeof(verstr), "%u.%u", 4474 
HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 4475 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 4476 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 4477 } 4478 4479 static int 4480 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 4481 { 4482 struct hn_softc *sc = arg1; 4483 char caps_str[128]; 4484 uint32_t caps; 4485 4486 HN_LOCK(sc); 4487 caps = sc->hn_caps; 4488 HN_UNLOCK(sc); 4489 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 4490 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 4491 } 4492 4493 static int 4494 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 4495 { 4496 struct hn_softc *sc = arg1; 4497 char assist_str[128]; 4498 uint32_t hwassist; 4499 4500 HN_LOCK(sc); 4501 hwassist = sc->hn_ifp->if_hwassist; 4502 HN_UNLOCK(sc); 4503 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 4504 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 4505 } 4506 4507 static int 4508 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 4509 { 4510 struct hn_softc *sc = arg1; 4511 char filter_str[128]; 4512 uint32_t filter; 4513 4514 HN_LOCK(sc); 4515 filter = sc->hn_rx_filter; 4516 HN_UNLOCK(sc); 4517 snprintf(filter_str, sizeof(filter_str), "%b", filter, 4518 NDIS_PACKET_TYPES); 4519 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 4520 } 4521 4522 #ifndef RSS 4523 4524 static int 4525 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 4526 { 4527 struct hn_softc *sc = arg1; 4528 int error; 4529 4530 HN_LOCK(sc); 4531 4532 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4533 if (error || req->newptr == NULL) 4534 goto back; 4535 4536 if ((sc->hn_flags & HN_FLAG_RXVF) || 4537 (hn_xpnt_vf && sc->hn_vf_ifp != NULL)) { 4538 /* 4539 * RSS key is synchronized w/ VF's, don't allow users 4540 * to change it. 4541 */ 4542 error = EBUSY; 4543 goto back; 4544 } 4545 4546 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4547 if (error) 4548 goto back; 4549 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 4550 4551 if (sc->hn_rx_ring_inuse > 1) { 4552 error = hn_rss_reconfig(sc); 4553 } else { 4554 /* Not RSS capable, at least for now; just save the RSS key. */ 4555 error = 0; 4556 } 4557 back: 4558 HN_UNLOCK(sc); 4559 return (error); 4560 } 4561 4562 static int 4563 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 4564 { 4565 struct hn_softc *sc = arg1; 4566 int error; 4567 4568 HN_LOCK(sc); 4569 4570 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4571 if (error || req->newptr == NULL) 4572 goto back; 4573 4574 /* 4575 * Don't allow RSS indirect table change, if this interface is not 4576 * RSS capable currently. 
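 * (With a single RX ring in use there is nothing for the indirect table
 * to spread traffic across.)  When a new table is accepted below,
 * hn_rss_ind_fixup() presumably remaps entries that point past
 * hn_rx_ring_inuse (conceptually, ind[i] %= hn_rx_ring_inuse for each
 * entry), and hn_rss_reconfig() then pushes the updated key and table
 * to the device.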
4577 */ 4578 if (sc->hn_rx_ring_inuse == 1) { 4579 error = EOPNOTSUPP; 4580 goto back; 4581 } 4582 4583 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4584 if (error) 4585 goto back; 4586 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 4587 4588 hn_rss_ind_fixup(sc); 4589 error = hn_rss_reconfig(sc); 4590 back: 4591 HN_UNLOCK(sc); 4592 return (error); 4593 } 4594 4595 #endif /* !RSS */ 4596 4597 static int 4598 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 4599 { 4600 struct hn_softc *sc = arg1; 4601 char hash_str[128]; 4602 uint32_t hash; 4603 4604 HN_LOCK(sc); 4605 hash = sc->hn_rss_hash; 4606 HN_UNLOCK(sc); 4607 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4608 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4609 } 4610 4611 static int 4612 hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS) 4613 { 4614 struct hn_softc *sc = arg1; 4615 char hash_str[128]; 4616 uint32_t hash; 4617 4618 HN_LOCK(sc); 4619 hash = sc->hn_rss_hcap; 4620 HN_UNLOCK(sc); 4621 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4622 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4623 } 4624 4625 static int 4626 hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS) 4627 { 4628 struct hn_softc *sc = arg1; 4629 char hash_str[128]; 4630 uint32_t hash; 4631 4632 HN_LOCK(sc); 4633 hash = sc->hn_rx_ring[0].hn_mbuf_hash; 4634 HN_UNLOCK(sc); 4635 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4636 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4637 } 4638 4639 static int 4640 hn_vf_sysctl(SYSCTL_HANDLER_ARGS) 4641 { 4642 struct hn_softc *sc = arg1; 4643 char vf_name[IFNAMSIZ + 1]; 4644 struct ifnet *vf_ifp; 4645 4646 HN_LOCK(sc); 4647 vf_name[0] = '\0'; 4648 vf_ifp = sc->hn_vf_ifp; 4649 if (vf_ifp != NULL) 4650 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4651 HN_UNLOCK(sc); 4652 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4653 } 4654 4655 static int 4656 hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) 4657 { 4658 struct hn_softc *sc = arg1; 4659 char vf_name[IFNAMSIZ + 1]; 4660 struct ifnet *vf_ifp; 4661 4662 HN_LOCK(sc); 4663 vf_name[0] = '\0'; 4664 vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; 4665 if (vf_ifp != NULL) 4666 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4667 HN_UNLOCK(sc); 4668 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4669 } 4670 4671 static int 4672 hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) 4673 { 4674 struct rm_priotracker pt; 4675 struct sbuf *sb; 4676 int error, i; 4677 bool first; 4678 4679 error = sysctl_wire_old_buffer(req, 0); 4680 if (error != 0) 4681 return (error); 4682 4683 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4684 if (sb == NULL) 4685 return (ENOMEM); 4686 4687 rm_rlock(&hn_vfmap_lock, &pt); 4688 4689 first = true; 4690 for (i = 0; i < hn_vfmap_size; ++i) { 4691 struct ifnet *ifp; 4692 4693 if (hn_vfmap[i] == NULL) 4694 continue; 4695 4696 ifp = ifnet_byindex(i); 4697 if (ifp != NULL) { 4698 if (first) 4699 sbuf_printf(sb, "%s", ifp->if_xname); 4700 else 4701 sbuf_printf(sb, " %s", ifp->if_xname); 4702 first = false; 4703 } 4704 } 4705 4706 rm_runlock(&hn_vfmap_lock, &pt); 4707 4708 error = sbuf_finish(sb); 4709 sbuf_delete(sb); 4710 return (error); 4711 } 4712 4713 static int 4714 hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) 4715 { 4716 struct rm_priotracker pt; 4717 struct sbuf *sb; 4718 int error, i; 4719 bool first; 4720 4721 error = sysctl_wire_old_buffer(req, 0); 4722 if (error != 0) 4723 return (error); 4724 4725 sb = 
sbuf_new_for_sysctl(NULL, NULL, 128, req); 4726 if (sb == NULL) 4727 return (ENOMEM); 4728 4729 rm_rlock(&hn_vfmap_lock, &pt); 4730 4731 first = true; 4732 for (i = 0; i < hn_vfmap_size; ++i) { 4733 struct ifnet *ifp, *hn_ifp; 4734 4735 hn_ifp = hn_vfmap[i]; 4736 if (hn_ifp == NULL) 4737 continue; 4738 4739 ifp = ifnet_byindex(i); 4740 if (ifp != NULL) { 4741 if (first) { 4742 sbuf_printf(sb, "%s:%s", ifp->if_xname, 4743 hn_ifp->if_xname); 4744 } else { 4745 sbuf_printf(sb, " %s:%s", ifp->if_xname, 4746 hn_ifp->if_xname); 4747 } 4748 first = false; 4749 } 4750 } 4751 4752 rm_runlock(&hn_vfmap_lock, &pt); 4753 4754 error = sbuf_finish(sb); 4755 sbuf_delete(sb); 4756 return (error); 4757 } 4758 4759 static int 4760 hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS) 4761 { 4762 struct hn_softc *sc = arg1; 4763 int error, onoff = 0; 4764 4765 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) 4766 onoff = 1; 4767 error = sysctl_handle_int(oidp, &onoff, 0, req); 4768 if (error || req->newptr == NULL) 4769 return (error); 4770 4771 HN_LOCK(sc); 4772 /* NOTE: hn_vf_lock for hn_transmit() */ 4773 rm_wlock(&sc->hn_vf_lock); 4774 if (onoff) 4775 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 4776 else 4777 sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF; 4778 rm_wunlock(&sc->hn_vf_lock); 4779 HN_UNLOCK(sc); 4780 4781 return (0); 4782 } 4783 4784 static int 4785 hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS) 4786 { 4787 struct hn_softc *sc = arg1; 4788 int enabled = 0; 4789 4790 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 4791 enabled = 1; 4792 return (sysctl_handle_int(oidp, &enabled, 0, req)); 4793 } 4794 4795 static int 4796 hn_check_iplen(const struct mbuf *m, int hoff) 4797 { 4798 const struct ip *ip; 4799 int len, iphlen, iplen; 4800 const struct tcphdr *th; 4801 int thoff; /* TCP data offset */ 4802 4803 len = hoff + sizeof(struct ip); 4804 4805 /* The packet must be at least the size of an IP header. */ 4806 if (m->m_pkthdr.len < len) 4807 return IPPROTO_DONE; 4808 4809 /* The fixed IP header must reside completely in the first mbuf. */ 4810 if (m->m_len < len) 4811 return IPPROTO_DONE; 4812 4813 ip = mtodo(m, hoff); 4814 4815 /* Bound check the packet's stated IP header length. */ 4816 iphlen = ip->ip_hl << 2; 4817 if (iphlen < sizeof(struct ip)) /* minimum header length */ 4818 return IPPROTO_DONE; 4819 4820 /* The full IP header must reside completely in the one mbuf. */ 4821 if (m->m_len < hoff + iphlen) 4822 return IPPROTO_DONE; 4823 4824 iplen = ntohs(ip->ip_len); 4825 4826 /* 4827 * Check that the amount of data in the buffers is as 4828 * at least much as the IP header would have us expect. 4829 */ 4830 if (m->m_pkthdr.len < hoff + iplen) 4831 return IPPROTO_DONE; 4832 4833 /* 4834 * Ignore IP fragments. 4835 */ 4836 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 4837 return IPPROTO_DONE; 4838 4839 /* 4840 * The TCP/IP or UDP/IP header must be entirely contained within 4841 * the first fragment of a packet. 
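 * "Contained" is checked in two senses below: the advertised header
 * length must fit inside the IP datagram (for TCP, iphlen + thoff <=
 * iplen, with thoff = th_off << 2), and the header bytes themselves
 * must all lie in the leading mbuf (m_len), so that callers can look at
 * them without an m_pullup().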
4842 */ 4843 switch (ip->ip_p) { 4844 case IPPROTO_TCP: 4845 if (iplen < iphlen + sizeof(struct tcphdr)) 4846 return IPPROTO_DONE; 4847 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 4848 return IPPROTO_DONE; 4849 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 4850 thoff = th->th_off << 2; 4851 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 4852 return IPPROTO_DONE; 4853 if (m->m_len < hoff + iphlen + thoff) 4854 return IPPROTO_DONE; 4855 break; 4856 case IPPROTO_UDP: 4857 if (iplen < iphlen + sizeof(struct udphdr)) 4858 return IPPROTO_DONE; 4859 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 4860 return IPPROTO_DONE; 4861 break; 4862 default: 4863 if (iplen < iphlen) 4864 return IPPROTO_DONE; 4865 break; 4866 } 4867 return ip->ip_p; 4868 } 4869 4870 static void 4871 hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto) 4872 { 4873 const struct ether_header *eh; 4874 uint16_t etype; 4875 int hoff; 4876 4877 hoff = sizeof(*eh); 4878 /* Checked at the beginning of this function. */ 4879 KASSERT(m_new->m_len >= hoff, ("not ethernet frame")); 4880 4881 eh = mtod(m_new, const struct ether_header *); 4882 etype = ntohs(eh->ether_type); 4883 if (etype == ETHERTYPE_VLAN) { 4884 const struct ether_vlan_header *evl; 4885 4886 hoff = sizeof(*evl); 4887 if (m_new->m_len < hoff) 4888 return; 4889 evl = mtod(m_new, const struct ether_vlan_header *); 4890 etype = ntohs(evl->evl_proto); 4891 } 4892 *l3proto = etype; 4893 4894 if (etype == ETHERTYPE_IP) 4895 *l4proto = hn_check_iplen(m_new, hoff); 4896 else 4897 *l4proto = IPPROTO_DONE; 4898 } 4899 4900 static int 4901 hn_create_rx_data(struct hn_softc *sc, int ring_cnt) 4902 { 4903 struct sysctl_oid_list *child; 4904 struct sysctl_ctx_list *ctx; 4905 device_t dev = sc->hn_dev; 4906 #if defined(INET) || defined(INET6) 4907 #if __FreeBSD_version >= 1100095 4908 int lroent_cnt; 4909 #endif 4910 #endif 4911 int i; 4912 4913 /* 4914 * Create RXBUF for reception. 4915 * 4916 * NOTE: 4917 * - It is shared by all channels. 4918 * - A large enough buffer is allocated, certain version of NVSes 4919 * may further limit the usable space. 
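 * - If the RXBUF is still referenced when the device is torn down
 *   (HN_FLAG_RXBUF_REF), hn_destroy_rx_data() keeps the memory around
 *   instead of freeing it.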
4920 */ 4921 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4922 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 4923 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4924 if (sc->hn_rxbuf == NULL) { 4925 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 4926 return (ENOMEM); 4927 } 4928 4929 sc->hn_rx_ring_cnt = ring_cnt; 4930 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 4931 4932 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 4933 M_DEVBUF, M_WAITOK | M_ZERO); 4934 4935 #if defined(INET) || defined(INET6) 4936 #if __FreeBSD_version >= 1100095 4937 lroent_cnt = hn_lro_entry_count; 4938 if (lroent_cnt < TCP_LRO_ENTRIES) 4939 lroent_cnt = TCP_LRO_ENTRIES; 4940 if (bootverbose) 4941 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 4942 #endif 4943 #endif /* INET || INET6 */ 4944 4945 ctx = device_get_sysctl_ctx(dev); 4946 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 4947 4948 /* Create dev.hn.UNIT.rx sysctl tree */ 4949 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 4950 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4951 4952 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4953 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4954 4955 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4956 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 4957 &rxr->hn_br_dma, BUS_DMA_WAITOK); 4958 if (rxr->hn_br == NULL) { 4959 device_printf(dev, "allocate bufring failed\n"); 4960 return (ENOMEM); 4961 } 4962 4963 if (hn_trust_hosttcp) 4964 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 4965 if (hn_trust_hostudp) 4966 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 4967 if (hn_trust_hostip) 4968 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 4969 rxr->hn_mbuf_hash = NDIS_HASH_ALL; 4970 rxr->hn_ifp = sc->hn_ifp; 4971 if (i < sc->hn_tx_ring_cnt) 4972 rxr->hn_txr = &sc->hn_tx_ring[i]; 4973 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 4974 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 4975 rxr->hn_rx_idx = i; 4976 rxr->hn_rxbuf = sc->hn_rxbuf; 4977 4978 /* 4979 * Initialize LRO. 
4980 */ 4981 #if defined(INET) || defined(INET6) 4982 #if __FreeBSD_version >= 1100095 4983 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 4984 hn_lro_mbufq_depth); 4985 #else 4986 tcp_lro_init(&rxr->hn_lro); 4987 rxr->hn_lro.ifp = sc->hn_ifp; 4988 #endif 4989 #if __FreeBSD_version >= 1100099 4990 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 4991 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 4992 #endif 4993 #endif /* INET || INET6 */ 4994 4995 if (sc->hn_rx_sysctl_tree != NULL) { 4996 char name[16]; 4997 4998 /* 4999 * Create per RX ring sysctl tree: 5000 * dev.hn.UNIT.rx.RINGID 5001 */ 5002 snprintf(name, sizeof(name), "%d", i); 5003 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 5004 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 5005 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5006 5007 if (rxr->hn_rx_sysctl_tree != NULL) { 5008 SYSCTL_ADD_ULONG(ctx, 5009 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5010 OID_AUTO, "packets", CTLFLAG_RW, 5011 &rxr->hn_pkts, "# of packets received"); 5012 SYSCTL_ADD_ULONG(ctx, 5013 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5014 OID_AUTO, "rss_pkts", CTLFLAG_RW, 5015 &rxr->hn_rss_pkts, 5016 "# of packets w/ RSS info received"); 5017 SYSCTL_ADD_INT(ctx, 5018 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5019 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 5020 &rxr->hn_pktbuf_len, 0, 5021 "Temporary channel packet buffer length"); 5022 } 5023 } 5024 } 5025 5026 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 5027 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5028 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 5029 #if __FreeBSD_version < 1100095 5030 hn_rx_stat_int_sysctl, 5031 #else 5032 hn_rx_stat_u64_sysctl, 5033 #endif 5034 "LU", "LRO queued"); 5035 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 5036 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5037 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 5038 #if __FreeBSD_version < 1100095 5039 hn_rx_stat_int_sysctl, 5040 #else 5041 hn_rx_stat_u64_sysctl, 5042 #endif 5043 "LU", "LRO flushed"); 5044 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 5045 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5046 __offsetof(struct hn_rx_ring, hn_lro_tried), 5047 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 5048 #if __FreeBSD_version >= 1100099 5049 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 5050 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5051 hn_lro_lenlim_sysctl, "IU", 5052 "Max # of data bytes to be aggregated by LRO"); 5053 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 5054 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5055 hn_lro_ackcnt_sysctl, "I", 5056 "Max # of ACKs to be aggregated by LRO"); 5057 #endif 5058 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 5059 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 5060 hn_trust_hcsum_sysctl, "I", 5061 "Trust tcp segement verification on host side, " 5062 "when csum info is missing"); 5063 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 5064 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 5065 hn_trust_hcsum_sysctl, "I", 5066 "Trust udp datagram verification on host side, " 5067 "when csum info is missing"); 5068 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 5069 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 5070 hn_trust_hcsum_sysctl, "I", 5071 "Trust ip packet verification on host side, " 5072 "when csum info is missing"); 5073 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 5074 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5075 
__offsetof(struct hn_rx_ring, hn_csum_ip), 5076 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 5077 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 5078 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5079 __offsetof(struct hn_rx_ring, hn_csum_tcp), 5080 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 5081 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 5082 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5083 __offsetof(struct hn_rx_ring, hn_csum_udp), 5084 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 5085 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 5086 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5087 __offsetof(struct hn_rx_ring, hn_csum_trusted), 5088 hn_rx_stat_ulong_sysctl, "LU", 5089 "# of packets that we trust host's csum verification"); 5090 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 5091 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5092 __offsetof(struct hn_rx_ring, hn_small_pkts), 5093 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 5094 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 5095 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5096 __offsetof(struct hn_rx_ring, hn_ack_failed), 5097 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 5098 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 5099 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 5100 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 5101 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 5102 5103 return (0); 5104 } 5105 5106 static void 5107 hn_destroy_rx_data(struct hn_softc *sc) 5108 { 5109 int i; 5110 5111 if (sc->hn_rxbuf != NULL) { 5112 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) 5113 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 5114 else 5115 device_printf(sc->hn_dev, "RXBUF is referenced\n"); 5116 sc->hn_rxbuf = NULL; 5117 } 5118 5119 if (sc->hn_rx_ring_cnt == 0) 5120 return; 5121 5122 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 5123 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 5124 5125 if (rxr->hn_br == NULL) 5126 continue; 5127 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { 5128 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 5129 } else { 5130 device_printf(sc->hn_dev, 5131 "%dth channel bufring is referenced", i); 5132 } 5133 rxr->hn_br = NULL; 5134 5135 #if defined(INET) || defined(INET6) 5136 tcp_lro_free(&rxr->hn_lro); 5137 #endif 5138 free(rxr->hn_pktbuf, M_DEVBUF); 5139 } 5140 free(sc->hn_rx_ring, M_DEVBUF); 5141 sc->hn_rx_ring = NULL; 5142 5143 sc->hn_rx_ring_cnt = 0; 5144 sc->hn_rx_ring_inuse = 0; 5145 } 5146 5147 static int 5148 hn_tx_ring_create(struct hn_softc *sc, int id) 5149 { 5150 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 5151 device_t dev = sc->hn_dev; 5152 bus_dma_tag_t parent_dtag; 5153 int error, i; 5154 5155 txr->hn_sc = sc; 5156 txr->hn_tx_idx = id; 5157 5158 #ifndef HN_USE_TXDESC_BUFRING 5159 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 5160 #endif 5161 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 5162 5163 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 5164 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 5165 M_DEVBUF, M_WAITOK | M_ZERO); 5166 #ifndef HN_USE_TXDESC_BUFRING 5167 SLIST_INIT(&txr->hn_txlist); 5168 #else 5169 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 5170 M_WAITOK, &txr->hn_tx_lock); 5171 #endif 5172 5173 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { 5174 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( 5175 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); 5176 } else { 5177 txr->hn_tx_taskq = 
sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; 5178 } 5179 5180 #ifdef HN_IFSTART_SUPPORT 5181 if (hn_use_if_start) { 5182 txr->hn_txeof = hn_start_txeof; 5183 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 5184 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 5185 } else 5186 #endif 5187 { 5188 int br_depth; 5189 5190 txr->hn_txeof = hn_xmit_txeof; 5191 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 5192 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 5193 5194 br_depth = hn_get_txswq_depth(txr); 5195 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 5196 M_WAITOK, &txr->hn_tx_lock); 5197 } 5198 5199 txr->hn_direct_tx_size = hn_direct_tx_size; 5200 5201 /* 5202 * Always schedule transmission instead of trying to do direct 5203 * transmission. This one gives the best performance so far. 5204 */ 5205 txr->hn_sched_tx = 1; 5206 5207 parent_dtag = bus_get_dma_tag(dev); 5208 5209 /* DMA tag for RNDIS packet messages. */ 5210 error = bus_dma_tag_create(parent_dtag, /* parent */ 5211 HN_RNDIS_PKT_ALIGN, /* alignment */ 5212 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 5213 BUS_SPACE_MAXADDR, /* lowaddr */ 5214 BUS_SPACE_MAXADDR, /* highaddr */ 5215 NULL, NULL, /* filter, filterarg */ 5216 HN_RNDIS_PKT_LEN, /* maxsize */ 5217 1, /* nsegments */ 5218 HN_RNDIS_PKT_LEN, /* maxsegsize */ 5219 0, /* flags */ 5220 NULL, /* lockfunc */ 5221 NULL, /* lockfuncarg */ 5222 &txr->hn_tx_rndis_dtag); 5223 if (error) { 5224 device_printf(dev, "failed to create rndis dmatag\n"); 5225 return error; 5226 } 5227 5228 /* DMA tag for data. */ 5229 error = bus_dma_tag_create(parent_dtag, /* parent */ 5230 1, /* alignment */ 5231 HN_TX_DATA_BOUNDARY, /* boundary */ 5232 BUS_SPACE_MAXADDR, /* lowaddr */ 5233 BUS_SPACE_MAXADDR, /* highaddr */ 5234 NULL, NULL, /* filter, filterarg */ 5235 HN_TX_DATA_MAXSIZE, /* maxsize */ 5236 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 5237 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 5238 0, /* flags */ 5239 NULL, /* lockfunc */ 5240 NULL, /* lockfuncarg */ 5241 &txr->hn_tx_data_dtag); 5242 if (error) { 5243 device_printf(dev, "failed to create data dmatag\n"); 5244 return error; 5245 } 5246 5247 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 5248 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 5249 5250 txd->txr = txr; 5251 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 5252 STAILQ_INIT(&txd->agg_list); 5253 5254 /* 5255 * Allocate and load RNDIS packet message. 5256 */ 5257 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 5258 (void **)&txd->rndis_pkt, 5259 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 5260 &txd->rndis_pkt_dmap); 5261 if (error) { 5262 device_printf(dev, 5263 "failed to allocate rndis_packet_msg, %d\n", i); 5264 return error; 5265 } 5266 5267 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 5268 txd->rndis_pkt_dmap, 5269 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 5270 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 5271 BUS_DMA_NOWAIT); 5272 if (error) { 5273 device_printf(dev, 5274 "failed to load rndis_packet_msg, %d\n", i); 5275 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5276 txd->rndis_pkt, txd->rndis_pkt_dmap); 5277 return error; 5278 } 5279 5280 /* DMA map for TX data. 
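 * If creating this map fails, the RNDIS-packet DMA map and memory that
 * were just set up for this descriptor are unloaded and freed before
 * the error return, so the descriptor does not leak its busdma state.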
*/ 5281 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 5282 &txd->data_dmap); 5283 if (error) { 5284 device_printf(dev, 5285 "failed to allocate tx data dmamap\n"); 5286 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 5287 txd->rndis_pkt_dmap); 5288 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5289 txd->rndis_pkt, txd->rndis_pkt_dmap); 5290 return error; 5291 } 5292 5293 /* All set, put it to list */ 5294 txd->flags |= HN_TXD_FLAG_ONLIST; 5295 #ifndef HN_USE_TXDESC_BUFRING 5296 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 5297 #else 5298 buf_ring_enqueue(txr->hn_txdesc_br, txd); 5299 #endif 5300 } 5301 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 5302 5303 if (sc->hn_tx_sysctl_tree != NULL) { 5304 struct sysctl_oid_list *child; 5305 struct sysctl_ctx_list *ctx; 5306 char name[16]; 5307 5308 /* 5309 * Create per TX ring sysctl tree: 5310 * dev.hn.UNIT.tx.RINGID 5311 */ 5312 ctx = device_get_sysctl_ctx(dev); 5313 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 5314 5315 snprintf(name, sizeof(name), "%d", id); 5316 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 5317 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5318 5319 if (txr->hn_tx_sysctl_tree != NULL) { 5320 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 5321 5322 #ifdef HN_DEBUG 5323 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 5324 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 5325 "# of available TX descs"); 5326 #endif 5327 #ifdef HN_IFSTART_SUPPORT 5328 if (!hn_use_if_start) 5329 #endif 5330 { 5331 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 5332 CTLFLAG_RD, &txr->hn_oactive, 0, 5333 "over active"); 5334 } 5335 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 5336 CTLFLAG_RW, &txr->hn_pkts, 5337 "# of packets transmitted"); 5338 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 5339 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 5340 } 5341 } 5342 5343 return 0; 5344 } 5345 5346 static void 5347 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 5348 { 5349 struct hn_tx_ring *txr = txd->txr; 5350 5351 KASSERT(txd->m == NULL, ("still has mbuf installed")); 5352 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 5353 5354 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 5355 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 5356 txd->rndis_pkt_dmap); 5357 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 5358 } 5359 5360 static void 5361 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) 5362 { 5363 5364 KASSERT(txd->refs == 0 || txd->refs == 1, 5365 ("invalid txd refs %d", txd->refs)); 5366 5367 /* Aggregated txds will be freed by their aggregating txd. */ 5368 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { 5369 int freed; 5370 5371 freed = hn_txdesc_put(txr, txd); 5372 KASSERT(freed, ("can't free txdesc")); 5373 } 5374 } 5375 5376 static void 5377 hn_tx_ring_destroy(struct hn_tx_ring *txr) 5378 { 5379 int i; 5380 5381 if (txr->hn_txdesc == NULL) 5382 return; 5383 5384 /* 5385 * NOTE: 5386 * Because the freeing of aggregated txds will be deferred 5387 * to the aggregating txd, two passes are used here: 5388 * - The first pass GCes any pending txds. This GC is necessary, 5389 * since if the channels are revoked, hypervisor will not 5390 * deliver send-done for all pending txds. 5391 * - The second pass frees the busdma stuffs, i.e. after all txds 5392 * were freed. 
5393 */ 5394 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5395 hn_txdesc_gc(txr, &txr->hn_txdesc[i]); 5396 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5397 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); 5398 5399 if (txr->hn_tx_data_dtag != NULL) 5400 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 5401 if (txr->hn_tx_rndis_dtag != NULL) 5402 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 5403 5404 #ifdef HN_USE_TXDESC_BUFRING 5405 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 5406 #endif 5407 5408 free(txr->hn_txdesc, M_DEVBUF); 5409 txr->hn_txdesc = NULL; 5410 5411 if (txr->hn_mbuf_br != NULL) 5412 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 5413 5414 #ifndef HN_USE_TXDESC_BUFRING 5415 mtx_destroy(&txr->hn_txlist_spin); 5416 #endif 5417 mtx_destroy(&txr->hn_tx_lock); 5418 } 5419 5420 static int 5421 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 5422 { 5423 struct sysctl_oid_list *child; 5424 struct sysctl_ctx_list *ctx; 5425 int i; 5426 5427 /* 5428 * Create TXBUF for chimney sending. 5429 * 5430 * NOTE: It is shared by all channels. 5431 */ 5432 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 5433 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 5434 BUS_DMA_WAITOK | BUS_DMA_ZERO); 5435 if (sc->hn_chim == NULL) { 5436 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 5437 return (ENOMEM); 5438 } 5439 5440 sc->hn_tx_ring_cnt = ring_cnt; 5441 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 5442 5443 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 5444 M_DEVBUF, M_WAITOK | M_ZERO); 5445 5446 ctx = device_get_sysctl_ctx(sc->hn_dev); 5447 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 5448 5449 /* Create dev.hn.UNIT.tx sysctl tree */ 5450 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 5451 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5452 5453 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 5454 int error; 5455 5456 error = hn_tx_ring_create(sc, i); 5457 if (error) 5458 return error; 5459 } 5460 5461 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 5462 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5463 __offsetof(struct hn_tx_ring, hn_no_txdescs), 5464 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 5465 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 5466 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5467 __offsetof(struct hn_tx_ring, hn_send_failed), 5468 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 5469 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 5470 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5471 __offsetof(struct hn_tx_ring, hn_txdma_failed), 5472 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 5473 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 5474 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5475 __offsetof(struct hn_tx_ring, hn_flush_failed), 5476 hn_tx_stat_ulong_sysctl, "LU", 5477 "# of packet transmission aggregation flush failure"); 5478 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 5479 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5480 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 5481 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 5482 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 5483 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5484 __offsetof(struct hn_tx_ring, hn_tx_chimney), 5485 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 5486 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 5487 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5488 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 5489 
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 5490 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 5491 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 5492 "# of total TX descs"); 5493 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 5494 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 5495 "Chimney send packet size upper boundary"); 5496 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 5497 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5498 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 5499 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 5500 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5501 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 5502 hn_tx_conf_int_sysctl, "I", 5503 "Size of the packet for direct transmission"); 5504 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 5505 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5506 __offsetof(struct hn_tx_ring, hn_sched_tx), 5507 hn_tx_conf_int_sysctl, "I", 5508 "Always schedule transmission " 5509 "instead of doing direct transmission"); 5510 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 5511 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 5512 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 5513 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 5514 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 5515 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 5516 "Applied packet transmission aggregation size"); 5517 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 5518 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5519 hn_txagg_pktmax_sysctl, "I", 5520 "Applied packet transmission aggregation packets"); 5521 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 5522 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5523 hn_txagg_align_sysctl, "I", 5524 "Applied packet transmission aggregation alignment"); 5525 5526 return 0; 5527 } 5528 5529 static void 5530 hn_set_chim_size(struct hn_softc *sc, int chim_size) 5531 { 5532 int i; 5533 5534 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5535 sc->hn_tx_ring[i].hn_chim_size = chim_size; 5536 } 5537 5538 static void 5539 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 5540 { 5541 struct ifnet *ifp = sc->hn_ifp; 5542 u_int hw_tsomax; 5543 int tso_minlen; 5544 5545 HN_LOCK_ASSERT(sc); 5546 5547 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 5548 return; 5549 5550 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 5551 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 5552 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 5553 5554 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 5555 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 5556 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 5557 5558 if (tso_maxlen < tso_minlen) 5559 tso_maxlen = tso_minlen; 5560 else if (tso_maxlen > IP_MAXPACKET) 5561 tso_maxlen = IP_MAXPACKET; 5562 if (tso_maxlen > sc->hn_ndis_tso_szmax) 5563 tso_maxlen = sc->hn_ndis_tso_szmax; 5564 hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 5565 5566 if (hn_xpnt_vf_isready(sc)) { 5567 if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) 5568 hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; 5569 } 5570 ifp->if_hw_tsomax = hw_tsomax; 5571 if (bootverbose) 5572 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 5573 } 5574 5575 static void 5576 hn_fixup_tx_data(struct hn_softc *sc) 5577 { 5578 uint64_t csum_assist; 5579 int i; 5580 5581 hn_set_chim_size(sc, sc->hn_chim_szmax); 5582 if (hn_tx_chimney_size > 0 && 5583 hn_tx_chimney_size < sc->hn_chim_szmax) 5584 hn_set_chim_size(sc, 
hn_tx_chimney_size); 5585 5586 csum_assist = 0; 5587 if (sc->hn_caps & HN_CAP_IPCS) 5588 csum_assist |= CSUM_IP; 5589 if (sc->hn_caps & HN_CAP_TCP4CS) 5590 csum_assist |= CSUM_IP_TCP; 5591 if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs) 5592 csum_assist |= CSUM_IP_UDP; 5593 if (sc->hn_caps & HN_CAP_TCP6CS) 5594 csum_assist |= CSUM_IP6_TCP; 5595 if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs) 5596 csum_assist |= CSUM_IP6_UDP; 5597 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5598 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 5599 5600 if (sc->hn_caps & HN_CAP_HASHVAL) { 5601 /* 5602 * Support HASHVAL pktinfo on TX path. 5603 */ 5604 if (bootverbose) 5605 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 5606 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5607 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 5608 } 5609 } 5610 5611 static void 5612 hn_fixup_rx_data(struct hn_softc *sc) 5613 { 5614 5615 if (sc->hn_caps & HN_CAP_UDPHASH) { 5616 int i; 5617 5618 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 5619 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH; 5620 } 5621 } 5622 5623 static void 5624 hn_destroy_tx_data(struct hn_softc *sc) 5625 { 5626 int i; 5627 5628 if (sc->hn_chim != NULL) { 5629 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { 5630 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 5631 } else { 5632 device_printf(sc->hn_dev, 5633 "chimney sending buffer is referenced"); 5634 } 5635 sc->hn_chim = NULL; 5636 } 5637 5638 if (sc->hn_tx_ring_cnt == 0) 5639 return; 5640 5641 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5642 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 5643 5644 free(sc->hn_tx_ring, M_DEVBUF); 5645 sc->hn_tx_ring = NULL; 5646 5647 sc->hn_tx_ring_cnt = 0; 5648 sc->hn_tx_ring_inuse = 0; 5649 } 5650 5651 #ifdef HN_IFSTART_SUPPORT 5652 5653 static void 5654 hn_start_taskfunc(void *xtxr, int pending __unused) 5655 { 5656 struct hn_tx_ring *txr = xtxr; 5657 5658 mtx_lock(&txr->hn_tx_lock); 5659 hn_start_locked(txr, 0); 5660 mtx_unlock(&txr->hn_tx_lock); 5661 } 5662 5663 static int 5664 hn_start_locked(struct hn_tx_ring *txr, int len) 5665 { 5666 struct hn_softc *sc = txr->hn_sc; 5667 struct ifnet *ifp = sc->hn_ifp; 5668 int sched = 0; 5669 5670 KASSERT(hn_use_if_start, 5671 ("hn_start_locked is called, when if_start is disabled")); 5672 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5673 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5674 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5675 5676 if (__predict_false(txr->hn_suspended)) 5677 return (0); 5678 5679 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 5680 IFF_DRV_RUNNING) 5681 return (0); 5682 5683 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 5684 struct hn_txdesc *txd; 5685 struct mbuf *m_head; 5686 int error; 5687 5688 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 5689 if (m_head == NULL) 5690 break; 5691 5692 if (len > 0 && m_head->m_pkthdr.len > len) { 5693 /* 5694 * This sending could be time consuming; let callers 5695 * dispatch this packet sending (and sending of any 5696 * following up packets) to tx taskqueue. 
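 * "len" is the direct-transmit threshold: hn_start() passes
 * hn_direct_tx_size here, while the taskqueue handlers pass 0 so that
 * nothing is deferred a second time.  Returning 1 ("sched") tells the
 * caller to enqueue hn_tx_task for the rest of the queue.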
5697 */ 5698 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5699 sched = 1; 5700 break; 5701 } 5702 5703 #if defined(INET6) || defined(INET) 5704 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 5705 m_head = hn_tso_fixup(m_head); 5706 if (__predict_false(m_head == NULL)) { 5707 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5708 continue; 5709 } 5710 } else if (m_head->m_pkthdr.csum_flags & 5711 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 5712 m_head = hn_set_hlen(m_head); 5713 if (__predict_false(m_head == NULL)) { 5714 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5715 continue; 5716 } 5717 } 5718 #endif 5719 5720 txd = hn_txdesc_get(txr); 5721 if (txd == NULL) { 5722 txr->hn_no_txdescs++; 5723 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5724 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5725 break; 5726 } 5727 5728 error = hn_encap(ifp, txr, txd, &m_head); 5729 if (error) { 5730 /* Both txd and m_head are freed */ 5731 KASSERT(txr->hn_agg_txd == NULL, 5732 ("encap failed w/ pending aggregating txdesc")); 5733 continue; 5734 } 5735 5736 if (txr->hn_agg_pktleft == 0) { 5737 if (txr->hn_agg_txd != NULL) { 5738 KASSERT(m_head == NULL, 5739 ("pending mbuf for aggregating txdesc")); 5740 error = hn_flush_txagg(ifp, txr); 5741 if (__predict_false(error)) { 5742 atomic_set_int(&ifp->if_drv_flags, 5743 IFF_DRV_OACTIVE); 5744 break; 5745 } 5746 } else { 5747 KASSERT(m_head != NULL, ("mbuf was freed")); 5748 error = hn_txpkt(ifp, txr, txd); 5749 if (__predict_false(error)) { 5750 /* txd is freed, but m_head is not */ 5751 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5752 atomic_set_int(&ifp->if_drv_flags, 5753 IFF_DRV_OACTIVE); 5754 break; 5755 } 5756 } 5757 } 5758 #ifdef INVARIANTS 5759 else { 5760 KASSERT(txr->hn_agg_txd != NULL, 5761 ("no aggregating txdesc")); 5762 KASSERT(m_head == NULL, 5763 ("pending mbuf for aggregating txdesc")); 5764 } 5765 #endif 5766 } 5767 5768 /* Flush pending aggerated transmission. */ 5769 if (txr->hn_agg_txd != NULL) 5770 hn_flush_txagg(ifp, txr); 5771 return (sched); 5772 } 5773 5774 static void 5775 hn_start(struct ifnet *ifp) 5776 { 5777 struct hn_softc *sc = ifp->if_softc; 5778 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 5779 5780 if (txr->hn_sched_tx) 5781 goto do_sched; 5782 5783 if (mtx_trylock(&txr->hn_tx_lock)) { 5784 int sched; 5785 5786 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5787 mtx_unlock(&txr->hn_tx_lock); 5788 if (!sched) 5789 return; 5790 } 5791 do_sched: 5792 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 5793 } 5794 5795 static void 5796 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 5797 { 5798 struct hn_tx_ring *txr = xtxr; 5799 5800 mtx_lock(&txr->hn_tx_lock); 5801 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 5802 hn_start_locked(txr, 0); 5803 mtx_unlock(&txr->hn_tx_lock); 5804 } 5805 5806 static void 5807 hn_start_txeof(struct hn_tx_ring *txr) 5808 { 5809 struct hn_softc *sc = txr->hn_sc; 5810 struct ifnet *ifp = sc->hn_ifp; 5811 5812 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5813 5814 if (txr->hn_sched_tx) 5815 goto do_sched; 5816 5817 if (mtx_trylock(&txr->hn_tx_lock)) { 5818 int sched; 5819 5820 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5821 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5822 mtx_unlock(&txr->hn_tx_lock); 5823 if (sched) { 5824 taskqueue_enqueue(txr->hn_tx_taskq, 5825 &txr->hn_tx_task); 5826 } 5827 } else { 5828 do_sched: 5829 /* 5830 * Release the OACTIVE earlier, with the hope, that 5831 * others could catch up. 
The task will clear the 5832 * flag again with the hn_tx_lock to avoid possible 5833 * races. 5834 */ 5835 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5836 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 5837 } 5838 } 5839 5840 #endif /* HN_IFSTART_SUPPORT */ 5841 5842 static int 5843 hn_xmit(struct hn_tx_ring *txr, int len) 5844 { 5845 struct hn_softc *sc = txr->hn_sc; 5846 struct ifnet *ifp = sc->hn_ifp; 5847 struct mbuf *m_head; 5848 int sched = 0; 5849 5850 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5851 #ifdef HN_IFSTART_SUPPORT 5852 KASSERT(hn_use_if_start == 0, 5853 ("hn_xmit is called, when if_start is enabled")); 5854 #endif 5855 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5856 5857 if (__predict_false(txr->hn_suspended)) 5858 return (0); 5859 5860 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 5861 return (0); 5862 5863 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 5864 struct hn_txdesc *txd; 5865 int error; 5866 5867 if (len > 0 && m_head->m_pkthdr.len > len) { 5868 /* 5869 * This sending could be time consuming; let callers 5870 * dispatch this packet sending (and sending of any 5871 * following up packets) to tx taskqueue. 5872 */ 5873 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5874 sched = 1; 5875 break; 5876 } 5877 5878 txd = hn_txdesc_get(txr); 5879 if (txd == NULL) { 5880 txr->hn_no_txdescs++; 5881 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5882 txr->hn_oactive = 1; 5883 break; 5884 } 5885 5886 error = hn_encap(ifp, txr, txd, &m_head); 5887 if (error) { 5888 /* Both txd and m_head are freed; discard */ 5889 KASSERT(txr->hn_agg_txd == NULL, 5890 ("encap failed w/ pending aggregating txdesc")); 5891 drbr_advance(ifp, txr->hn_mbuf_br); 5892 continue; 5893 } 5894 5895 if (txr->hn_agg_pktleft == 0) { 5896 if (txr->hn_agg_txd != NULL) { 5897 KASSERT(m_head == NULL, 5898 ("pending mbuf for aggregating txdesc")); 5899 error = hn_flush_txagg(ifp, txr); 5900 if (__predict_false(error)) { 5901 txr->hn_oactive = 1; 5902 break; 5903 } 5904 } else { 5905 KASSERT(m_head != NULL, ("mbuf was freed")); 5906 error = hn_txpkt(ifp, txr, txd); 5907 if (__predict_false(error)) { 5908 /* txd is freed, but m_head is not */ 5909 drbr_putback(ifp, txr->hn_mbuf_br, 5910 m_head); 5911 txr->hn_oactive = 1; 5912 break; 5913 } 5914 } 5915 } 5916 #ifdef INVARIANTS 5917 else { 5918 KASSERT(txr->hn_agg_txd != NULL, 5919 ("no aggregating txdesc")); 5920 KASSERT(m_head == NULL, 5921 ("pending mbuf for aggregating txdesc")); 5922 } 5923 #endif 5924 5925 /* Sent */ 5926 drbr_advance(ifp, txr->hn_mbuf_br); 5927 } 5928 5929 /* Flush pending aggerated transmission. */ 5930 if (txr->hn_agg_txd != NULL) 5931 hn_flush_txagg(ifp, txr); 5932 return (sched); 5933 } 5934 5935 static int 5936 hn_transmit(struct ifnet *ifp, struct mbuf *m) 5937 { 5938 struct hn_softc *sc = ifp->if_softc; 5939 struct hn_tx_ring *txr; 5940 int error, idx = 0; 5941 5942 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 5943 struct rm_priotracker pt; 5944 5945 rm_rlock(&sc->hn_vf_lock, &pt); 5946 if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 5947 struct mbuf *m_bpf = NULL; 5948 int obytes, omcast; 5949 5950 obytes = m->m_pkthdr.len; 5951 omcast = (m->m_flags & M_MCAST) != 0; 5952 5953 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) { 5954 if (bpf_peers_present(ifp->if_bpf)) { 5955 m_bpf = m_copypacket(m, M_NOWAIT); 5956 if (m_bpf == NULL) { 5957 /* 5958 * Failed to grab a shallow 5959 * copy; tap now. 
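 * (Normally, with ACCBPF set, the shallow m_copypacket() copy taken
 * above is handed to bpf only after the VF's if_transmit() has
 * succeeded, so packets the VF rejects are not reported; tapping the
 * original mbuf right away is the fallback when that copy cannot be
 * allocated.)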
5960 */ 5961 ETHER_BPF_MTAP(ifp, m); 5962 } 5963 } 5964 } else { 5965 ETHER_BPF_MTAP(ifp, m); 5966 } 5967 5968 error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m); 5969 rm_runlock(&sc->hn_vf_lock, &pt); 5970 5971 if (m_bpf != NULL) { 5972 if (!error) 5973 ETHER_BPF_MTAP(ifp, m_bpf); 5974 m_freem(m_bpf); 5975 } 5976 5977 if (error == ENOBUFS) { 5978 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 5979 } else if (error) { 5980 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5981 } else { 5982 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 5983 if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes); 5984 if (omcast) { 5985 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 5986 omcast); 5987 } 5988 } 5989 return (error); 5990 } 5991 rm_runlock(&sc->hn_vf_lock, &pt); 5992 } 5993 5994 #if defined(INET6) || defined(INET) 5995 /* 5996 * Perform TSO packet header fixup or get l2/l3 header length now, 5997 * since packet headers should be cache-hot. 5998 */ 5999 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 6000 m = hn_tso_fixup(m); 6001 if (__predict_false(m == NULL)) { 6002 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6003 return EIO; 6004 } 6005 } else if (m->m_pkthdr.csum_flags & 6006 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 6007 m = hn_set_hlen(m); 6008 if (__predict_false(m == NULL)) { 6009 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6010 return EIO; 6011 } 6012 } 6013 #endif 6014 6015 /* 6016 * Select the TX ring based on flowid 6017 */ 6018 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 6019 #ifdef RSS 6020 uint32_t bid; 6021 6022 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 6023 &bid) == 0) 6024 idx = bid % sc->hn_tx_ring_inuse; 6025 else 6026 #endif 6027 { 6028 #if defined(INET6) || defined(INET) 6029 int tcpsyn = 0; 6030 6031 if (m->m_pkthdr.len < 128 && 6032 (m->m_pkthdr.csum_flags & 6033 (CSUM_IP_TCP | CSUM_IP6_TCP)) && 6034 (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 6035 m = hn_check_tcpsyn(m, &tcpsyn); 6036 if (__predict_false(m == NULL)) { 6037 if_inc_counter(ifp, 6038 IFCOUNTER_OERRORS, 1); 6039 return (EIO); 6040 } 6041 } 6042 #else 6043 const int tcpsyn = 0; 6044 #endif 6045 if (tcpsyn) 6046 idx = 0; 6047 else 6048 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 6049 } 6050 } 6051 txr = &sc->hn_tx_ring[idx]; 6052 6053 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 6054 if (error) { 6055 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 6056 return error; 6057 } 6058 6059 if (txr->hn_oactive) 6060 return 0; 6061 6062 if (txr->hn_sched_tx) 6063 goto do_sched; 6064 6065 if (mtx_trylock(&txr->hn_tx_lock)) { 6066 int sched; 6067 6068 sched = hn_xmit(txr, txr->hn_direct_tx_size); 6069 mtx_unlock(&txr->hn_tx_lock); 6070 if (!sched) 6071 return 0; 6072 } 6073 do_sched: 6074 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 6075 return 0; 6076 } 6077 6078 static void 6079 hn_tx_ring_qflush(struct hn_tx_ring *txr) 6080 { 6081 struct mbuf *m; 6082 6083 mtx_lock(&txr->hn_tx_lock); 6084 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 6085 m_freem(m); 6086 mtx_unlock(&txr->hn_tx_lock); 6087 } 6088 6089 static void 6090 hn_xmit_qflush(struct ifnet *ifp) 6091 { 6092 struct hn_softc *sc = ifp->if_softc; 6093 struct rm_priotracker pt; 6094 int i; 6095 6096 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 6097 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 6098 if_qflush(ifp); 6099 6100 rm_rlock(&sc->hn_vf_lock, &pt); 6101 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 6102 sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp); 6103 rm_runlock(&sc->hn_vf_lock, &pt); 6104 } 6105 6106 static void 6107 hn_xmit_txeof(struct 
hn_tx_ring *txr) 6108 { 6109 6110 if (txr->hn_sched_tx) 6111 goto do_sched; 6112 6113 if (mtx_trylock(&txr->hn_tx_lock)) { 6114 int sched; 6115 6116 txr->hn_oactive = 0; 6117 sched = hn_xmit(txr, txr->hn_direct_tx_size); 6118 mtx_unlock(&txr->hn_tx_lock); 6119 if (sched) { 6120 taskqueue_enqueue(txr->hn_tx_taskq, 6121 &txr->hn_tx_task); 6122 } 6123 } else { 6124 do_sched: 6125 /* 6126 * Release the oactive earlier, with the hope, that 6127 * others could catch up. The task will clear the 6128 * oactive again with the hn_tx_lock to avoid possible 6129 * races. 6130 */ 6131 txr->hn_oactive = 0; 6132 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 6133 } 6134 } 6135 6136 static void 6137 hn_xmit_taskfunc(void *xtxr, int pending __unused) 6138 { 6139 struct hn_tx_ring *txr = xtxr; 6140 6141 mtx_lock(&txr->hn_tx_lock); 6142 hn_xmit(txr, 0); 6143 mtx_unlock(&txr->hn_tx_lock); 6144 } 6145 6146 static void 6147 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) 6148 { 6149 struct hn_tx_ring *txr = xtxr; 6150 6151 mtx_lock(&txr->hn_tx_lock); 6152 txr->hn_oactive = 0; 6153 hn_xmit(txr, 0); 6154 mtx_unlock(&txr->hn_tx_lock); 6155 } 6156 6157 static int 6158 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) 6159 { 6160 struct vmbus_chan_br cbr; 6161 struct hn_rx_ring *rxr; 6162 struct hn_tx_ring *txr = NULL; 6163 int idx, error; 6164 6165 idx = vmbus_chan_subidx(chan); 6166 6167 /* 6168 * Link this channel to RX/TX ring. 6169 */ 6170 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 6171 ("invalid channel index %d, should > 0 && < %d", 6172 idx, sc->hn_rx_ring_inuse)); 6173 rxr = &sc->hn_rx_ring[idx]; 6174 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, 6175 ("RX ring %d already attached", idx)); 6176 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; 6177 rxr->hn_chan = chan; 6178 6179 if (bootverbose) { 6180 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", 6181 idx, vmbus_chan_id(chan)); 6182 } 6183 6184 if (idx < sc->hn_tx_ring_inuse) { 6185 txr = &sc->hn_tx_ring[idx]; 6186 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, 6187 ("TX ring %d already attached", idx)); 6188 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; 6189 6190 txr->hn_chan = chan; 6191 if (bootverbose) { 6192 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", 6193 idx, vmbus_chan_id(chan)); 6194 } 6195 } 6196 6197 /* Bind this channel to a proper CPU. */ 6198 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx)); 6199 6200 /* 6201 * Open this channel 6202 */ 6203 cbr.cbr = rxr->hn_br; 6204 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; 6205 cbr.cbr_txsz = HN_TXBR_SIZE; 6206 cbr.cbr_rxsz = HN_RXBR_SIZE; 6207 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); 6208 if (error) { 6209 if (error == EISCONN) { 6210 if_printf(sc->hn_ifp, "bufring is connected after " 6211 "chan%u open failure\n", vmbus_chan_id(chan)); 6212 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 6213 } else { 6214 if_printf(sc->hn_ifp, "open chan%u failed: %d\n", 6215 vmbus_chan_id(chan), error); 6216 } 6217 } 6218 return (error); 6219 } 6220 6221 static void 6222 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) 6223 { 6224 struct hn_rx_ring *rxr; 6225 int idx, error; 6226 6227 idx = vmbus_chan_subidx(chan); 6228 6229 /* 6230 * Link this channel to RX/TX ring. 
6231 */
6232 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
6233 ("invalid channel index %d, should > 0 && < %d",
6234 idx, sc->hn_rx_ring_inuse));
6235 rxr = &sc->hn_rx_ring[idx];
6236 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
6237 ("RX ring %d is not attached", idx));
6238 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
6239
6240 if (idx < sc->hn_tx_ring_inuse) {
6241 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
6242
6243 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
6244 ("TX ring %d is not attached", idx));
6245 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
6246 }
6247
6248 /*
6249 * Close this channel.
6250 *
6251 * NOTE:
6252 * Channel closing does _not_ destroy the target channel.
6253 */
6254 error = vmbus_chan_close_direct(chan);
6255 if (error == EISCONN) {
6256 if_printf(sc->hn_ifp, "chan%u bufring is connected "
6257 "after being closed\n", vmbus_chan_id(chan));
6258 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
6259 } else if (error) {
6260 if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
6261 vmbus_chan_id(chan), error);
6262 }
6263 }
6264
6265 static int
6266 hn_attach_subchans(struct hn_softc *sc)
6267 {
6268 struct vmbus_channel **subchans;
6269 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
6270 int i, error = 0;
6271
6272 KASSERT(subchan_cnt > 0, ("no sub-channels"));
6273
6274 /* Attach the sub-channels. */
6275 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
6276 for (i = 0; i < subchan_cnt; ++i) {
6277 int error1;
6278
6279 error1 = hn_chan_attach(sc, subchans[i]);
6280 if (error1) {
6281 error = error1;
6282 /* Move on; all channels will be detached later. */
6283 }
6284 }
6285 vmbus_subchan_rel(subchans, subchan_cnt);
6286
6287 if (error) {
6288 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
6289 } else {
6290 if (bootverbose) {
6291 if_printf(sc->hn_ifp, "%d sub-channels attached\n",
6292 subchan_cnt);
6293 }
6294 }
6295 return (error);
6296 }
6297
6298 static void
6299 hn_detach_allchans(struct hn_softc *sc)
6300 {
6301 struct vmbus_channel **subchans;
6302 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
6303 int i;
6304
6305 if (subchan_cnt == 0)
6306 goto back;
6307
6308 /* Detach the sub-channels. */
6309 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
6310 for (i = 0; i < subchan_cnt; ++i)
6311 hn_chan_detach(sc, subchans[i]);
6312 vmbus_subchan_rel(subchans, subchan_cnt);
6313
6314 back:
6315 /*
6316 * Detach the primary channel, _after_ all sub-channels
6317 * are detached.
6318 */
6319 hn_chan_detach(sc, sc->hn_prichan);
6320
6321 /* Wait for sub-channels to be destroyed, if any. */
6322 vmbus_subchan_drain(sc->hn_prichan);
6323
6324 #ifdef INVARIANTS
6325 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
6326 KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
6327 HN_RX_FLAG_ATTACHED) == 0,
6328 ("%dth RX ring is still attached", i));
6329 }
6330 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
6331 KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
6332 HN_TX_FLAG_ATTACHED) == 0,
6333 ("%dth TX ring is still attached", i));
6334 }
6335 #endif
6336 }
6337
6338 static int
6339 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
6340 {
6341 struct vmbus_channel **subchans;
6342 int nchan, rxr_cnt, error;
6343
6344 nchan = *nsubch + 1;
6345 if (nchan == 1) {
6346 /*
6347 * Multiple RX/TX rings are not requested.
6348 */
6349 *nsubch = 0;
6350 return (0);
6351 }
6352
6353 /*
6354 * Query RSS capabilities, e.g. # of RX rings, and # of indirect
6355 * table entries.
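 * The requested channel count is then capped by the number of
 * RX rings the host offers; if this query fails, vRSS is simply
 * not used and only the primary channel is opened.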
6356 */ 6357 error = hn_rndis_query_rsscaps(sc, &rxr_cnt); 6358 if (error) { 6359 /* No RSS; this is benign. */ 6360 *nsubch = 0; 6361 return (0); 6362 } 6363 if (bootverbose) { 6364 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", 6365 rxr_cnt, nchan); 6366 } 6367 6368 if (nchan > rxr_cnt) 6369 nchan = rxr_cnt; 6370 if (nchan == 1) { 6371 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); 6372 *nsubch = 0; 6373 return (0); 6374 } 6375 6376 /* 6377 * Allocate sub-channels from NVS. 6378 */ 6379 *nsubch = nchan - 1; 6380 error = hn_nvs_alloc_subchans(sc, nsubch); 6381 if (error || *nsubch == 0) { 6382 /* Failed to allocate sub-channels. */ 6383 *nsubch = 0; 6384 return (0); 6385 } 6386 6387 /* 6388 * Wait for all sub-channels to become ready before moving on. 6389 */ 6390 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); 6391 vmbus_subchan_rel(subchans, *nsubch); 6392 return (0); 6393 } 6394 6395 static bool 6396 hn_synth_attachable(const struct hn_softc *sc) 6397 { 6398 int i; 6399 6400 if (sc->hn_flags & HN_FLAG_ERRORS) 6401 return (false); 6402 6403 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 6404 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 6405 6406 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) 6407 return (false); 6408 } 6409 return (true); 6410 } 6411 6412 /* 6413 * Make sure that the RX filter is zero after the successful 6414 * RNDIS initialization. 6415 * 6416 * NOTE: 6417 * Under certain conditions on certain versions of Hyper-V, 6418 * the RNDIS rxfilter is _not_ zero on the hypervisor side 6419 * after the successful RNDIS initialization, which breaks 6420 * the assumption of any following code (well, it breaks the 6421 * RNDIS API contract actually). Clear the RNDIS rxfilter 6422 * explicitly, drain packets sneaking through, and drain the 6423 * interrupt taskqueues scheduled due to the stealth packets. 6424 */ 6425 static void 6426 hn_rndis_init_fixat(struct hn_softc *sc, int nchan) 6427 { 6428 6429 hn_disable_rx(sc); 6430 hn_drain_rxtx(sc, nchan); 6431 } 6432 6433 static int 6434 hn_synth_attach(struct hn_softc *sc, int mtu) 6435 { 6436 #define ATTACHED_NVS 0x0002 6437 #define ATTACHED_RNDIS 0x0004 6438 6439 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 6440 int error, nsubch, nchan = 1, i, rndis_inited; 6441 uint32_t old_caps, attached = 0; 6442 6443 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, 6444 ("synthetic parts were attached")); 6445 6446 if (!hn_synth_attachable(sc)) 6447 return (ENXIO); 6448 6449 /* Save capabilities for later verification. */ 6450 old_caps = sc->hn_caps; 6451 sc->hn_caps = 0; 6452 6453 /* Clear RSS stuffs. */ 6454 sc->hn_rss_ind_size = 0; 6455 sc->hn_rss_hash = 0; 6456 sc->hn_rss_hcap = 0; 6457 6458 /* 6459 * Attach the primary channel _before_ attaching NVS and RNDIS. 6460 */ 6461 error = hn_chan_attach(sc, sc->hn_prichan); 6462 if (error) 6463 goto failed; 6464 6465 /* 6466 * Attach NVS. 6467 */ 6468 error = hn_nvs_attach(sc, mtu); 6469 if (error) 6470 goto failed; 6471 attached |= ATTACHED_NVS; 6472 6473 /* 6474 * Attach RNDIS _after_ NVS is attached. 6475 */ 6476 error = hn_rndis_attach(sc, mtu, &rndis_inited); 6477 if (rndis_inited) 6478 attached |= ATTACHED_RNDIS; 6479 if (error) 6480 goto failed; 6481 6482 /* 6483 * Make sure capabilities are not changed. 
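 * This check only matters on re-attachment (the device is already
 * attached, e.g. after an MTU change); the capabilities offered by
 * the host must match what the interface advertised earlier.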
6484 */
6485 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
6486 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
6487 old_caps, sc->hn_caps);
6488 error = ENXIO;
6489 goto failed;
6490 }
6491
6492 /*
6493 * Allocate sub-channels for multi-TX/RX rings.
6494 *
6495 * NOTE:
6496 * The # of RX rings that can be used is equivalent to the # of
6497 * channels to be requested.
6498 */
6499 nsubch = sc->hn_rx_ring_cnt - 1;
6500 error = hn_synth_alloc_subchans(sc, &nsubch);
6501 if (error)
6502 goto failed;
6503 /* NOTE: _Full_ synthetic parts detach is required now. */
6504 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
6505
6506 /*
6507 * Set the # of TX/RX rings that could be used according to
6508 * the # of channels that NVS offered.
6509 */
6510 nchan = nsubch + 1;
6511 hn_set_ring_inuse(sc, nchan);
6512 if (nchan == 1) {
6513 /* Only the primary channel can be used; done */
6514 goto back;
6515 }
6516
6517 /*
6518 * Attach the sub-channels.
6519 *
6520 * NOTE: hn_set_ring_inuse() _must_ have been called.
6521 */
6522 error = hn_attach_subchans(sc);
6523 if (error)
6524 goto failed;
6525
6526 /*
6527 * Configure RSS key and indirect table _after_ all sub-channels
6528 * are attached.
6529 */
6530 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
6531 /*
6532 * RSS key is not set yet; set it to the default RSS key.
6533 */
6534 if (bootverbose)
6535 if_printf(sc->hn_ifp, "setup default RSS key\n");
6536 #ifdef RSS
6537 rss_getkey(rss->rss_key);
6538 #else
6539 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
6540 #endif
6541 sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
6542 }
6543
6544 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
6545 /*
6546 * RSS indirect table is not set yet; set it up in round-
6547 * robin fashion.
6548 */
6549 if (bootverbose) {
6550 if_printf(sc->hn_ifp, "setup default RSS indirect "
6551 "table\n");
6552 }
6553 for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
6554 uint32_t subidx;
6555
6556 #ifdef RSS
6557 subidx = rss_get_indirection_to_bucket(i);
6558 #else
6559 subidx = i;
6560 #endif
6561 rss->rss_ind[i] = subidx % nchan;
6562 }
6563 sc->hn_flags |= HN_FLAG_HAS_RSSIND;
6564 } else {
6565 /*
6566 * # of usable channels may be changed, so we have to
6567 * make sure that all entries in RSS indirect table
6568 * are valid.
6569 *
6570 * NOTE: hn_set_ring_inuse() _must_ have been called.
6571 */
6572 hn_rss_ind_fixup(sc);
6573 }
6574
6575 sc->hn_rss_hash = sc->hn_rss_hcap;
6576 if ((sc->hn_flags & HN_FLAG_RXVF) ||
6577 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
6578 /* NOTE: Don't reconfigure RSS; will do immediately. */
6579 hn_vf_rss_fixup(sc, false);
6580 }
6581 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
6582 if (error)
6583 goto failed;
6584 back:
6585 /*
6586 * Fixup transmission aggregation setup.
6587 */
6588 hn_set_txagg(sc);
6589 hn_rndis_init_fixat(sc, nchan);
6590 return (0);
6591
6592 failed:
6593 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
6594 hn_rndis_init_fixat(sc, nchan);
6595 hn_synth_detach(sc);
6596 } else {
6597 if (attached & ATTACHED_RNDIS) {
6598 hn_rndis_init_fixat(sc, nchan);
6599 hn_rndis_detach(sc);
6600 }
6601 if (attached & ATTACHED_NVS)
6602 hn_nvs_detach(sc);
6603 hn_chan_detach(sc, sc->hn_prichan);
6604 /* Restore old capabilities. */
6605 sc->hn_caps = old_caps;
6606 }
6607 return (error);
6608
6609 #undef ATTACHED_RNDIS
6610 #undef ATTACHED_NVS
6611 }
6612
6613 /*
6614 * NOTE:
6615 * The interface must have been suspended through hn_suspend(), before
6616 * this function gets called.
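 * i.e. both the data and the management paths must be quiescent;
 * RNDIS, NVS and the channels are then torn down in the reverse
 * order of hn_synth_attach().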
6617 */ 6618 static void 6619 hn_synth_detach(struct hn_softc *sc) 6620 { 6621 6622 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 6623 ("synthetic parts were not attached")); 6624 6625 /* Detach the RNDIS first. */ 6626 hn_rndis_detach(sc); 6627 6628 /* Detach NVS. */ 6629 hn_nvs_detach(sc); 6630 6631 /* Detach all of the channels. */ 6632 hn_detach_allchans(sc); 6633 6634 if (vmbus_current_version >= VMBUS_VERSION_WIN10 && sc->hn_rxbuf_gpadl != 0) { 6635 /* 6636 * Host is post-Win2016, disconnect RXBUF from primary channel here. 6637 */ 6638 int error; 6639 6640 error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, 6641 sc->hn_rxbuf_gpadl); 6642 if (error) { 6643 if_printf(sc->hn_ifp, 6644 "rxbuf gpadl disconn failed: %d\n", error); 6645 sc->hn_flags |= HN_FLAG_RXBUF_REF; 6646 } 6647 sc->hn_rxbuf_gpadl = 0; 6648 } 6649 6650 if (vmbus_current_version >= VMBUS_VERSION_WIN10 && sc->hn_chim_gpadl != 0) { 6651 /* 6652 * Host is post-Win2016, disconnect chimney sending buffer from 6653 * primary channel here. 6654 */ 6655 int error; 6656 6657 error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, 6658 sc->hn_chim_gpadl); 6659 if (error) { 6660 if_printf(sc->hn_ifp, 6661 "chim gpadl disconn failed: %d\n", error); 6662 sc->hn_flags |= HN_FLAG_CHIM_REF; 6663 } 6664 sc->hn_chim_gpadl = 0; 6665 } 6666 sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED; 6667 } 6668 6669 static void 6670 hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt) 6671 { 6672 KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt, 6673 ("invalid ring count %d", ring_cnt)); 6674 6675 if (sc->hn_tx_ring_cnt > ring_cnt) 6676 sc->hn_tx_ring_inuse = ring_cnt; 6677 else 6678 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 6679 sc->hn_rx_ring_inuse = ring_cnt; 6680 6681 #ifdef RSS 6682 if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) { 6683 if_printf(sc->hn_ifp, "# of RX rings (%d) does not match " 6684 "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse, 6685 rss_getnumbuckets()); 6686 } 6687 #endif 6688 6689 if (bootverbose) { 6690 if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n", 6691 sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); 6692 } 6693 } 6694 6695 static void 6696 hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan) 6697 { 6698 6699 /* 6700 * NOTE: 6701 * The TX bufring will not be drained by the hypervisor, 6702 * if the primary channel is revoked. 6703 */ 6704 while (!vmbus_chan_rx_empty(chan) || 6705 (!vmbus_chan_is_revoked(sc->hn_prichan) && 6706 !vmbus_chan_tx_empty(chan))) 6707 pause("waitch", 1); 6708 vmbus_chan_intr_drain(chan); 6709 } 6710 6711 static void 6712 hn_disable_rx(struct hn_softc *sc) 6713 { 6714 6715 /* 6716 * Disable RX by clearing RX filter forcefully. 6717 */ 6718 sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE; 6719 hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */ 6720 6721 /* 6722 * Give RNDIS enough time to flush all pending data packets. 6723 */ 6724 pause("waitrx", (200 * hz) / 1000); 6725 } 6726 6727 /* 6728 * NOTE: 6729 * RX/TX _must_ have been suspended/disabled, before this function 6730 * is called. 6731 */ 6732 static void 6733 hn_drain_rxtx(struct hn_softc *sc, int nchan) 6734 { 6735 struct vmbus_channel **subch = NULL; 6736 int nsubch; 6737 6738 /* 6739 * Drain RX/TX bufrings and interrupts. 
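 * Each channel is busy-waited until its bufrings are empty and its
 * interrupt taskqueue is drained (see hn_chan_drain()); sub-channels
 * are handled before the primary channel.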
6740 */
6741 nsubch = nchan - 1;
6742 if (nsubch > 0)
6743 subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
6744
6745 if (subch != NULL) {
6746 int i;
6747
6748 for (i = 0; i < nsubch; ++i)
6749 hn_chan_drain(sc, subch[i]);
6750 }
6751 hn_chan_drain(sc, sc->hn_prichan);
6752
6753 if (subch != NULL)
6754 vmbus_subchan_rel(subch, nsubch);
6755 }
6756
6757 static void
6758 hn_suspend_data(struct hn_softc *sc)
6759 {
6760 struct hn_tx_ring *txr;
6761 int i;
6762
6763 HN_LOCK_ASSERT(sc);
6764
6765 /*
6766 * Suspend TX.
6767 */
6768 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
6769 txr = &sc->hn_tx_ring[i];
6770
6771 mtx_lock(&txr->hn_tx_lock);
6772 txr->hn_suspended = 1;
6773 mtx_unlock(&txr->hn_tx_lock);
6774 /* No one is able to send more packets now. */
6775
6776 /*
6777 * Wait for all pending sends to finish.
6778 *
6779 * NOTE:
6780 * We will _not_ receive all pending send-done, if the
6781 * primary channel is revoked.
6782 */
6783 while (hn_tx_ring_pending(txr) &&
6784 !vmbus_chan_is_revoked(sc->hn_prichan))
6785 pause("hnwtx", 1 /* 1 tick */);
6786 }
6787
6788 /*
6789 * Disable RX.
6790 */
6791 hn_disable_rx(sc);
6792
6793 /*
6794 * Drain RX/TX.
6795 */
6796 hn_drain_rxtx(sc, sc->hn_rx_ring_inuse);
6797
6798 /*
6799 * Drain any pending TX tasks.
6800 *
6801 * NOTE:
6802 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX
6803 * tasks will have to be drained _after_ the above hn_drain_rxtx().
6804 */
6805 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
6806 txr = &sc->hn_tx_ring[i];
6807
6808 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
6809 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
6810 }
6811 }
6812
6813 static void
6814 hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
6815 {
6816
6817 ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
6818 }
6819
6820 static void
6821 hn_suspend_mgmt(struct hn_softc *sc)
6822 {
6823 struct task task;
6824
6825 HN_LOCK_ASSERT(sc);
6826
6827 /*
6828 * Make sure that hn_mgmt_taskq0 can no longer be accessed
6829 * through hn_mgmt_taskq.
6830 */
6831 TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
6832 vmbus_chan_run_task(sc->hn_prichan, &task);
6833
6834 /*
6835 * Make sure that all pending management tasks are completed.
6836 */
6837 taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
6838 taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
6839 taskqueue_drain_all(sc->hn_mgmt_taskq0);
6840 }
6841
6842 static void
6843 hn_suspend(struct hn_softc *sc)
6844 {
6845
6846 /* Disable polling. */
6847 hn_polling(sc, 0);
6848
6849 /*
6850 * If the non-transparent mode VF is activated, the synthetic
6851 * device is receiving packets, so the data path of the
6852 * synthetic device must be suspended.
6853 */
6854 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
6855 (sc->hn_flags & HN_FLAG_RXVF))
6856 hn_suspend_data(sc);
6857 hn_suspend_mgmt(sc);
6858 }
6859
6860 static void
6861 hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
6862 {
6863 int i;
6864
6865 KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
6866 ("invalid TX ring count %d", tx_ring_cnt));
6867
6868 for (i = 0; i < tx_ring_cnt; ++i) {
6869 struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
6870
6871 mtx_lock(&txr->hn_tx_lock);
6872 txr->hn_suspended = 0;
6873 mtx_unlock(&txr->hn_tx_lock);
6874 }
6875 }
6876
6877 static void
6878 hn_resume_data(struct hn_softc *sc)
6879 {
6880 int i;
6881
6882 HN_LOCK_ASSERT(sc);
6883
6884 /*
6885 * Re-enable RX.
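 * i.e. reprogram the RNDIS RX filter that hn_disable_rx() cleared
 * while the data path was being suspended.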
6886 */
6887 hn_rxfilter_config(sc);
6888
6889 /*
6890 * Make sure to clear suspend status on "all" TX rings,
6891 * since hn_tx_ring_inuse can be changed after
6892 * hn_suspend_data().
6893 */
6894 hn_resume_tx(sc, sc->hn_tx_ring_cnt);
6895
6896 #ifdef HN_IFSTART_SUPPORT
6897 if (!hn_use_if_start)
6898 #endif
6899 {
6900 /*
6901 * Flush unused drbrs, since hn_tx_ring_inuse may be
6902 * reduced.
6903 */
6904 for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
6905 hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
6906 }
6907
6908 /*
6909 * Kick start TX.
6910 */
6911 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
6912 struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
6913
6914 /*
6915 * Use txeof task, so that any pending oactive can be
6916 * cleared properly.
6917 */
6918 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
6919 }
6920 }
6921
6922 static void
6923 hn_resume_mgmt(struct hn_softc *sc)
6924 {
6925
6926 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
6927
6928 /*
6929 * Kick off network change detection, if it was pending.
6930 * If no network change was pending, start link status
6931 * checks, which are more lightweight than network change
6932 * detection.
6933 */
6934 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
6935 hn_change_network(sc);
6936 else
6937 hn_update_link_status(sc);
6938 }
6939
6940 static void
6941 hn_resume(struct hn_softc *sc)
6942 {
6943
6944 /*
6945 * If the non-transparent mode VF is activated, the synthetic
6946 * device has to receive packets, so the data path of the
6947 * synthetic device must be resumed.
6948 */
6949 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
6950 (sc->hn_flags & HN_FLAG_RXVF))
6951 hn_resume_data(sc);
6952
6953 /*
6954 * Don't resume link status change if VF is attached/activated.
6955 * - In the non-transparent VF mode, the synthetic device marks
6956 * link down until the VF is deactivated; i.e. VF is down.
6957 * - In transparent VF mode, VF's media status is used until
6958 * the VF is detached.
6959 */
6960 if ((sc->hn_flags & HN_FLAG_RXVF) == 0 &&
6961 !(hn_xpnt_vf && sc->hn_vf_ifp != NULL))
6962 hn_resume_mgmt(sc);
6963
6964 /*
6965 * Re-enable polling if this interface is running and
6966 * the polling is requested.
6967 */
6968 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
6969 hn_polling(sc, sc->hn_pollhz);
6970 }
6971
6972 static void
6973 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
6974 {
6975 const struct rndis_status_msg *msg;
6976 int ofs;
6977
6978 if (dlen < sizeof(*msg)) {
6979 if_printf(sc->hn_ifp, "invalid RNDIS status\n");
6980 return;
6981 }
6982 msg = data;
6983
6984 switch (msg->rm_status) {
6985 case RNDIS_STATUS_MEDIA_CONNECT:
6986 case RNDIS_STATUS_MEDIA_DISCONNECT:
6987 hn_update_link_status(sc);
6988 break;
6989
6990 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
6991 case RNDIS_STATUS_LINK_SPEED_CHANGE:
6992 /* Not really useful; ignore.
*/ 6993 break; 6994 6995 case RNDIS_STATUS_NETWORK_CHANGE: 6996 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); 6997 if (dlen < ofs + msg->rm_stbuflen || 6998 msg->rm_stbuflen < sizeof(uint32_t)) { 6999 if_printf(sc->hn_ifp, "network changed\n"); 7000 } else { 7001 uint32_t change; 7002 7003 memcpy(&change, ((const uint8_t *)msg) + ofs, 7004 sizeof(change)); 7005 if_printf(sc->hn_ifp, "network changed, change %u\n", 7006 change); 7007 } 7008 hn_change_network(sc); 7009 break; 7010 7011 default: 7012 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", 7013 msg->rm_status); 7014 break; 7015 } 7016 } 7017 7018 static int 7019 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) 7020 { 7021 const struct rndis_pktinfo *pi = info_data; 7022 uint32_t mask = 0; 7023 7024 while (info_dlen != 0) { 7025 const void *data; 7026 uint32_t dlen; 7027 7028 if (__predict_false(info_dlen < sizeof(*pi))) 7029 return (EINVAL); 7030 if (__predict_false(info_dlen < pi->rm_size)) 7031 return (EINVAL); 7032 info_dlen -= pi->rm_size; 7033 7034 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) 7035 return (EINVAL); 7036 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) 7037 return (EINVAL); 7038 dlen = pi->rm_size - pi->rm_pktinfooffset; 7039 data = pi->rm_data; 7040 7041 switch (pi->rm_type) { 7042 case NDIS_PKTINFO_TYPE_VLAN: 7043 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) 7044 return (EINVAL); 7045 info->vlan_info = *((const uint32_t *)data); 7046 mask |= HN_RXINFO_VLAN; 7047 break; 7048 7049 case NDIS_PKTINFO_TYPE_CSUM: 7050 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) 7051 return (EINVAL); 7052 info->csum_info = *((const uint32_t *)data); 7053 mask |= HN_RXINFO_CSUM; 7054 break; 7055 7056 case HN_NDIS_PKTINFO_TYPE_HASHVAL: 7057 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) 7058 return (EINVAL); 7059 info->hash_value = *((const uint32_t *)data); 7060 mask |= HN_RXINFO_HASHVAL; 7061 break; 7062 7063 case HN_NDIS_PKTINFO_TYPE_HASHINF: 7064 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) 7065 return (EINVAL); 7066 info->hash_info = *((const uint32_t *)data); 7067 mask |= HN_RXINFO_HASHINF; 7068 break; 7069 7070 default: 7071 goto next; 7072 } 7073 7074 if (mask == HN_RXINFO_ALL) { 7075 /* All found; done */ 7076 break; 7077 } 7078 next: 7079 pi = (const struct rndis_pktinfo *) 7080 ((const uint8_t *)pi + pi->rm_size); 7081 } 7082 7083 /* 7084 * Final fixup. 7085 * - If there is no hash value, invalidate the hash info. 7086 */ 7087 if ((mask & HN_RXINFO_HASHVAL) == 0) 7088 info->hash_info = HN_NDIS_HASH_INFO_INVALID; 7089 return (0); 7090 } 7091 7092 static __inline bool 7093 hn_rndis_check_overlap(int off, int len, int check_off, int check_len) 7094 { 7095 7096 if (off < check_off) { 7097 if (__predict_true(off + len <= check_off)) 7098 return (false); 7099 } else if (off > check_off) { 7100 if (__predict_true(check_off + check_len <= off)) 7101 return (false); 7102 } 7103 return (true); 7104 } 7105 7106 static void 7107 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) 7108 { 7109 const struct rndis_packet_msg *pkt; 7110 struct hn_rxinfo info; 7111 int data_off, pktinfo_off, data_len, pktinfo_len; 7112 7113 /* 7114 * Check length. 
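 * The RNDIS packet message is validated in stages: the message
 * length itself, then the data/OOB/pktinfo offsets, then the
 * coverage and overlap of each region, before the data region is
 * finally handed to hn_rxpkt().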
7115 */
7116 if (__predict_false(dlen < sizeof(*pkt))) {
7117 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
7118 return;
7119 }
7120 pkt = data;
7121
7122 if (__predict_false(dlen < pkt->rm_len)) {
7123 if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
7124 "dlen %d, msglen %u\n", dlen, pkt->rm_len);
7125 return;
7126 }
7127 if (__predict_false(pkt->rm_len <
7128 pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
7129 if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
7130 "msglen %u, data %u, oob %u, pktinfo %u\n",
7131 pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
7132 pkt->rm_pktinfolen);
7133 return;
7134 }
7135 if (__predict_false(pkt->rm_datalen == 0)) {
7136 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
7137 return;
7138 }
7139
7140 /*
7141 * Check offsets.
7142 */
7143 #define IS_OFFSET_INVALID(ofs) \
7144 ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \
7145 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))
7146
7147 /* XXX Hyper-V does not meet data offset alignment requirement */
7148 if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
7149 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
7150 "data offset %u\n", pkt->rm_dataoffset);
7151 return;
7152 }
7153 if (__predict_false(pkt->rm_oobdataoffset > 0 &&
7154 IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
7155 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
7156 "oob offset %u\n", pkt->rm_oobdataoffset);
7157 return;
7158 }
7159 if (__predict_true(pkt->rm_pktinfooffset > 0) &&
7160 __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
7161 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
7162 "pktinfo offset %u\n", pkt->rm_pktinfooffset);
7163 return;
7164 }
7165
7166 #undef IS_OFFSET_INVALID
7167
7168 data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
7169 data_len = pkt->rm_datalen;
7170 pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
7171 pktinfo_len = pkt->rm_pktinfolen;
7172
7173 /*
7174 * Check OOB coverage.
7175 */
7176 if (__predict_false(pkt->rm_oobdatalen != 0)) {
7177 int oob_off, oob_len;
7178
7179 if_printf(rxr->hn_ifp, "got oobdata\n");
7180 oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
7181 oob_len = pkt->rm_oobdatalen;
7182
7183 if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
7184 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
7185 "oob overflow, msglen %u, oob abs %d len %d\n",
7186 pkt->rm_len, oob_off, oob_len);
7187 return;
7188 }
7189
7190 /*
7191 * Check against data.
7192 */
7193 if (hn_rndis_check_overlap(oob_off, oob_len,
7194 data_off, data_len)) {
7195 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
7196 "oob overlaps data, oob abs %d len %d, "
7197 "data abs %d len %d\n",
7198 oob_off, oob_len, data_off, data_len);
7199 return;
7200 }
7201
7202 /*
7203 * Check against pktinfo.
7204 */
7205 if (pktinfo_len != 0 &&
7206 hn_rndis_check_overlap(oob_off, oob_len,
7207 pktinfo_off, pktinfo_len)) {
7208 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
7209 "oob overlaps pktinfo, oob abs %d len %d, "
7210 "pktinfo abs %d len %d\n",
7211 oob_off, oob_len, pktinfo_off, pktinfo_len);
7212 return;
7213 }
7214 }
7215
7216 /*
7217 * Check per-packet-info coverage and find useful per-packet-info.
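 * hn_rndis_rxinfo() walks the per-packet-info list and extracts the
 * VLAN, RX checksum and hash fields; anything it does not find is
 * left at the 'invalid' defaults assigned below.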
7218 */ 7219 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; 7220 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; 7221 info.hash_info = HN_NDIS_HASH_INFO_INVALID; 7222 if (__predict_true(pktinfo_len != 0)) { 7223 bool overlap; 7224 int error; 7225 7226 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { 7227 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7228 "pktinfo overflow, msglen %u, " 7229 "pktinfo abs %d len %d\n", 7230 pkt->rm_len, pktinfo_off, pktinfo_len); 7231 return; 7232 } 7233 7234 /* 7235 * Check packet info coverage. 7236 */ 7237 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, 7238 data_off, data_len); 7239 if (__predict_false(overlap)) { 7240 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7241 "pktinfo overlap data, pktinfo abs %d len %d, " 7242 "data abs %d len %d\n", 7243 pktinfo_off, pktinfo_len, data_off, data_len); 7244 return; 7245 } 7246 7247 /* 7248 * Find useful per-packet-info. 7249 */ 7250 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, 7251 pktinfo_len, &info); 7252 if (__predict_false(error)) { 7253 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " 7254 "pktinfo\n"); 7255 return; 7256 } 7257 } 7258 7259 if (__predict_false(data_off + data_len > pkt->rm_len)) { 7260 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7261 "data overflow, msglen %u, data abs %d len %d\n", 7262 pkt->rm_len, data_off, data_len); 7263 return; 7264 } 7265 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); 7266 } 7267 7268 static __inline void 7269 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) 7270 { 7271 const struct rndis_msghdr *hdr; 7272 7273 if (__predict_false(dlen < sizeof(*hdr))) { 7274 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); 7275 return; 7276 } 7277 hdr = data; 7278 7279 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { 7280 /* Hot data path. */ 7281 hn_rndis_rx_data(rxr, data, dlen); 7282 /* Done! */ 7283 return; 7284 } 7285 7286 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) 7287 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); 7288 else 7289 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); 7290 } 7291 7292 static void 7293 hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) 7294 { 7295 const struct hn_nvs_hdr *hdr; 7296 7297 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { 7298 if_printf(sc->hn_ifp, "invalid nvs notify\n"); 7299 return; 7300 } 7301 hdr = VMBUS_CHANPKT_CONST_DATA(pkt); 7302 7303 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { 7304 /* Useless; ignore */ 7305 return; 7306 } 7307 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); 7308 } 7309 7310 static void 7311 hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, 7312 const struct vmbus_chanpkt_hdr *pkt) 7313 { 7314 struct hn_nvs_sendctx *sndc; 7315 7316 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; 7317 sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), 7318 VMBUS_CHANPKT_DATALEN(pkt)); 7319 /* 7320 * NOTE: 7321 * 'sndc' CAN NOT be accessed anymore, since it can be freed by 7322 * its callback. 
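 * For data sends, the callback typically recycles the txdesc that
 * owns the send context.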
7323 */ 7324 } 7325 7326 static void 7327 hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 7328 const struct vmbus_chanpkt_hdr *pkthdr) 7329 { 7330 const struct vmbus_chanpkt_rxbuf *pkt; 7331 const struct hn_nvs_hdr *nvs_hdr; 7332 int count, i, hlen; 7333 7334 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { 7335 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); 7336 return; 7337 } 7338 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); 7339 7340 /* Make sure that this is a RNDIS message. */ 7341 if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { 7342 if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", 7343 nvs_hdr->nvs_type); 7344 return; 7345 } 7346 7347 hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); 7348 if (__predict_false(hlen < sizeof(*pkt))) { 7349 if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); 7350 return; 7351 } 7352 pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; 7353 7354 if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { 7355 if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", 7356 pkt->cp_rxbuf_id); 7357 return; 7358 } 7359 7360 count = pkt->cp_rxbuf_cnt; 7361 if (__predict_false(hlen < 7362 __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { 7363 if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); 7364 return; 7365 } 7366 7367 /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ 7368 for (i = 0; i < count; ++i) { 7369 int ofs, len; 7370 7371 ofs = pkt->cp_rxbuf[i].rb_ofs; 7372 len = pkt->cp_rxbuf[i].rb_len; 7373 if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { 7374 if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " 7375 "ofs %d, len %d\n", i, ofs, len); 7376 continue; 7377 } 7378 hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); 7379 } 7380 7381 /* 7382 * Ack the consumed RXBUF associated w/ this channel packet, 7383 * so that this RXBUF can be recycled by the hypervisor. 7384 */ 7385 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); 7386 } 7387 7388 static void 7389 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 7390 uint64_t tid) 7391 { 7392 struct hn_nvs_rndis_ack ack; 7393 int retries, error; 7394 7395 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; 7396 ack.nvs_status = HN_NVS_STATUS_OK; 7397 7398 retries = 0; 7399 again: 7400 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, 7401 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); 7402 if (__predict_false(error == EAGAIN)) { 7403 /* 7404 * NOTE: 7405 * This should _not_ happen in real world, since the 7406 * consumption of the TX bufring from the TX path is 7407 * controlled. 7408 */ 7409 if (rxr->hn_ack_failed == 0) 7410 if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); 7411 rxr->hn_ack_failed++; 7412 retries++; 7413 if (retries < 10) { 7414 DELAY(100); 7415 goto again; 7416 } 7417 /* RXBUF leaks! */ 7418 if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); 7419 } 7420 } 7421 7422 static void 7423 hn_chan_callback(struct vmbus_channel *chan, void *xrxr) 7424 { 7425 struct hn_rx_ring *rxr = xrxr; 7426 struct hn_softc *sc = rxr->hn_ifp->if_softc; 7427 7428 for (;;) { 7429 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; 7430 int error, pktlen; 7431 7432 pktlen = rxr->hn_pktbuf_len; 7433 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); 7434 if (__predict_false(error == ENOBUFS)) { 7435 void *nbuf; 7436 int nlen; 7437 7438 /* 7439 * Expand channel packet buffer. 7440 * 7441 * XXX 7442 * Use M_WAITOK here, since allocation failure 7443 * is fatal. 
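 * The buffer is doubled until it can hold the pending packet; the
 * old buffer is freed only after the new one has been allocated,
 * and the packet stays in the channel until the retry succeeds.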
7444 */
7445 nlen = rxr->hn_pktbuf_len * 2;
7446 while (nlen < pktlen)
7447 nlen *= 2;
7448 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);
7449
7450 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
7451 rxr->hn_pktbuf_len, nlen);
7452
7453 free(rxr->hn_pktbuf, M_DEVBUF);
7454 rxr->hn_pktbuf = nbuf;
7455 rxr->hn_pktbuf_len = nlen;
7456 /* Retry! */
7457 continue;
7458 } else if (__predict_false(error == EAGAIN)) {
7459 /* No more channel packets; done! */
7460 break;
7461 }
7462 KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));
7463
7464 switch (pkt->cph_type) {
7465 case VMBUS_CHANPKT_TYPE_COMP:
7466 hn_nvs_handle_comp(sc, chan, pkt);
7467 break;
7468
7469 case VMBUS_CHANPKT_TYPE_RXBUF:
7470 hn_nvs_handle_rxbuf(rxr, chan, pkt);
7471 break;
7472
7473 case VMBUS_CHANPKT_TYPE_INBAND:
7474 hn_nvs_handle_notify(sc, pkt);
7475 break;
7476
7477 default:
7478 if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
7479 pkt->cph_type);
7480 break;
7481 }
7482 }
7483 hn_chan_rollup(rxr, rxr->hn_txr);
7484 }
7485
7486 static void
7487 hn_sysinit(void *arg __unused)
7488 {
7489 int i;
7490
7491 hn_udpcs_fixup = counter_u64_alloc(M_WAITOK);
7492
7493 #ifdef HN_IFSTART_SUPPORT
7494 /*
7495 * Don't use ifnet.if_start if transparent VF mode is requested;
7496 * mainly due to the IFF_DRV_OACTIVE flag.
7497 */
7498 if (hn_xpnt_vf && hn_use_if_start) {
7499 hn_use_if_start = 0;
7500 printf("hn: transparent VF mode, if_transmit will be used, "
7501 "instead of if_start\n");
7502 }
7503 #endif
7504 if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) {
7505 printf("hn: invalid transparent VF attach routing "
7506 "wait timeout %d, reset to %d\n",
7507 hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN);
7508 hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
7509 }
7510
7511 /*
7512 * Initialize VF map.
7513 */
7514 rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE);
7515 hn_vfmap_size = HN_VFMAP_SIZE_DEF;
7516 hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF,
7517 M_WAITOK | M_ZERO);
7518
7519 /*
7520 * Fix the # of TX taskqueues.
7521 */
7522 if (hn_tx_taskq_cnt <= 0)
7523 hn_tx_taskq_cnt = 1;
7524 else if (hn_tx_taskq_cnt > mp_ncpus)
7525 hn_tx_taskq_cnt = mp_ncpus;
7526
7527 /*
7528 * Fix the TX taskqueue mode.
7529 */
7530 switch (hn_tx_taskq_mode) {
7531 case HN_TX_TASKQ_M_INDEP:
7532 case HN_TX_TASKQ_M_GLOBAL:
7533 case HN_TX_TASKQ_M_EVTTQ:
7534 break;
7535 default:
7536 hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
7537 break;
7538 }
7539
7540 if (vm_guest != VM_GUEST_HV)
7541 return;
7542
7543 if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
7544 return;
7545
7546 hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
7547 M_DEVBUF, M_WAITOK);
7548 for (i = 0; i < hn_tx_taskq_cnt; ++i) {
7549 hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
7550 taskqueue_thread_enqueue, &hn_tx_taskque[i]);
7551 taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
7552 "hn tx%d", i);
7553 }
7554 }
7555 SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL);
7556
7557 static void
7558 hn_sysuninit(void *arg __unused)
7559 {
7560
7561 if (hn_tx_taskque != NULL) {
7562 int i;
7563
7564 for (i = 0; i < hn_tx_taskq_cnt; ++i)
7565 taskqueue_free(hn_tx_taskque[i]);
7566 free(hn_tx_taskque, M_DEVBUF);
7567 }
7568
7569 if (hn_vfmap != NULL)
7570 free(hn_vfmap, M_DEVBUF);
7571 rm_destroy(&hn_vfmap_lock);
7572
7573 counter_u64_free(hn_udpcs_fixup);
7574 }
7575 SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL);
7576