1 /*- 2 * Copyright (c) 2010-2012 Citrix Inc. 3 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. 4 * Copyright (c) 2012 NetApp Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 2004-2006 Kip Macy 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 
53 */ 54 55 #include <sys/cdefs.h> 56 __FBSDID("$FreeBSD$"); 57 58 #include "opt_hn.h" 59 #include "opt_inet6.h" 60 #include "opt_inet.h" 61 #include "opt_rss.h" 62 63 #include <sys/param.h> 64 #include <sys/systm.h> 65 #include <sys/bus.h> 66 #include <sys/counter.h> 67 #include <sys/kernel.h> 68 #include <sys/limits.h> 69 #include <sys/malloc.h> 70 #include <sys/mbuf.h> 71 #include <sys/module.h> 72 #include <sys/queue.h> 73 #include <sys/lock.h> 74 #include <sys/rmlock.h> 75 #include <sys/sbuf.h> 76 #include <sys/smp.h> 77 #include <sys/socket.h> 78 #include <sys/sockio.h> 79 #include <sys/sx.h> 80 #include <sys/sysctl.h> 81 #include <sys/taskqueue.h> 82 #include <sys/buf_ring.h> 83 #include <sys/eventhandler.h> 84 85 #include <machine/atomic.h> 86 #include <machine/in_cksum.h> 87 88 #include <net/bpf.h> 89 #include <net/ethernet.h> 90 #include <net/if.h> 91 #include <net/if_dl.h> 92 #include <net/if_media.h> 93 #include <net/if_types.h> 94 #include <net/if_var.h> 95 #include <net/rndis.h> 96 #ifdef RSS 97 #include <net/rss_config.h> 98 #endif 99 100 #include <netinet/in_systm.h> 101 #include <netinet/in.h> 102 #include <netinet/ip.h> 103 #include <netinet/ip6.h> 104 #include <netinet/tcp.h> 105 #include <netinet/tcp_lro.h> 106 #include <netinet/udp.h> 107 108 #include <dev/hyperv/include/hyperv.h> 109 #include <dev/hyperv/include/hyperv_busdma.h> 110 #include <dev/hyperv/include/vmbus.h> 111 #include <dev/hyperv/include/vmbus_xact.h> 112 113 #include <dev/hyperv/netvsc/ndis.h> 114 #include <dev/hyperv/netvsc/if_hnreg.h> 115 #include <dev/hyperv/netvsc/if_hnvar.h> 116 #include <dev/hyperv/netvsc/hn_nvs.h> 117 #include <dev/hyperv/netvsc/hn_rndis.h> 118 119 #include "vmbus_if.h" 120 121 #define HN_IFSTART_SUPPORT 122 123 #define HN_RING_CNT_DEF_MAX 8 124 125 #define HN_VFMAP_SIZE_DEF 8 126 127 #define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */ 128 129 /* YYY should get it from the underlying channel */ 130 #define HN_TX_DESC_CNT 512 131 132 #define HN_RNDIS_PKT_LEN \ 133 (sizeof(struct rndis_packet_msg) + \ 134 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ 135 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ 136 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ 137 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) 138 #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE 139 #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE 140 141 #define HN_TX_DATA_BOUNDARY PAGE_SIZE 142 #define HN_TX_DATA_MAXSIZE IP_MAXPACKET 143 #define HN_TX_DATA_SEGSIZE PAGE_SIZE 144 /* -1 for RNDIS packet message */ 145 #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) 146 147 #define HN_DIRECT_TX_SIZE_DEF 128 148 149 #define HN_EARLY_TXEOF_THRESH 8 150 151 #define HN_PKTBUF_LEN_DEF (16 * 1024) 152 153 #define HN_LROENT_CNT_DEF 128 154 155 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) 156 #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) 157 /* YYY 2*MTU is a bit rough, but should be good enough. 
*/ 158 #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) 159 160 #define HN_LRO_ACKCNT_DEF 1 161 162 #define HN_LOCK_INIT(sc) \ 163 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) 164 #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) 165 #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) 166 #define HN_LOCK(sc) \ 167 do { \ 168 while (sx_try_xlock(&(sc)->hn_lock) == 0) \ 169 DELAY(1000); \ 170 } while (0) 171 #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) 172 173 #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) 174 #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) 175 #define HN_CSUM_IP_HWASSIST(sc) \ 176 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) 177 #define HN_CSUM_IP6_HWASSIST(sc) \ 178 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) 179 180 #define HN_PKTSIZE_MIN(align) \ 181 roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ 182 HN_RNDIS_PKT_LEN, (align)) 183 #define HN_PKTSIZE(m, align) \ 184 roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) 185 186 #ifdef RSS 187 #define HN_RING_IDX2CPU(sc, idx) rss_getcpu((idx) % rss_getnumbuckets()) 188 #else 189 #define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus) 190 #endif 191 192 struct hn_txdesc { 193 #ifndef HN_USE_TXDESC_BUFRING 194 SLIST_ENTRY(hn_txdesc) link; 195 #endif 196 STAILQ_ENTRY(hn_txdesc) agg_link; 197 198 /* Aggregated txdescs, in sending order. */ 199 STAILQ_HEAD(, hn_txdesc) agg_list; 200 201 /* The oldest packet, if transmission aggregation happens. */ 202 struct mbuf *m; 203 struct hn_tx_ring *txr; 204 int refs; 205 uint32_t flags; /* HN_TXD_FLAG_ */ 206 struct hn_nvs_sendctx send_ctx; 207 uint32_t chim_index; 208 int chim_size; 209 210 bus_dmamap_t data_dmap; 211 212 bus_addr_t rndis_pkt_paddr; 213 struct rndis_packet_msg *rndis_pkt; 214 bus_dmamap_t rndis_pkt_dmap; 215 }; 216 217 #define HN_TXD_FLAG_ONLIST 0x0001 218 #define HN_TXD_FLAG_DMAMAP 0x0002 219 #define HN_TXD_FLAG_ONAGG 0x0004 220 221 struct hn_rxinfo { 222 uint32_t vlan_info; 223 uint32_t csum_info; 224 uint32_t hash_info; 225 uint32_t hash_value; 226 }; 227 228 struct hn_rxvf_setarg { 229 struct hn_rx_ring *rxr; 230 struct ifnet *vf_ifp; 231 }; 232 233 #define HN_RXINFO_VLAN 0x0001 234 #define HN_RXINFO_CSUM 0x0002 235 #define HN_RXINFO_HASHINF 0x0004 236 #define HN_RXINFO_HASHVAL 0x0008 237 #define HN_RXINFO_ALL \ 238 (HN_RXINFO_VLAN | \ 239 HN_RXINFO_CSUM | \ 240 HN_RXINFO_HASHINF | \ 241 HN_RXINFO_HASHVAL) 242 243 #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff 244 #define HN_NDIS_RXCSUM_INFO_INVALID 0 245 #define HN_NDIS_HASH_INFO_INVALID 0 246 247 static int hn_probe(device_t); 248 static int hn_attach(device_t); 249 static int hn_detach(device_t); 250 static int hn_shutdown(device_t); 251 static void hn_chan_callback(struct vmbus_channel *, 252 void *); 253 254 static void hn_init(void *); 255 static int hn_ioctl(struct ifnet *, u_long, caddr_t); 256 #ifdef HN_IFSTART_SUPPORT 257 static void hn_start(struct ifnet *); 258 #endif 259 static int hn_transmit(struct ifnet *, struct mbuf *); 260 static void hn_xmit_qflush(struct ifnet *); 261 static int hn_ifmedia_upd(struct ifnet *); 262 static void hn_ifmedia_sts(struct ifnet *, 263 struct ifmediareq *); 264 265 static void hn_ifnet_event(void *, struct ifnet *, int); 266 static void hn_ifaddr_event(void *, struct ifnet *); 267 static void hn_ifnet_attevent(void *, struct ifnet *); 268 static void hn_ifnet_detevent(void *, struct ifnet *); 269 static void hn_ifnet_lnkevent(void *, struct ifnet *, int); 270 
271 static bool hn_ismyvf(const struct hn_softc *, 272 const struct ifnet *); 273 static void hn_rxvf_change(struct hn_softc *, 274 struct ifnet *, bool); 275 static void hn_rxvf_set(struct hn_softc *, struct ifnet *); 276 static void hn_rxvf_set_task(void *, int); 277 static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *); 278 static int hn_xpnt_vf_iocsetflags(struct hn_softc *); 279 static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, 280 struct ifreq *); 281 static void hn_xpnt_vf_saveifflags(struct hn_softc *); 282 static bool hn_xpnt_vf_isready(struct hn_softc *); 283 static void hn_xpnt_vf_setready(struct hn_softc *); 284 static void hn_xpnt_vf_init_taskfunc(void *, int); 285 static void hn_xpnt_vf_init(struct hn_softc *); 286 static void hn_xpnt_vf_setenable(struct hn_softc *); 287 static void hn_xpnt_vf_setdisable(struct hn_softc *, bool); 288 static void hn_vf_rss_fixup(struct hn_softc *, bool); 289 static void hn_vf_rss_restore(struct hn_softc *); 290 291 static int hn_rndis_rxinfo(const void *, int, 292 struct hn_rxinfo *); 293 static void hn_rndis_rx_data(struct hn_rx_ring *, 294 const void *, int); 295 static void hn_rndis_rx_status(struct hn_softc *, 296 const void *, int); 297 static void hn_rndis_init_fixat(struct hn_softc *, int); 298 299 static void hn_nvs_handle_notify(struct hn_softc *, 300 const struct vmbus_chanpkt_hdr *); 301 static void hn_nvs_handle_comp(struct hn_softc *, 302 struct vmbus_channel *, 303 const struct vmbus_chanpkt_hdr *); 304 static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, 305 struct vmbus_channel *, 306 const struct vmbus_chanpkt_hdr *); 307 static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, 308 struct vmbus_channel *, uint64_t); 309 310 #if __FreeBSD_version >= 1100099 311 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); 312 static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); 313 #endif 314 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); 315 static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); 316 #if __FreeBSD_version < 1100095 317 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); 318 #else 319 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); 320 #endif 321 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 322 static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 323 static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); 324 static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); 325 static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); 326 static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); 327 static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); 328 #ifndef RSS 329 static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); 330 static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); 331 #endif 332 static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); 333 static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS); 334 static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS); 335 static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); 336 static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); 337 static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); 338 static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); 339 static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS); 340 static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS); 341 static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS); 342 static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS); 343 static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS); 344 static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS); 345 static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS); 346 347 static void hn_stop(struct hn_softc *, 
bool); 348 static void hn_init_locked(struct hn_softc *); 349 static int hn_chan_attach(struct hn_softc *, 350 struct vmbus_channel *); 351 static void hn_chan_detach(struct hn_softc *, 352 struct vmbus_channel *); 353 static int hn_attach_subchans(struct hn_softc *); 354 static void hn_detach_allchans(struct hn_softc *); 355 static void hn_chan_rollup(struct hn_rx_ring *, 356 struct hn_tx_ring *); 357 static void hn_set_ring_inuse(struct hn_softc *, int); 358 static int hn_synth_attach(struct hn_softc *, int); 359 static void hn_synth_detach(struct hn_softc *); 360 static int hn_synth_alloc_subchans(struct hn_softc *, 361 int *); 362 static bool hn_synth_attachable(const struct hn_softc *); 363 static void hn_suspend(struct hn_softc *); 364 static void hn_suspend_data(struct hn_softc *); 365 static void hn_suspend_mgmt(struct hn_softc *); 366 static void hn_resume(struct hn_softc *); 367 static void hn_resume_data(struct hn_softc *); 368 static void hn_resume_mgmt(struct hn_softc *); 369 static void hn_suspend_mgmt_taskfunc(void *, int); 370 static void hn_chan_drain(struct hn_softc *, 371 struct vmbus_channel *); 372 static void hn_disable_rx(struct hn_softc *); 373 static void hn_drain_rxtx(struct hn_softc *, int); 374 static void hn_polling(struct hn_softc *, u_int); 375 static void hn_chan_polling(struct vmbus_channel *, u_int); 376 static void hn_mtu_change_fixup(struct hn_softc *); 377 378 static void hn_update_link_status(struct hn_softc *); 379 static void hn_change_network(struct hn_softc *); 380 static void hn_link_taskfunc(void *, int); 381 static void hn_netchg_init_taskfunc(void *, int); 382 static void hn_netchg_status_taskfunc(void *, int); 383 static void hn_link_status(struct hn_softc *); 384 385 static int hn_create_rx_data(struct hn_softc *, int); 386 static void hn_destroy_rx_data(struct hn_softc *); 387 static int hn_check_iplen(const struct mbuf *, int); 388 static void hn_rxpkt_proto(const struct mbuf *, int *, int *); 389 static int hn_set_rxfilter(struct hn_softc *, uint32_t); 390 static int hn_rxfilter_config(struct hn_softc *); 391 static int hn_rss_reconfig(struct hn_softc *); 392 static void hn_rss_ind_fixup(struct hn_softc *); 393 static void hn_rss_mbuf_hash(struct hn_softc *, uint32_t); 394 static int hn_rxpkt(struct hn_rx_ring *, const void *, 395 int, const struct hn_rxinfo *); 396 static uint32_t hn_rss_type_fromndis(uint32_t); 397 static uint32_t hn_rss_type_tondis(uint32_t); 398 399 static int hn_tx_ring_create(struct hn_softc *, int); 400 static void hn_tx_ring_destroy(struct hn_tx_ring *); 401 static int hn_create_tx_data(struct hn_softc *, int); 402 static void hn_fixup_tx_data(struct hn_softc *); 403 static void hn_fixup_rx_data(struct hn_softc *); 404 static void hn_destroy_tx_data(struct hn_softc *); 405 static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); 406 static void hn_txdesc_gc(struct hn_tx_ring *, 407 struct hn_txdesc *); 408 static int hn_encap(struct ifnet *, struct hn_tx_ring *, 409 struct hn_txdesc *, struct mbuf **); 410 static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, 411 struct hn_txdesc *); 412 static void hn_set_chim_size(struct hn_softc *, int); 413 static void hn_set_tso_maxsize(struct hn_softc *, int, int); 414 static bool hn_tx_ring_pending(struct hn_tx_ring *); 415 static void hn_tx_ring_qflush(struct hn_tx_ring *); 416 static void hn_resume_tx(struct hn_softc *, int); 417 static void hn_set_txagg(struct hn_softc *); 418 static void *hn_try_txagg(struct ifnet *, 419 struct hn_tx_ring *, struct hn_txdesc *, 420 
int); 421 static int hn_get_txswq_depth(const struct hn_tx_ring *); 422 static void hn_txpkt_done(struct hn_nvs_sendctx *, 423 struct hn_softc *, struct vmbus_channel *, 424 const void *, int); 425 static int hn_txpkt_sglist(struct hn_tx_ring *, 426 struct hn_txdesc *); 427 static int hn_txpkt_chim(struct hn_tx_ring *, 428 struct hn_txdesc *); 429 static int hn_xmit(struct hn_tx_ring *, int); 430 static void hn_xmit_taskfunc(void *, int); 431 static void hn_xmit_txeof(struct hn_tx_ring *); 432 static void hn_xmit_txeof_taskfunc(void *, int); 433 #ifdef HN_IFSTART_SUPPORT 434 static int hn_start_locked(struct hn_tx_ring *, int); 435 static void hn_start_taskfunc(void *, int); 436 static void hn_start_txeof(struct hn_tx_ring *); 437 static void hn_start_txeof_taskfunc(void *, int); 438 #endif 439 440 SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 441 "Hyper-V network interface"); 442 443 /* Trust tcp segment verification on host side. */ 444 static int hn_trust_hosttcp = 1; 445 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, 446 &hn_trust_hosttcp, 0, 447 "Trust tcp segment verification on host side, " 448 "when csum info is missing (global setting)"); 449 450 /* Trust udp datagram verification on host side. */ 451 static int hn_trust_hostudp = 1; 452 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, 453 &hn_trust_hostudp, 0, 454 "Trust udp datagram verification on host side, " 455 "when csum info is missing (global setting)"); 456 457 /* Trust ip packet verification on host side. */ 458 static int hn_trust_hostip = 1; 459 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, 460 &hn_trust_hostip, 0, 461 "Trust ip packet verification on host side, " 462 "when csum info is missing (global setting)"); 463 464 /* 465 * Offload UDP/IPv4 checksum. 466 */ 467 static int hn_enable_udp4cs = 1; 468 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN, 469 &hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum"); 470 471 /* 472 * Offload UDP/IPv6 checksum. 473 */ 474 static int hn_enable_udp6cs = 1; 475 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN, 476 &hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum"); 477 478 /* Stats. */ 479 static counter_u64_t hn_udpcs_fixup; 480 SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW, 481 &hn_udpcs_fixup, "# of UDP checksum fixups"); 482 483 /* 484 * See hn_set_hlen(). 485 * 486 * This value is for Azure. For Hyper-V, set this above 487 * 65536 to disable UDP datagram checksum fixup.
488 */ 489 static int hn_udpcs_fixup_mtu = 1420; 490 SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN, 491 &hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold"); 492 493 /* Limit TSO burst size */ 494 static int hn_tso_maxlen = IP_MAXPACKET; 495 SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, 496 &hn_tso_maxlen, 0, "TSO burst limit"); 497 498 /* Limit chimney send size */ 499 static int hn_tx_chimney_size = 0; 500 SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, 501 &hn_tx_chimney_size, 0, "Chimney send packet size limit"); 502 503 /* Limit the size of packet for direct transmission */ 504 static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; 505 SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, 506 &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); 507 508 /* # of LRO entries per RX ring */ 509 #if defined(INET) || defined(INET6) 510 #if __FreeBSD_version >= 1100095 511 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; 512 SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, 513 &hn_lro_entry_count, 0, "LRO entry count"); 514 #endif 515 #endif 516 517 static int hn_tx_taskq_cnt = 1; 518 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN, 519 &hn_tx_taskq_cnt, 0, "# of TX taskqueues"); 520 521 #define HN_TX_TASKQ_M_INDEP 0 522 #define HN_TX_TASKQ_M_GLOBAL 1 523 #define HN_TX_TASKQ_M_EVTTQ 2 524 525 static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; 526 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN, 527 &hn_tx_taskq_mode, 0, "TX taskqueue modes: " 528 "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs"); 529 530 #ifndef HN_USE_TXDESC_BUFRING 531 static int hn_use_txdesc_bufring = 0; 532 #else 533 static int hn_use_txdesc_bufring = 1; 534 #endif 535 SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, 536 &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); 537 538 #ifdef HN_IFSTART_SUPPORT 539 /* Use ifnet.if_start instead of ifnet.if_transmit */ 540 static int hn_use_if_start = 0; 541 SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, 542 &hn_use_if_start, 0, "Use if_start TX method"); 543 #endif 544 545 /* # of channels to use */ 546 static int hn_chan_cnt = 0; 547 SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, 548 &hn_chan_cnt, 0, 549 "# of channels to use; each channel has one RX ring and one TX ring"); 550 551 /* # of transmit rings to use */ 552 static int hn_tx_ring_cnt = 0; 553 SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, 554 &hn_tx_ring_cnt, 0, "# of TX rings to use"); 555 556 /* Software TX ring deptch */ 557 static int hn_tx_swq_depth = 0; 558 SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, 559 &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); 560 561 /* Enable sorted LRO, and the depth of the per-channel mbuf queue */ 562 #if __FreeBSD_version >= 1100095 563 static u_int hn_lro_mbufq_depth = 0; 564 SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, 565 &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); 566 #endif 567 568 /* Packet transmission aggregation size limit */ 569 static int hn_tx_agg_size = -1; 570 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN, 571 &hn_tx_agg_size, 0, "Packet transmission aggregation size limit"); 572 573 /* Packet transmission aggregation count limit */ 574 static int hn_tx_agg_pkts = -1; 575 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN, 576 &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit"); 577 578 /* VF list */ 579 SYSCTL_PROC(_hw_hn, 
OID_AUTO, vflist, CTLFLAG_RD | CTLTYPE_STRING, 580 0, 0, hn_vflist_sysctl, "A", "VF list"); 581 582 /* VF mapping */ 583 SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING, 584 0, 0, hn_vfmap_sysctl, "A", "VF mapping"); 585 586 /* Transparent VF */ 587 static int hn_xpnt_vf = 1; 588 SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN, 589 &hn_xpnt_vf, 0, "Transparent VF mode"); 590 591 /* Accurate BPF support for Transparent VF */ 592 static int hn_xpnt_vf_accbpf = 0; 593 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN, 594 &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF"); 595 596 /* Extra wait for transparent VF attach routine; unit: seconds. */ 597 static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; 598 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN, 599 &hn_xpnt_vf_attwait, 0, 600 "Extra wait for transparent VF attach routine; unit: seconds"); 601 602 static u_int hn_cpu_index; /* next CPU for channel */ 603 static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */ 604 605 static struct rmlock hn_vfmap_lock; 606 static int hn_vfmap_size; 607 static struct ifnet **hn_vfmap; 608 609 #ifndef RSS 610 static const uint8_t 611 hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 612 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 613 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 614 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 615 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 616 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 617 }; 618 #endif /* !RSS */ 619 620 static const struct hyperv_guid hn_guid = { 621 .hv_guid = { 622 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, 623 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e } 624 }; 625 626 static device_method_t hn_methods[] = { 627 /* Device interface */ 628 DEVMETHOD(device_probe, hn_probe), 629 DEVMETHOD(device_attach, hn_attach), 630 DEVMETHOD(device_detach, hn_detach), 631 DEVMETHOD(device_shutdown, hn_shutdown), 632 DEVMETHOD_END 633 }; 634 635 static driver_t hn_driver = { 636 "hn", 637 hn_methods, 638 sizeof(struct hn_softc) 639 }; 640 641 static devclass_t hn_devclass; 642 643 DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); 644 MODULE_VERSION(hn, 1); 645 MODULE_DEPEND(hn, vmbus, 1, 1, 1); 646 647 #if __FreeBSD_version >= 1100099 648 static void 649 hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) 650 { 651 int i; 652 653 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 654 sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; 655 } 656 #endif 657 658 static int 659 hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) 660 { 661 662 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 663 txd->chim_size == 0, ("invalid rndis sglist txd")); 664 return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, 665 &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); 666 } 667 668 static int 669 hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) 670 { 671 struct hn_nvs_rndis rndis; 672 673 KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && 674 txd->chim_size > 0, ("invalid rndis chim txd")); 675 676 rndis.nvs_type = HN_NVS_TYPE_RNDIS; 677 rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; 678 rndis.nvs_chim_idx = txd->chim_index; 679 rndis.nvs_chim_sz = txd->chim_size; 680 681 return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, 682 &rndis, sizeof(rndis), &txd->send_ctx)); 683 } 684 685 static __inline uint32_t 686 hn_chim_alloc(struct hn_softc *sc) 687 { 688 int i, bmap_cnt = sc->hn_chim_bmap_cnt; 689 u_long *bmap = sc->hn_chim_bmap; 690
uint32_t ret = HN_NVS_CHIM_IDX_INVALID; 691 692 for (i = 0; i < bmap_cnt; ++i) { 693 int idx; 694 695 idx = ffsl(~bmap[i]); 696 if (idx == 0) 697 continue; 698 699 --idx; /* ffsl is 1-based */ 700 KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, 701 ("invalid i %d and idx %d", i, idx)); 702 703 if (atomic_testandset_long(&bmap[i], idx)) 704 continue; 705 706 ret = i * LONG_BIT + idx; 707 break; 708 } 709 return (ret); 710 } 711 712 static __inline void 713 hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) 714 { 715 u_long mask; 716 uint32_t idx; 717 718 idx = chim_idx / LONG_BIT; 719 KASSERT(idx < sc->hn_chim_bmap_cnt, 720 ("invalid chimney index 0x%x", chim_idx)); 721 722 mask = 1UL << (chim_idx % LONG_BIT); 723 KASSERT(sc->hn_chim_bmap[idx] & mask, 724 ("index bitmap 0x%lx, chimney index %u, " 725 "bitmap idx %d, bitmask 0x%lx", 726 sc->hn_chim_bmap[idx], chim_idx, idx, mask)); 727 728 atomic_clear_long(&sc->hn_chim_bmap[idx], mask); 729 } 730 731 #if defined(INET6) || defined(INET) 732 733 #define PULLUP_HDR(m, len) \ 734 do { \ 735 if (__predict_false((m)->m_len < (len))) { \ 736 (m) = m_pullup((m), (len)); \ 737 if ((m) == NULL) \ 738 return (NULL); \ 739 } \ 740 } while (0) 741 742 /* 743 * NOTE: If this function failed, the m_head would be freed. 744 */ 745 static __inline struct mbuf * 746 hn_tso_fixup(struct mbuf *m_head) 747 { 748 struct ether_vlan_header *evl; 749 struct tcphdr *th; 750 int ehlen; 751 752 KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); 753 754 PULLUP_HDR(m_head, sizeof(*evl)); 755 evl = mtod(m_head, struct ether_vlan_header *); 756 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 757 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 758 else 759 ehlen = ETHER_HDR_LEN; 760 m_head->m_pkthdr.l2hlen = ehlen; 761 762 #ifdef INET 763 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 764 struct ip *ip; 765 int iphlen; 766 767 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 768 ip = mtodo(m_head, ehlen); 769 iphlen = ip->ip_hl << 2; 770 m_head->m_pkthdr.l3hlen = iphlen; 771 772 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 773 th = mtodo(m_head, ehlen + iphlen); 774 775 ip->ip_len = 0; 776 ip->ip_sum = 0; 777 th->th_sum = in_pseudo(ip->ip_src.s_addr, 778 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 779 } 780 #endif 781 #if defined(INET6) && defined(INET) 782 else 783 #endif 784 #ifdef INET6 785 { 786 struct ip6_hdr *ip6; 787 788 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 789 ip6 = mtodo(m_head, ehlen); 790 if (ip6->ip6_nxt != IPPROTO_TCP) { 791 m_freem(m_head); 792 return (NULL); 793 } 794 m_head->m_pkthdr.l3hlen = sizeof(*ip6); 795 796 PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); 797 th = mtodo(m_head, ehlen + sizeof(*ip6)); 798 799 ip6->ip6_plen = 0; 800 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 801 } 802 #endif 803 return (m_head); 804 } 805 806 /* 807 * NOTE: If this function failed, the m_head would be freed. 
808 */ 809 static __inline struct mbuf * 810 hn_set_hlen(struct mbuf *m_head) 811 { 812 const struct ether_vlan_header *evl; 813 int ehlen; 814 815 PULLUP_HDR(m_head, sizeof(*evl)); 816 evl = mtod(m_head, const struct ether_vlan_header *); 817 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 818 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 819 else 820 ehlen = ETHER_HDR_LEN; 821 m_head->m_pkthdr.l2hlen = ehlen; 822 823 #ifdef INET 824 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) { 825 const struct ip *ip; 826 int iphlen; 827 828 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 829 ip = mtodo(m_head, ehlen); 830 iphlen = ip->ip_hl << 2; 831 m_head->m_pkthdr.l3hlen = iphlen; 832 833 /* 834 * UDP checksum offload does not work in Azure if the 835 * following conditions are met: 836 * - sizeof(IP hdr + UDP hdr + payload) > 1420. 837 * - IP_DF is not set in the IP hdr. 838 * 839 * Fall back to software checksum for these UDP datagrams. 840 */ 841 if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) && 842 m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen && 843 (ntohs(ip->ip_off) & IP_DF) == 0) { 844 uint16_t off = ehlen + iphlen; 845 846 counter_u64_add(hn_udpcs_fixup, 1); 847 PULLUP_HDR(m_head, off + sizeof(struct udphdr)); 848 *(uint16_t *)(m_head->m_data + off + 849 m_head->m_pkthdr.csum_data) = in_cksum_skip( 850 m_head, m_head->m_pkthdr.len, off); 851 m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP; 852 } 853 } 854 #endif 855 #if defined(INET6) && defined(INET) 856 else 857 #endif 858 #ifdef INET6 859 { 860 const struct ip6_hdr *ip6; 861 862 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 863 ip6 = mtodo(m_head, ehlen); 864 if (ip6->ip6_nxt != IPPROTO_TCP) { 865 m_freem(m_head); 866 return (NULL); 867 } 868 m_head->m_pkthdr.l3hlen = sizeof(*ip6); 869 } 870 #endif 871 return (m_head); 872 } 873 874 /* 875 * NOTE: If this function failed, the m_head would be freed. 876 */ 877 static __inline struct mbuf * 878 hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn) 879 { 880 const struct tcphdr *th; 881 int ehlen, iphlen; 882 883 *tcpsyn = 0; 884 ehlen = m_head->m_pkthdr.l2hlen; 885 iphlen = m_head->m_pkthdr.l3hlen; 886 887 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 888 th = mtodo(m_head, ehlen + iphlen); 889 if (th->th_flags & TH_SYN) 890 *tcpsyn = 1; 891 return (m_head); 892 } 893 894 #undef PULLUP_HDR 895 896 #endif /* INET6 || INET */ 897 898 static int 899 hn_set_rxfilter(struct hn_softc *sc, uint32_t filter) 900 { 901 int error = 0; 902 903 HN_LOCK_ASSERT(sc); 904 905 if (sc->hn_rx_filter != filter) { 906 error = hn_rndis_set_rxfilter(sc, filter); 907 if (!error) 908 sc->hn_rx_filter = filter; 909 } 910 return (error); 911 } 912 913 static int 914 hn_rxfilter_config(struct hn_softc *sc) 915 { 916 struct ifnet *ifp = sc->hn_ifp; 917 uint32_t filter; 918 919 HN_LOCK_ASSERT(sc); 920 921 /* 922 * If the non-transparent mode VF is activated, we don't know how 923 * its RX filter is configured, so stick the synthetic device in 924 * the promiscuous mode.
925 */ 926 if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) { 927 filter = NDIS_PACKET_TYPE_PROMISCUOUS; 928 } else { 929 filter = NDIS_PACKET_TYPE_DIRECTED; 930 if (ifp->if_flags & IFF_BROADCAST) 931 filter |= NDIS_PACKET_TYPE_BROADCAST; 932 /* TODO: support multicast list */ 933 if ((ifp->if_flags & IFF_ALLMULTI) || 934 !CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) 935 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; 936 } 937 return (hn_set_rxfilter(sc, filter)); 938 } 939 940 static void 941 hn_set_txagg(struct hn_softc *sc) 942 { 943 uint32_t size, pkts; 944 int i; 945 946 /* 947 * Setup aggregation size. 948 */ 949 if (sc->hn_agg_size < 0) 950 size = UINT32_MAX; 951 else 952 size = sc->hn_agg_size; 953 954 if (sc->hn_rndis_agg_size < size) 955 size = sc->hn_rndis_agg_size; 956 957 /* NOTE: We only aggregate packets using chimney sending buffers. */ 958 if (size > (uint32_t)sc->hn_chim_szmax) 959 size = sc->hn_chim_szmax; 960 961 if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) { 962 /* Disable */ 963 size = 0; 964 pkts = 0; 965 goto done; 966 } 967 968 /* NOTE: Type of the per TX ring setting is 'int'. */ 969 if (size > INT_MAX) 970 size = INT_MAX; 971 972 /* 973 * Setup aggregation packet count. 974 */ 975 if (sc->hn_agg_pkts < 0) 976 pkts = UINT32_MAX; 977 else 978 pkts = sc->hn_agg_pkts; 979 980 if (sc->hn_rndis_agg_pkts < pkts) 981 pkts = sc->hn_rndis_agg_pkts; 982 983 if (pkts <= 1) { 984 /* Disable */ 985 size = 0; 986 pkts = 0; 987 goto done; 988 } 989 990 /* NOTE: Type of the per TX ring setting is 'short'. */ 991 if (pkts > SHRT_MAX) 992 pkts = SHRT_MAX; 993 994 done: 995 /* NOTE: Type of the per TX ring setting is 'short'. */ 996 if (sc->hn_rndis_agg_align > SHRT_MAX) { 997 /* Disable */ 998 size = 0; 999 pkts = 0; 1000 } 1001 1002 if (bootverbose) { 1003 if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", 1004 size, pkts, sc->hn_rndis_agg_align); 1005 } 1006 1007 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 1008 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 1009 1010 mtx_lock(&txr->hn_tx_lock); 1011 txr->hn_agg_szmax = size; 1012 txr->hn_agg_pktmax = pkts; 1013 txr->hn_agg_align = sc->hn_rndis_agg_align; 1014 mtx_unlock(&txr->hn_tx_lock); 1015 } 1016 } 1017 1018 static int 1019 hn_get_txswq_depth(const struct hn_tx_ring *txr) 1020 { 1021 1022 KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); 1023 if (hn_tx_swq_depth < txr->hn_txdesc_cnt) 1024 return txr->hn_txdesc_cnt; 1025 return hn_tx_swq_depth; 1026 } 1027 1028 static int 1029 hn_rss_reconfig(struct hn_softc *sc) 1030 { 1031 int error; 1032 1033 HN_LOCK_ASSERT(sc); 1034 1035 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1036 return (ENXIO); 1037 1038 /* 1039 * Disable RSS first. 1040 * 1041 * NOTE: 1042 * Direct reconfiguration by setting the UNCHG flags does 1043 * _not_ work properly. 1044 */ 1045 if (bootverbose) 1046 if_printf(sc->hn_ifp, "disable RSS\n"); 1047 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); 1048 if (error) { 1049 if_printf(sc->hn_ifp, "RSS disable failed\n"); 1050 return (error); 1051 } 1052 1053 /* 1054 * Reenable the RSS w/ the updated RSS key or indirect 1055 * table. 
1056 */ 1057 if (bootverbose) 1058 if_printf(sc->hn_ifp, "reconfig RSS\n"); 1059 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 1060 if (error) { 1061 if_printf(sc->hn_ifp, "RSS reconfig failed\n"); 1062 return (error); 1063 } 1064 return (0); 1065 } 1066 1067 static void 1068 hn_rss_ind_fixup(struct hn_softc *sc) 1069 { 1070 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 1071 int i, nchan; 1072 1073 nchan = sc->hn_rx_ring_inuse; 1074 KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); 1075 1076 /* 1077 * Check indirect table to make sure that all channels in it 1078 * can be used. 1079 */ 1080 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 1081 if (rss->rss_ind[i] >= nchan) { 1082 if_printf(sc->hn_ifp, 1083 "RSS indirect table %d fixup: %u -> %d\n", 1084 i, rss->rss_ind[i], nchan - 1); 1085 rss->rss_ind[i] = nchan - 1; 1086 } 1087 } 1088 } 1089 1090 static int 1091 hn_ifmedia_upd(struct ifnet *ifp __unused) 1092 { 1093 1094 return EOPNOTSUPP; 1095 } 1096 1097 static void 1098 hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 1099 { 1100 struct hn_softc *sc = ifp->if_softc; 1101 1102 ifmr->ifm_status = IFM_AVALID; 1103 ifmr->ifm_active = IFM_ETHER; 1104 1105 if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { 1106 ifmr->ifm_active |= IFM_NONE; 1107 return; 1108 } 1109 ifmr->ifm_status |= IFM_ACTIVE; 1110 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 1111 } 1112 1113 static void 1114 hn_rxvf_set_task(void *xarg, int pending __unused) 1115 { 1116 struct hn_rxvf_setarg *arg = xarg; 1117 1118 arg->rxr->hn_rxvf_ifp = arg->vf_ifp; 1119 } 1120 1121 static void 1122 hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp) 1123 { 1124 struct hn_rx_ring *rxr; 1125 struct hn_rxvf_setarg arg; 1126 struct task task; 1127 int i; 1128 1129 HN_LOCK_ASSERT(sc); 1130 1131 TASK_INIT(&task, 0, hn_rxvf_set_task, &arg); 1132 1133 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 1134 rxr = &sc->hn_rx_ring[i]; 1135 1136 if (i < sc->hn_rx_ring_inuse) { 1137 arg.rxr = rxr; 1138 arg.vf_ifp = vf_ifp; 1139 vmbus_chan_run_task(rxr->hn_chan, &task); 1140 } else { 1141 rxr->hn_rxvf_ifp = vf_ifp; 1142 } 1143 } 1144 } 1145 1146 static bool 1147 hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp) 1148 { 1149 const struct ifnet *hn_ifp; 1150 1151 hn_ifp = sc->hn_ifp; 1152 1153 if (ifp == hn_ifp) 1154 return (false); 1155 1156 if (ifp->if_alloctype != IFT_ETHER) 1157 return (false); 1158 1159 /* Ignore lagg/vlan interfaces */ 1160 if (strcmp(ifp->if_dname, "lagg") == 0 || 1161 strcmp(ifp->if_dname, "vlan") == 0) 1162 return (false); 1163 1164 /* 1165 * During detach events ifp->if_addr might be NULL. 
1166 * Make sure the bcmp() below doesn't panic on that: 1167 */ 1168 if (ifp->if_addr == NULL || hn_ifp->if_addr == NULL) 1169 return (false); 1170 1171 if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0) 1172 return (false); 1173 1174 return (true); 1175 } 1176 1177 static void 1178 hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf) 1179 { 1180 struct ifnet *hn_ifp; 1181 1182 HN_LOCK(sc); 1183 1184 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1185 goto out; 1186 1187 if (!hn_ismyvf(sc, ifp)) 1188 goto out; 1189 hn_ifp = sc->hn_ifp; 1190 1191 if (rxvf) { 1192 if (sc->hn_flags & HN_FLAG_RXVF) 1193 goto out; 1194 1195 sc->hn_flags |= HN_FLAG_RXVF; 1196 hn_rxfilter_config(sc); 1197 } else { 1198 if (!(sc->hn_flags & HN_FLAG_RXVF)) 1199 goto out; 1200 1201 sc->hn_flags &= ~HN_FLAG_RXVF; 1202 if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 1203 hn_rxfilter_config(sc); 1204 else 1205 hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE); 1206 } 1207 1208 hn_nvs_set_datapath(sc, 1209 rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH); 1210 1211 hn_rxvf_set(sc, rxvf ? ifp : NULL); 1212 1213 if (rxvf) { 1214 hn_vf_rss_fixup(sc, true); 1215 hn_suspend_mgmt(sc); 1216 sc->hn_link_flags &= 1217 ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG); 1218 if_link_state_change(hn_ifp, LINK_STATE_DOWN); 1219 } else { 1220 hn_vf_rss_restore(sc); 1221 hn_resume_mgmt(sc); 1222 } 1223 1224 devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname, 1225 rxvf ? "VF_UP" : "VF_DOWN", NULL); 1226 1227 if (bootverbose) { 1228 if_printf(hn_ifp, "datapath is switched %s %s\n", 1229 rxvf ? "to" : "from", ifp->if_xname); 1230 } 1231 out: 1232 HN_UNLOCK(sc); 1233 } 1234 1235 static void 1236 hn_ifnet_event(void *arg, struct ifnet *ifp, int event) 1237 { 1238 1239 if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN) 1240 return; 1241 hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP); 1242 } 1243 1244 static void 1245 hn_ifaddr_event(void *arg, struct ifnet *ifp) 1246 { 1247 1248 hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP); 1249 } 1250 1251 static int 1252 hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr) 1253 { 1254 struct ifnet *ifp, *vf_ifp; 1255 uint64_t tmp; 1256 int error; 1257 1258 HN_LOCK_ASSERT(sc); 1259 ifp = sc->hn_ifp; 1260 vf_ifp = sc->hn_vf_ifp; 1261 1262 /* 1263 * Fix up requested capabilities w/ supported capabilities, 1264 * since the supported capabilities could have been changed. 1265 */ 1266 ifr->ifr_reqcap &= ifp->if_capabilities; 1267 /* Pass SIOCSIFCAP to VF. */ 1268 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr); 1269 1270 /* 1271 * NOTE: 1272 * The error will be propagated to the callers, however, it 1273 * is _not_ useful here. 1274 */ 1275 1276 /* 1277 * Merge VF's enabled capabilities. 
1278 */ 1279 ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities; 1280 1281 tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc); 1282 if (ifp->if_capenable & IFCAP_TXCSUM) 1283 ifp->if_hwassist |= tmp; 1284 else 1285 ifp->if_hwassist &= ~tmp; 1286 1287 tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc); 1288 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 1289 ifp->if_hwassist |= tmp; 1290 else 1291 ifp->if_hwassist &= ~tmp; 1292 1293 tmp = vf_ifp->if_hwassist & CSUM_IP_TSO; 1294 if (ifp->if_capenable & IFCAP_TSO4) 1295 ifp->if_hwassist |= tmp; 1296 else 1297 ifp->if_hwassist &= ~tmp; 1298 1299 tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO; 1300 if (ifp->if_capenable & IFCAP_TSO6) 1301 ifp->if_hwassist |= tmp; 1302 else 1303 ifp->if_hwassist &= ~tmp; 1304 1305 return (error); 1306 } 1307 1308 static int 1309 hn_xpnt_vf_iocsetflags(struct hn_softc *sc) 1310 { 1311 struct ifnet *vf_ifp; 1312 struct ifreq ifr; 1313 1314 HN_LOCK_ASSERT(sc); 1315 vf_ifp = sc->hn_vf_ifp; 1316 1317 memset(&ifr, 0, sizeof(ifr)); 1318 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1319 ifr.ifr_flags = vf_ifp->if_flags & 0xffff; 1320 ifr.ifr_flagshigh = vf_ifp->if_flags >> 16; 1321 return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr)); 1322 } 1323 1324 static void 1325 hn_xpnt_vf_saveifflags(struct hn_softc *sc) 1326 { 1327 struct ifnet *ifp = sc->hn_ifp; 1328 int allmulti = 0; 1329 1330 HN_LOCK_ASSERT(sc); 1331 1332 /* XXX vlan(4) style mcast addr maintenance */ 1333 if (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) 1334 allmulti = IFF_ALLMULTI; 1335 1336 /* Always set the VF's if_flags */ 1337 sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti; 1338 } 1339 1340 static void 1341 hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m) 1342 { 1343 struct rm_priotracker pt; 1344 struct ifnet *hn_ifp = NULL; 1345 struct mbuf *mn; 1346 1347 /* 1348 * XXX racy, if hn(4) ever detached. 1349 */ 1350 rm_rlock(&hn_vfmap_lock, &pt); 1351 if (vf_ifp->if_index < hn_vfmap_size) 1352 hn_ifp = hn_vfmap[vf_ifp->if_index]; 1353 rm_runlock(&hn_vfmap_lock, &pt); 1354 1355 if (hn_ifp != NULL) { 1356 for (mn = m; mn != NULL; mn = mn->m_nextpkt) { 1357 /* 1358 * Allow tapping on the VF. 1359 */ 1360 ETHER_BPF_MTAP(vf_ifp, mn); 1361 1362 /* 1363 * Update VF stats. 1364 */ 1365 if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) { 1366 if_inc_counter(vf_ifp, IFCOUNTER_IBYTES, 1367 mn->m_pkthdr.len); 1368 } 1369 /* 1370 * XXX IFCOUNTER_IMCAST 1371 * This stat updating is kinda invasive, since it 1372 * requires two checks on the mbuf: the length check 1373 * and the ethernet header check. As of this writing, 1374 * all multicast packets go directly to hn(4), which 1375 * makes imcast stat updating in the VF pointless. 1376 */ 1377 1378 /* 1379 * Fix up rcvif and increase hn(4)'s ipackets. 1380 */ 1381 mn->m_pkthdr.rcvif = hn_ifp; 1382 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 1383 } 1384 /* 1385 * Go through hn(4)'s if_input. 1386 */ 1387 hn_ifp->if_input(hn_ifp, m); 1388 } else { 1389 /* 1390 * In the middle of the transition; free this 1391 * mbuf chain.
1392 */ 1393 while (m != NULL) { 1394 mn = m->m_nextpkt; 1395 m->m_nextpkt = NULL; 1396 m_freem(m); 1397 m = mn; 1398 } 1399 } 1400 } 1401 1402 static void 1403 hn_mtu_change_fixup(struct hn_softc *sc) 1404 { 1405 struct ifnet *ifp; 1406 1407 HN_LOCK_ASSERT(sc); 1408 ifp = sc->hn_ifp; 1409 1410 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 1411 #if __FreeBSD_version >= 1100099 1412 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) 1413 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 1414 #endif 1415 } 1416 1417 static uint32_t 1418 hn_rss_type_fromndis(uint32_t rss_hash) 1419 { 1420 uint32_t types = 0; 1421 1422 if (rss_hash & NDIS_HASH_IPV4) 1423 types |= RSS_TYPE_IPV4; 1424 if (rss_hash & NDIS_HASH_TCP_IPV4) 1425 types |= RSS_TYPE_TCP_IPV4; 1426 if (rss_hash & NDIS_HASH_IPV6) 1427 types |= RSS_TYPE_IPV6; 1428 if (rss_hash & NDIS_HASH_IPV6_EX) 1429 types |= RSS_TYPE_IPV6_EX; 1430 if (rss_hash & NDIS_HASH_TCP_IPV6) 1431 types |= RSS_TYPE_TCP_IPV6; 1432 if (rss_hash & NDIS_HASH_TCP_IPV6_EX) 1433 types |= RSS_TYPE_TCP_IPV6_EX; 1434 if (rss_hash & NDIS_HASH_UDP_IPV4_X) 1435 types |= RSS_TYPE_UDP_IPV4; 1436 return (types); 1437 } 1438 1439 static uint32_t 1440 hn_rss_type_tondis(uint32_t types) 1441 { 1442 uint32_t rss_hash = 0; 1443 1444 KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0, 1445 ("UDP6 and UDP6EX are not supported")); 1446 1447 if (types & RSS_TYPE_IPV4) 1448 rss_hash |= NDIS_HASH_IPV4; 1449 if (types & RSS_TYPE_TCP_IPV4) 1450 rss_hash |= NDIS_HASH_TCP_IPV4; 1451 if (types & RSS_TYPE_IPV6) 1452 rss_hash |= NDIS_HASH_IPV6; 1453 if (types & RSS_TYPE_IPV6_EX) 1454 rss_hash |= NDIS_HASH_IPV6_EX; 1455 if (types & RSS_TYPE_TCP_IPV6) 1456 rss_hash |= NDIS_HASH_TCP_IPV6; 1457 if (types & RSS_TYPE_TCP_IPV6_EX) 1458 rss_hash |= NDIS_HASH_TCP_IPV6_EX; 1459 if (types & RSS_TYPE_UDP_IPV4) 1460 rss_hash |= NDIS_HASH_UDP_IPV4_X; 1461 return (rss_hash); 1462 } 1463 1464 static void 1465 hn_rss_mbuf_hash(struct hn_softc *sc, uint32_t mbuf_hash) 1466 { 1467 int i; 1468 1469 HN_LOCK_ASSERT(sc); 1470 1471 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1472 sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash; 1473 } 1474 1475 static void 1476 hn_vf_rss_fixup(struct hn_softc *sc, bool reconf) 1477 { 1478 struct ifnet *ifp, *vf_ifp; 1479 struct ifrsshash ifrh; 1480 struct ifrsskey ifrk; 1481 int error; 1482 uint32_t my_types, diff_types, mbuf_types = 0; 1483 1484 HN_LOCK_ASSERT(sc); 1485 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1486 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1487 1488 if (sc->hn_rx_ring_inuse == 1) { 1489 /* No RSS on synthetic parts; done. */ 1490 return; 1491 } 1492 if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) { 1493 /* Synthetic parts do not support Toeplitz; done. */ 1494 return; 1495 } 1496 1497 ifp = sc->hn_ifp; 1498 vf_ifp = sc->hn_vf_ifp; 1499 1500 /* 1501 * Extract VF's RSS key. Only 40 bytes key for Toeplitz is 1502 * supported. 
1503 */ 1504 memset(&ifrk, 0, sizeof(ifrk)); 1505 strlcpy(ifrk.ifrk_name, vf_ifp->if_xname, sizeof(ifrk.ifrk_name)); 1506 error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSKEY, (caddr_t)&ifrk); 1507 if (error) { 1508 if_printf(ifp, "%s SIOCGRSSKEY failed: %d\n", 1509 vf_ifp->if_xname, error); 1510 goto done; 1511 } 1512 if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) { 1513 if_printf(ifp, "%s RSS function %u is not Toeplitz\n", 1514 vf_ifp->if_xname, ifrk.ifrk_func); 1515 goto done; 1516 } 1517 if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) { 1518 if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n", 1519 vf_ifp->if_xname, ifrk.ifrk_keylen); 1520 goto done; 1521 } 1522 1523 /* 1524 * Extract VF's RSS hash. Only Toeplitz is supported. 1525 */ 1526 memset(&ifrh, 0, sizeof(ifrh)); 1527 strlcpy(ifrh.ifrh_name, vf_ifp->if_xname, sizeof(ifrh.ifrh_name)); 1528 error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSHASH, (caddr_t)&ifrh); 1529 if (error) { 1530 if_printf(ifp, "%s SIOCGRSSHASH failed: %d\n", 1531 vf_ifp->if_xname, error); 1532 goto done; 1533 } 1534 if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) { 1535 if_printf(ifp, "%s RSS function %u is not Toeplitz\n", 1536 vf_ifp->if_xname, ifrh.ifrh_func); 1537 goto done; 1538 } 1539 1540 my_types = hn_rss_type_fromndis(sc->hn_rss_hcap); 1541 if ((ifrh.ifrh_types & my_types) == 0) { 1542 /* This disables RSS; ignore it then */ 1543 if_printf(ifp, "%s intersection of RSS types failed. " 1544 "VF %#x, mine %#x\n", vf_ifp->if_xname, 1545 ifrh.ifrh_types, my_types); 1546 goto done; 1547 } 1548 1549 diff_types = my_types ^ ifrh.ifrh_types; 1550 my_types &= ifrh.ifrh_types; 1551 mbuf_types = my_types; 1552 1553 /* 1554 * Detect RSS hash value/type conflicts. 1555 * 1556 * NOTE: 1557 * We don't disable the hash type, but stop delivering the hash 1558 * value/type through mbufs on the RX path. 1559 * 1560 * XXX If HN_CAP_UDPHASH is set in hn_caps, then UDP 4-tuple 1561 * hash is delivered with type of TCP_IPV4. This means if 1562 * UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at 1563 * least to hn_mbuf_hash. However, given that _all_ of the 1564 * NICs implement TCP_IPV4, this will _not_ impose any issues 1565 * here. 1566 */ 1567 if ((my_types & RSS_TYPE_IPV4) && 1568 (diff_types & ifrh.ifrh_types & 1569 (RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) { 1570 /* Conflict; disable IPV4 hash type/value delivery. */ 1571 if_printf(ifp, "disable IPV4 mbuf hash delivery\n"); 1572 mbuf_types &= ~RSS_TYPE_IPV4; 1573 } 1574 if ((my_types & RSS_TYPE_IPV6) && 1575 (diff_types & ifrh.ifrh_types & 1576 (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | 1577 RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | 1578 RSS_TYPE_IPV6_EX))) { 1579 /* Conflict; disable IPV6 hash type/value delivery. */ 1580 if_printf(ifp, "disable IPV6 mbuf hash delivery\n"); 1581 mbuf_types &= ~RSS_TYPE_IPV6; 1582 } 1583 if ((my_types & RSS_TYPE_IPV6_EX) && 1584 (diff_types & ifrh.ifrh_types & 1585 (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | 1586 RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | 1587 RSS_TYPE_IPV6))) { 1588 /* Conflict; disable IPV6_EX hash type/value delivery. */ 1589 if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n"); 1590 mbuf_types &= ~RSS_TYPE_IPV6_EX; 1591 } 1592 if ((my_types & RSS_TYPE_TCP_IPV6) && 1593 (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) { 1594 /* Conflict; disable TCP_IPV6 hash type/value delivery.
*/ 1595 if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n"); 1596 mbuf_types &= ~RSS_TYPE_TCP_IPV6; 1597 } 1598 if ((my_types & RSS_TYPE_TCP_IPV6_EX) && 1599 (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) { 1600 /* Conflict; disable TCP_IPV6_EX hash type/value delivery. */ 1601 if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n"); 1602 mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX; 1603 } 1604 if ((my_types & RSS_TYPE_UDP_IPV6) && 1605 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) { 1606 /* Conflict; disable UDP_IPV6 hash type/value delivery. */ 1607 if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n"); 1608 mbuf_types &= ~RSS_TYPE_UDP_IPV6; 1609 } 1610 if ((my_types & RSS_TYPE_UDP_IPV6_EX) && 1611 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) { 1612 /* Conflict; disable UDP_IPV6_EX hash type/value delivery. */ 1613 if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n"); 1614 mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX; 1615 } 1616 1617 /* 1618 * Indirect table does not matter. 1619 */ 1620 1621 sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) | 1622 hn_rss_type_tondis(my_types); 1623 memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key)); 1624 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 1625 1626 if (reconf) { 1627 error = hn_rss_reconfig(sc); 1628 if (error) { 1629 /* XXX roll-back? */ 1630 if_printf(ifp, "hn_rss_reconfig failed: %d\n", error); 1631 /* XXX keep going. */ 1632 } 1633 } 1634 done: 1635 /* Hash deliverability for mbufs. */ 1636 hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types)); 1637 } 1638 1639 static void 1640 hn_vf_rss_restore(struct hn_softc *sc) 1641 { 1642 1643 HN_LOCK_ASSERT(sc); 1644 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1645 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1646 1647 if (sc->hn_rx_ring_inuse == 1) 1648 goto done; 1649 1650 /* 1651 * Restore hash types. Key does _not_ matter. 1652 */ 1653 if (sc->hn_rss_hash != sc->hn_rss_hcap) { 1654 int error; 1655 1656 sc->hn_rss_hash = sc->hn_rss_hcap; 1657 error = hn_rss_reconfig(sc); 1658 if (error) { 1659 if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n", 1660 error); 1661 /* XXX keep going. */ 1662 } 1663 } 1664 done: 1665 /* Hash deliverability for mbufs. */ 1666 hn_rss_mbuf_hash(sc, NDIS_HASH_ALL); 1667 } 1668 1669 static void 1670 hn_xpnt_vf_setready(struct hn_softc *sc) 1671 { 1672 struct ifnet *ifp, *vf_ifp; 1673 struct ifreq ifr; 1674 1675 HN_LOCK_ASSERT(sc); 1676 ifp = sc->hn_ifp; 1677 vf_ifp = sc->hn_vf_ifp; 1678 1679 /* 1680 * Mark the VF ready. 1681 */ 1682 sc->hn_vf_rdytick = 0; 1683 1684 /* 1685 * Save information for restoration. 1686 */ 1687 sc->hn_saved_caps = ifp->if_capabilities; 1688 sc->hn_saved_tsomax = ifp->if_hw_tsomax; 1689 sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount; 1690 sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize; 1691 1692 /* 1693 * Intersect supported/enabled capabilities. 1694 * 1695 * NOTE: 1696 * if_hwassist is not changed here. 1697 */ 1698 ifp->if_capabilities &= vf_ifp->if_capabilities; 1699 ifp->if_capenable &= ifp->if_capabilities; 1700 1701 /* 1702 * Fix TSO settings. 1703 */ 1704 if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax) 1705 ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax; 1706 if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount) 1707 ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount; 1708 if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize) 1709 ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize; 1710 1711 /* 1712 * Change VF's enabled capabilities. 
1713 */ 1714 memset(&ifr, 0, sizeof(ifr)); 1715 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1716 ifr.ifr_reqcap = ifp->if_capenable; 1717 hn_xpnt_vf_iocsetcaps(sc, &ifr); 1718 1719 if (ifp->if_mtu != ETHERMTU) { 1720 int error; 1721 1722 /* 1723 * Change VF's MTU. 1724 */ 1725 memset(&ifr, 0, sizeof(ifr)); 1726 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1727 ifr.ifr_mtu = ifp->if_mtu; 1728 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr); 1729 if (error) { 1730 if_printf(ifp, "%s SIOCSIFMTU %u failed\n", 1731 vf_ifp->if_xname, ifp->if_mtu); 1732 if (ifp->if_mtu > ETHERMTU) { 1733 if_printf(ifp, "change MTU to %d\n", ETHERMTU); 1734 1735 /* 1736 * XXX 1737 * No need to adjust the synthetic parts' MTU; 1738 * failure of the adjustment will cause us 1739 * infinite headache. 1740 */ 1741 ifp->if_mtu = ETHERMTU; 1742 hn_mtu_change_fixup(sc); 1743 } 1744 } 1745 } 1746 } 1747 1748 static bool 1749 hn_xpnt_vf_isready(struct hn_softc *sc) 1750 { 1751 1752 HN_LOCK_ASSERT(sc); 1753 1754 if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL) 1755 return (false); 1756 1757 if (sc->hn_vf_rdytick == 0) 1758 return (true); 1759 1760 if (sc->hn_vf_rdytick > ticks) 1761 return (false); 1762 1763 /* Mark VF as ready. */ 1764 hn_xpnt_vf_setready(sc); 1765 return (true); 1766 } 1767 1768 static void 1769 hn_xpnt_vf_setenable(struct hn_softc *sc) 1770 { 1771 int i; 1772 1773 HN_LOCK_ASSERT(sc); 1774 1775 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1776 rm_wlock(&sc->hn_vf_lock); 1777 sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED; 1778 rm_wunlock(&sc->hn_vf_lock); 1779 1780 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1781 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF; 1782 } 1783 1784 static void 1785 hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf) 1786 { 1787 int i; 1788 1789 HN_LOCK_ASSERT(sc); 1790 1791 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1792 rm_wlock(&sc->hn_vf_lock); 1793 sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED; 1794 if (clear_vf) 1795 sc->hn_vf_ifp = NULL; 1796 rm_wunlock(&sc->hn_vf_lock); 1797 1798 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1799 sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF; 1800 } 1801 1802 static void 1803 hn_xpnt_vf_init(struct hn_softc *sc) 1804 { 1805 int error; 1806 1807 HN_LOCK_ASSERT(sc); 1808 1809 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1810 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1811 1812 if (bootverbose) { 1813 if_printf(sc->hn_ifp, "try bringing up %s\n", 1814 sc->hn_vf_ifp->if_xname); 1815 } 1816 1817 /* 1818 * Bring the VF up. 1819 */ 1820 hn_xpnt_vf_saveifflags(sc); 1821 sc->hn_vf_ifp->if_flags |= IFF_UP; 1822 error = hn_xpnt_vf_iocsetflags(sc); 1823 if (error) { 1824 if_printf(sc->hn_ifp, "bringing up %s failed: %d\n", 1825 sc->hn_vf_ifp->if_xname, error); 1826 return; 1827 } 1828 1829 /* 1830 * NOTE: 1831 * Datapath setting must happen _after_ bringing the VF up. 1832 */ 1833 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 1834 1835 /* 1836 * NOTE: 1837 * Fixup RSS related bits _after_ the VF is brought up, since 1838 * many VFs generate RSS key during it's initialization. 1839 */ 1840 hn_vf_rss_fixup(sc, true); 1841 1842 /* Mark transparent mode VF as enabled. 
*/ 1843 hn_xpnt_vf_setenable(sc); 1844 } 1845 1846 static void 1847 hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused) 1848 { 1849 struct hn_softc *sc = xsc; 1850 1851 HN_LOCK(sc); 1852 1853 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1854 goto done; 1855 if (sc->hn_vf_ifp == NULL) 1856 goto done; 1857 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 1858 goto done; 1859 1860 if (sc->hn_vf_rdytick != 0) { 1861 /* Mark VF as ready. */ 1862 hn_xpnt_vf_setready(sc); 1863 } 1864 1865 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) { 1866 /* 1867 * Delayed VF initialization. 1868 */ 1869 if (bootverbose) { 1870 if_printf(sc->hn_ifp, "delayed initialize %s\n", 1871 sc->hn_vf_ifp->if_xname); 1872 } 1873 hn_xpnt_vf_init(sc); 1874 } 1875 done: 1876 HN_UNLOCK(sc); 1877 } 1878 1879 static void 1880 hn_ifnet_attevent(void *xsc, struct ifnet *ifp) 1881 { 1882 struct hn_softc *sc = xsc; 1883 1884 HN_LOCK(sc); 1885 1886 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1887 goto done; 1888 1889 if (!hn_ismyvf(sc, ifp)) 1890 goto done; 1891 1892 if (sc->hn_vf_ifp != NULL) { 1893 if_printf(sc->hn_ifp, "%s was attached as VF\n", 1894 sc->hn_vf_ifp->if_xname); 1895 goto done; 1896 } 1897 1898 if (hn_xpnt_vf && ifp->if_start != NULL) { 1899 /* 1900 * ifnet.if_start is _not_ supported by transparent 1901 * mode VF; mainly due to the IFF_DRV_OACTIVE flag. 1902 */ 1903 if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported " 1904 "in transparent VF mode.\n", ifp->if_xname); 1905 goto done; 1906 } 1907 1908 rm_wlock(&hn_vfmap_lock); 1909 1910 if (ifp->if_index >= hn_vfmap_size) { 1911 struct ifnet **newmap; 1912 int newsize; 1913 1914 newsize = ifp->if_index + HN_VFMAP_SIZE_DEF; 1915 newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF, 1916 M_WAITOK | M_ZERO); 1917 1918 memcpy(newmap, hn_vfmap, 1919 sizeof(struct ifnet *) * hn_vfmap_size); 1920 free(hn_vfmap, M_DEVBUF); 1921 hn_vfmap = newmap; 1922 hn_vfmap_size = newsize; 1923 } 1924 KASSERT(hn_vfmap[ifp->if_index] == NULL, 1925 ("%s: ifindex %d was mapped to %s", 1926 ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname)); 1927 hn_vfmap[ifp->if_index] = sc->hn_ifp; 1928 1929 rm_wunlock(&hn_vfmap_lock); 1930 1931 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1932 rm_wlock(&sc->hn_vf_lock); 1933 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1934 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1935 sc->hn_vf_ifp = ifp; 1936 rm_wunlock(&sc->hn_vf_lock); 1937 1938 if (hn_xpnt_vf) { 1939 int wait_ticks; 1940 1941 /* 1942 * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp. 1943 * Save vf_ifp's current if_input for later restoration. 1944 */ 1945 sc->hn_vf_input = ifp->if_input; 1946 ifp->if_input = hn_xpnt_vf_input; 1947 1948 /* 1949 * Stop link status management; use the VF's. 1950 */ 1951 hn_suspend_mgmt(sc); 1952 1953 /* 1954 * Give VF sometime to complete its attach routing. 1955 */ 1956 wait_ticks = hn_xpnt_vf_attwait * hz; 1957 sc->hn_vf_rdytick = ticks + wait_ticks; 1958 1959 taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init, 1960 wait_ticks); 1961 } 1962 done: 1963 HN_UNLOCK(sc); 1964 } 1965 1966 static void 1967 hn_ifnet_detevent(void *xsc, struct ifnet *ifp) 1968 { 1969 struct hn_softc *sc = xsc; 1970 1971 HN_LOCK(sc); 1972 1973 if (sc->hn_vf_ifp == NULL) 1974 goto done; 1975 1976 if (!hn_ismyvf(sc, ifp)) 1977 goto done; 1978 1979 if (hn_xpnt_vf) { 1980 /* 1981 * Make sure that the delayed initialization is not running. 
1982 * 1983 * NOTE: 1984 * - This lock _must_ be released, since the hn_vf_init task 1985 * will try holding this lock. 1986 * - It is safe to release this lock here, since the 1987 * hn_ifnet_attevent() is interlocked by the hn_vf_ifp. 1988 * 1989 * XXX racy, if hn(4) ever detached. 1990 */ 1991 HN_UNLOCK(sc); 1992 taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init); 1993 HN_LOCK(sc); 1994 1995 KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved", 1996 sc->hn_ifp->if_xname)); 1997 ifp->if_input = sc->hn_vf_input; 1998 sc->hn_vf_input = NULL; 1999 2000 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) && 2001 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) 2002 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 2003 2004 if (sc->hn_vf_rdytick == 0) { 2005 /* 2006 * The VF was ready; restore some settings. 2007 */ 2008 sc->hn_ifp->if_capabilities = sc->hn_saved_caps; 2009 /* 2010 * NOTE: 2011 * There is _no_ need to fixup if_capenable and 2012 * if_hwassist, since the if_capabilities before 2013 * restoration was an intersection of the VF's 2014 * if_capabilites and the synthetic device's 2015 * if_capabilites. 2016 */ 2017 sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax; 2018 sc->hn_ifp->if_hw_tsomaxsegcount = 2019 sc->hn_saved_tsosegcnt; 2020 sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz; 2021 } 2022 2023 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2024 /* 2025 * Restore RSS settings. 2026 */ 2027 hn_vf_rss_restore(sc); 2028 2029 /* 2030 * Resume link status management, which was suspended 2031 * by hn_ifnet_attevent(). 2032 */ 2033 hn_resume_mgmt(sc); 2034 } 2035 } 2036 2037 /* Mark transparent mode VF as disabled. */ 2038 hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */); 2039 2040 rm_wlock(&hn_vfmap_lock); 2041 2042 KASSERT(ifp->if_index < hn_vfmap_size, 2043 ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size)); 2044 if (hn_vfmap[ifp->if_index] != NULL) { 2045 KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp, 2046 ("%s: ifindex %d was mapped to %s", 2047 ifp->if_xname, ifp->if_index, 2048 hn_vfmap[ifp->if_index]->if_xname)); 2049 hn_vfmap[ifp->if_index] = NULL; 2050 } 2051 2052 rm_wunlock(&hn_vfmap_lock); 2053 done: 2054 HN_UNLOCK(sc); 2055 } 2056 2057 static void 2058 hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state) 2059 { 2060 struct hn_softc *sc = xsc; 2061 2062 if (sc->hn_vf_ifp == ifp) 2063 if_link_state_change(sc->hn_ifp, link_state); 2064 } 2065 2066 static int 2067 hn_probe(device_t dev) 2068 { 2069 2070 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) { 2071 device_set_desc(dev, "Hyper-V Network Interface"); 2072 return BUS_PROBE_DEFAULT; 2073 } 2074 return ENXIO; 2075 } 2076 2077 static int 2078 hn_attach(device_t dev) 2079 { 2080 struct hn_softc *sc = device_get_softc(dev); 2081 struct sysctl_oid_list *child; 2082 struct sysctl_ctx_list *ctx; 2083 uint8_t eaddr[ETHER_ADDR_LEN]; 2084 struct ifnet *ifp = NULL; 2085 int error, ring_cnt, tx_ring_cnt; 2086 uint32_t mtu; 2087 2088 sc->hn_dev = dev; 2089 sc->hn_prichan = vmbus_get_channel(dev); 2090 HN_LOCK_INIT(sc); 2091 rm_init(&sc->hn_vf_lock, "hnvf"); 2092 if (hn_xpnt_vf && hn_xpnt_vf_accbpf) 2093 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 2094 2095 /* 2096 * Initialize these tunables once. 2097 */ 2098 sc->hn_agg_size = hn_tx_agg_size; 2099 sc->hn_agg_pkts = hn_tx_agg_pkts; 2100 2101 /* 2102 * Setup taskqueue for transmission. 
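 *
 * Only two of the TX taskqueue modes are handled here: independent
 * mode creates hn_tx_taskq_cnt taskqueues dedicated to this device,
 * while global mode reuses the module-wide hn_tx_taskque array; any
 * other mode simply leaves sc->hn_tx_taskqs NULL.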
2103 */ 2104 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) { 2105 int i; 2106 2107 sc->hn_tx_taskqs = 2108 malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 2109 M_DEVBUF, M_WAITOK); 2110 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 2111 sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx", 2112 M_WAITOK, taskqueue_thread_enqueue, 2113 &sc->hn_tx_taskqs[i]); 2114 taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET, 2115 "%s tx%d", device_get_nameunit(dev), i); 2116 } 2117 } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) { 2118 sc->hn_tx_taskqs = hn_tx_taskque; 2119 } 2120 2121 /* 2122 * Setup taskqueue for management tasks, e.g. link status. 2123 */ 2124 sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, 2125 taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); 2126 taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", 2127 device_get_nameunit(dev)); 2128 TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); 2129 TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); 2130 TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, 2131 hn_netchg_status_taskfunc, sc); 2132 2133 if (hn_xpnt_vf) { 2134 /* 2135 * Setup taskqueue for VF tasks, e.g. delayed VF bring-up. 2136 */ 2137 sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK, 2138 taskqueue_thread_enqueue, &sc->hn_vf_taskq); 2139 taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf", 2140 device_get_nameunit(dev)); 2141 TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0, 2142 hn_xpnt_vf_init_taskfunc, sc); 2143 } 2144 2145 /* 2146 * Allocate ifnet and setup its name earlier, so that if_printf 2147 * can be used by functions which will be called after 2148 * ether_ifattach(). 2149 */ 2150 ifp = sc->hn_ifp = if_alloc(IFT_ETHER); 2151 ifp->if_softc = sc; 2152 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 2153 2154 /* 2155 * Initialize ifmedia earlier so that it can be unconditionally 2156 * destroyed if an error happens later on. 2157 */ 2158 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 2159 2160 /* 2161 * Figure out the # of RX rings (ring_cnt) and the # of TX rings 2162 * to use (tx_ring_cnt). 2163 * 2164 * NOTE: 2165 * The # of RX rings to use is the same as the # of channels to use. 2166 */ 2167 ring_cnt = hn_chan_cnt; 2168 if (ring_cnt <= 0) { 2169 /* Default */ 2170 ring_cnt = mp_ncpus; 2171 if (ring_cnt > HN_RING_CNT_DEF_MAX) 2172 ring_cnt = HN_RING_CNT_DEF_MAX; 2173 } else if (ring_cnt > mp_ncpus) { 2174 ring_cnt = mp_ncpus; 2175 } 2176 #ifdef RSS 2177 if (ring_cnt > rss_getnumbuckets()) 2178 ring_cnt = rss_getnumbuckets(); 2179 #endif 2180 2181 tx_ring_cnt = hn_tx_ring_cnt; 2182 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) 2183 tx_ring_cnt = ring_cnt; 2184 #ifdef HN_IFSTART_SUPPORT 2185 if (hn_use_if_start) { 2186 /* ifnet.if_start only needs one TX ring. */ 2187 tx_ring_cnt = 1; 2188 } 2189 #endif 2190 2191 /* 2192 * Set the leader CPU for channels. 2193 */ 2194 sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; 2195 2196 /* 2197 * Create enough TX/RX rings, even if only a limited number of 2198 * channels can be allocated. 2199 */ 2200 error = hn_create_tx_data(sc, tx_ring_cnt); 2201 if (error) 2202 goto failed; 2203 error = hn_create_rx_data(sc, ring_cnt); 2204 if (error) 2205 goto failed; 2206 2207 /* 2208 * Create transaction context for NVS and RNDIS transactions.
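 *
 * The transaction context is what pairs NVS/RNDIS control requests
 * with their replies from the host; it is sized for the largest
 * request/response this driver exchanges (HN_XACT_REQ_SIZE and
 * HN_XACT_RESP_SIZE).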
2209 */ 2210 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), 2211 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); 2212 if (sc->hn_xact == NULL) { 2213 error = ENXIO; 2214 goto failed; 2215 } 2216 2217 /* 2218 * Install orphan handler for the revocation of this device's 2219 * primary channel. 2220 * 2221 * NOTE: 2222 * The processing order is critical here: 2223 * Install the orphan handler, _before_ testing whether this 2224 * device's primary channel has been revoked or not. 2225 */ 2226 vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); 2227 if (vmbus_chan_is_revoked(sc->hn_prichan)) { 2228 error = ENXIO; 2229 goto failed; 2230 } 2231 2232 /* 2233 * Attach the synthetic parts, i.e. NVS and RNDIS. 2234 */ 2235 error = hn_synth_attach(sc, ETHERMTU); 2236 if (error) 2237 goto failed; 2238 2239 error = hn_rndis_get_eaddr(sc, eaddr); 2240 if (error) 2241 goto failed; 2242 2243 error = hn_rndis_get_mtu(sc, &mtu); 2244 if (error) 2245 mtu = ETHERMTU; 2246 else if (bootverbose) 2247 device_printf(dev, "RNDIS mtu %u\n", mtu); 2248 2249 #if __FreeBSD_version >= 1100099 2250 if (sc->hn_rx_ring_inuse > 1) { 2251 /* 2252 * Reduce TCP segment aggregation limit for multiple 2253 * RX rings to increase ACK timeliness. 2254 */ 2255 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); 2256 } 2257 #endif 2258 2259 /* 2260 * Fixup TX/RX stuffs after synthetic parts are attached. 2261 */ 2262 hn_fixup_tx_data(sc); 2263 hn_fixup_rx_data(sc); 2264 2265 ctx = device_get_sysctl_ctx(dev); 2266 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 2267 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, 2268 &sc->hn_nvs_ver, 0, "NVS version"); 2269 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", 2270 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2271 hn_ndis_version_sysctl, "A", "NDIS version"); 2272 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", 2273 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2274 hn_caps_sysctl, "A", "capabilities"); 2275 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", 2276 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2277 hn_hwassist_sysctl, "A", "hwassist"); 2278 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", 2279 CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); 2280 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", 2281 CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, 2282 "max # of TSO segments"); 2283 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", 2284 CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0, 2285 "max size of TSO segment"); 2286 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", 2287 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2288 hn_rxfilter_sysctl, "A", "rxfilter"); 2289 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", 2290 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2291 hn_rss_hash_sysctl, "A", "RSS hash"); 2292 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap", 2293 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2294 hn_rss_hcap_sysctl, "A", "RSS hash capabilities"); 2295 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash", 2296 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2297 hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs"); 2298 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", 2299 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); 2300 #ifndef RSS 2301 /* 2302 * Don't allow RSS key/indirect table changes, if RSS is defined. 
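 *
 * When the kernel is built with the RSS option, the key and the
 * indirection table are owned by the kernel RSS layer, so the
 * read-write sysctls below are only exposed when RSS is not
 * compiled in.  For illustration (assuming device hn0 and a kernel
 * without the RSS option), the key could then be dumped with
 * something like:
 *
 *	# sysctl -x dev.hn.0.rss_key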
2303 */ 2304 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", 2305 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2306 hn_rss_key_sysctl, "IU", "RSS key"); 2307 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", 2308 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2309 hn_rss_ind_sysctl, "IU", "RSS indirect table"); 2310 #endif 2311 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", 2312 CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, 2313 "RNDIS offered packet transmission aggregation size limit"); 2314 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", 2315 CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, 2316 "RNDIS offered packet transmission aggregation count limit"); 2317 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", 2318 CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, 2319 "RNDIS packet transmission aggregation alignment"); 2320 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", 2321 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2322 hn_txagg_size_sysctl, "I", 2323 "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); 2324 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", 2325 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2326 hn_txagg_pkts_sysctl, "I", 2327 "Packet transmission aggregation packets, " 2328 "0 -- disable, -1 -- auto"); 2329 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling", 2330 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2331 hn_polling_sysctl, "I", 2332 "Polling frequency: [100,1000000], 0 disable polling"); 2333 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", 2334 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2335 hn_vf_sysctl, "A", "Virtual Function's name"); 2336 if (!hn_xpnt_vf) { 2337 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", 2338 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2339 hn_rxvf_sysctl, "A", "activated Virtual Function's name"); 2340 } else { 2341 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", 2342 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2343 hn_xpnt_vf_enabled_sysctl, "I", 2344 "Transparent VF enabled"); 2345 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", 2346 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2347 hn_xpnt_vf_accbpf_sysctl, "I", 2348 "Accurate BPF for transparent VF"); 2349 } 2350 2351 /* 2352 * Setup the ifmedia, which has been initialized earlier. 2353 */ 2354 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 2355 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 2356 /* XXX ifmedia_set really should do this for us */ 2357 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 2358 2359 /* 2360 * Setup the ifnet for this interface. 2361 */ 2362 2363 ifp->if_baudrate = IF_Gbps(10); 2364 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2365 ifp->if_ioctl = hn_ioctl; 2366 ifp->if_init = hn_init; 2367 #ifdef HN_IFSTART_SUPPORT 2368 if (hn_use_if_start) { 2369 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); 2370 2371 ifp->if_start = hn_start; 2372 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); 2373 ifp->if_snd.ifq_drv_maxlen = qdepth - 1; 2374 IFQ_SET_READY(&ifp->if_snd); 2375 } else 2376 #endif 2377 { 2378 ifp->if_transmit = hn_transmit; 2379 ifp->if_qflush = hn_xmit_qflush; 2380 } 2381 2382 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; 2383 #ifdef foo 2384 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2385 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; 2386 #endif 2387 if (sc->hn_caps & HN_CAP_VLAN) { 2388 /* XXX not sure about VLAN_MTU. 
*/ 2389 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; 2390 } 2391 2392 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; 2393 if (ifp->if_hwassist & HN_CSUM_IP_MASK) 2394 ifp->if_capabilities |= IFCAP_TXCSUM; 2395 if (ifp->if_hwassist & HN_CSUM_IP6_MASK) 2396 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; 2397 if (sc->hn_caps & HN_CAP_TSO4) { 2398 ifp->if_capabilities |= IFCAP_TSO4; 2399 ifp->if_hwassist |= CSUM_IP_TSO; 2400 } 2401 if (sc->hn_caps & HN_CAP_TSO6) { 2402 ifp->if_capabilities |= IFCAP_TSO6; 2403 ifp->if_hwassist |= CSUM_IP6_TSO; 2404 } 2405 2406 /* Enable all available capabilities by default. */ 2407 ifp->if_capenable = ifp->if_capabilities; 2408 2409 /* 2410 * Disable IPv6 TSO and TXCSUM by default, they still can 2411 * be enabled through SIOCSIFCAP. 2412 */ 2413 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); 2414 ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); 2415 2416 if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { 2417 /* 2418 * Lock hn_set_tso_maxsize() to simplify its 2419 * internal logic. 2420 */ 2421 HN_LOCK(sc); 2422 hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); 2423 HN_UNLOCK(sc); 2424 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; 2425 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2426 } 2427 2428 ether_ifattach(ifp, eaddr); 2429 2430 if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { 2431 if_printf(ifp, "TSO segcnt %u segsz %u\n", 2432 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); 2433 } 2434 if (mtu < ETHERMTU) { 2435 if_printf(ifp, "fixup mtu %u -> %u\n", ifp->if_mtu, mtu); 2436 ifp->if_mtu = mtu; 2437 } 2438 2439 /* Inform the upper layer about the long frame support. */ 2440 ifp->if_hdrlen = sizeof(struct ether_vlan_header); 2441 2442 /* 2443 * Kick off link status check. 2444 */ 2445 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 2446 hn_update_link_status(sc); 2447 2448 if (!hn_xpnt_vf) { 2449 sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, 2450 hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); 2451 sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, 2452 hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); 2453 } else { 2454 sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, 2455 hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); 2456 } 2457 2458 /* 2459 * NOTE: 2460 * Subscribe ether_ifattach event, instead of ifnet_arrival event, 2461 * since interface's LLADDR is needed; interface LLADDR is not 2462 * available when ifnet_arrival event is triggered. 2463 */ 2464 sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event, 2465 hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY); 2466 sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event, 2467 hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY); 2468 2469 return (0); 2470 failed: 2471 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) 2472 hn_synth_detach(sc); 2473 hn_detach(dev); 2474 return (error); 2475 } 2476 2477 static int 2478 hn_detach(device_t dev) 2479 { 2480 struct hn_softc *sc = device_get_softc(dev); 2481 struct ifnet *ifp = sc->hn_ifp, *vf_ifp; 2482 2483 if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { 2484 /* 2485 * In case that the vmbus missed the orphan handler 2486 * installation. 
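 *
 * Orphaning the transaction context aborts any transaction that may
 * still be waiting on the revoked primary channel, so the teardown
 * below cannot block forever on a reply that will never arrive.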
2487 */ 2488 vmbus_xact_ctx_orphan(sc->hn_xact); 2489 } 2490 2491 if (sc->hn_ifaddr_evthand != NULL) 2492 EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand); 2493 if (sc->hn_ifnet_evthand != NULL) 2494 EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand); 2495 if (sc->hn_ifnet_atthand != NULL) { 2496 EVENTHANDLER_DEREGISTER(ether_ifattach_event, 2497 sc->hn_ifnet_atthand); 2498 } 2499 if (sc->hn_ifnet_dethand != NULL) { 2500 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 2501 sc->hn_ifnet_dethand); 2502 } 2503 if (sc->hn_ifnet_lnkhand != NULL) 2504 EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand); 2505 2506 vf_ifp = sc->hn_vf_ifp; 2507 __compiler_membar(); 2508 if (vf_ifp != NULL) 2509 hn_ifnet_detevent(sc, vf_ifp); 2510 2511 if (device_is_attached(dev)) { 2512 HN_LOCK(sc); 2513 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2514 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2515 hn_stop(sc, true); 2516 /* 2517 * NOTE: 2518 * hn_stop() only suspends data, so managment 2519 * stuffs have to be suspended manually here. 2520 */ 2521 hn_suspend_mgmt(sc); 2522 hn_synth_detach(sc); 2523 } 2524 HN_UNLOCK(sc); 2525 ether_ifdetach(ifp); 2526 } 2527 2528 ifmedia_removeall(&sc->hn_media); 2529 hn_destroy_rx_data(sc); 2530 hn_destroy_tx_data(sc); 2531 2532 if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) { 2533 int i; 2534 2535 for (i = 0; i < hn_tx_taskq_cnt; ++i) 2536 taskqueue_free(sc->hn_tx_taskqs[i]); 2537 free(sc->hn_tx_taskqs, M_DEVBUF); 2538 } 2539 taskqueue_free(sc->hn_mgmt_taskq0); 2540 if (sc->hn_vf_taskq != NULL) 2541 taskqueue_free(sc->hn_vf_taskq); 2542 2543 if (sc->hn_xact != NULL) { 2544 /* 2545 * Uninstall the orphan handler _before_ the xact is 2546 * destructed. 2547 */ 2548 vmbus_chan_unset_orphan(sc->hn_prichan); 2549 vmbus_xact_ctx_destroy(sc->hn_xact); 2550 } 2551 2552 if_free(ifp); 2553 2554 HN_LOCK_DESTROY(sc); 2555 rm_destroy(&sc->hn_vf_lock); 2556 return (0); 2557 } 2558 2559 static int 2560 hn_shutdown(device_t dev) 2561 { 2562 2563 return (0); 2564 } 2565 2566 static void 2567 hn_link_status(struct hn_softc *sc) 2568 { 2569 uint32_t link_status; 2570 int error; 2571 2572 error = hn_rndis_get_linkstatus(sc, &link_status); 2573 if (error) { 2574 /* XXX what to do? */ 2575 return; 2576 } 2577 2578 if (link_status == NDIS_MEDIA_STATE_CONNECTED) 2579 sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; 2580 else 2581 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2582 if_link_state_change(sc->hn_ifp, 2583 (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? 2584 LINK_STATE_UP : LINK_STATE_DOWN); 2585 } 2586 2587 static void 2588 hn_link_taskfunc(void *xsc, int pending __unused) 2589 { 2590 struct hn_softc *sc = xsc; 2591 2592 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 2593 return; 2594 hn_link_status(sc); 2595 } 2596 2597 static void 2598 hn_netchg_init_taskfunc(void *xsc, int pending __unused) 2599 { 2600 struct hn_softc *sc = xsc; 2601 2602 /* Prevent any link status checks from running. */ 2603 sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; 2604 2605 /* 2606 * Fake up a [link down --> link up] state change; 5 seconds 2607 * delay is used, which closely simulates miibus reaction 2608 * upon link down event. 
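 *
 * Concretely: HN_LINK_FLAG_NETCHG is already set, so hn_link_taskfunc()
 * becomes a no-op; report link down right away, and let the timeout
 * task (hn_netchg_status_taskfunc(), scheduled 5*hz ticks from now)
 * clear the flag and re-query the real link state.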
2609 */ 2610 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2611 if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); 2612 taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, 2613 &sc->hn_netchg_status, 5 * hz); 2614 } 2615 2616 static void 2617 hn_netchg_status_taskfunc(void *xsc, int pending __unused) 2618 { 2619 struct hn_softc *sc = xsc; 2620 2621 /* Re-allow link status checks. */ 2622 sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; 2623 hn_link_status(sc); 2624 } 2625 2626 static void 2627 hn_update_link_status(struct hn_softc *sc) 2628 { 2629 2630 if (sc->hn_mgmt_taskq != NULL) 2631 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); 2632 } 2633 2634 static void 2635 hn_change_network(struct hn_softc *sc) 2636 { 2637 2638 if (sc->hn_mgmt_taskq != NULL) 2639 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); 2640 } 2641 2642 static __inline int 2643 hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, 2644 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) 2645 { 2646 struct mbuf *m = *m_head; 2647 int error; 2648 2649 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); 2650 2651 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, 2652 m, segs, nsegs, BUS_DMA_NOWAIT); 2653 if (error == EFBIG) { 2654 struct mbuf *m_new; 2655 2656 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); 2657 if (m_new == NULL) 2658 return ENOBUFS; 2659 else 2660 *m_head = m = m_new; 2661 txr->hn_tx_collapsed++; 2662 2663 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, 2664 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); 2665 } 2666 if (!error) { 2667 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, 2668 BUS_DMASYNC_PREWRITE); 2669 txd->flags |= HN_TXD_FLAG_DMAMAP; 2670 } 2671 return error; 2672 } 2673 2674 static __inline int 2675 hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) 2676 { 2677 2678 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, 2679 ("put an onlist txd %#x", txd->flags)); 2680 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2681 ("put an onagg txd %#x", txd->flags)); 2682 2683 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2684 if (atomic_fetchadd_int(&txd->refs, -1) != 1) 2685 return 0; 2686 2687 if (!STAILQ_EMPTY(&txd->agg_list)) { 2688 struct hn_txdesc *tmp_txd; 2689 2690 while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { 2691 int freed; 2692 2693 KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), 2694 ("recursive aggregation on aggregated txdesc")); 2695 KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), 2696 ("not aggregated txdesc")); 2697 KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2698 ("aggregated txdesc uses dmamap")); 2699 KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2700 ("aggregated txdesc consumes " 2701 "chimney sending buffer")); 2702 KASSERT(tmp_txd->chim_size == 0, 2703 ("aggregated txdesc has non-zero " 2704 "chimney sending size")); 2705 2706 STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); 2707 tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; 2708 freed = hn_txdesc_put(txr, tmp_txd); 2709 KASSERT(freed, ("failed to free aggregated txdesc")); 2710 } 2711 } 2712 2713 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 2714 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2715 ("chim txd uses dmamap")); 2716 hn_chim_free(txr->hn_sc, txd->chim_index); 2717 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2718 txd->chim_size = 0; 2719 } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { 2720 bus_dmamap_sync(txr->hn_tx_data_dtag, 2721 txd->data_dmap, BUS_DMASYNC_POSTWRITE); 2722
bus_dmamap_unload(txr->hn_tx_data_dtag, 2723 txd->data_dmap); 2724 txd->flags &= ~HN_TXD_FLAG_DMAMAP; 2725 } 2726 2727 if (txd->m != NULL) { 2728 m_freem(txd->m); 2729 txd->m = NULL; 2730 } 2731 2732 txd->flags |= HN_TXD_FLAG_ONLIST; 2733 #ifndef HN_USE_TXDESC_BUFRING 2734 mtx_lock_spin(&txr->hn_txlist_spin); 2735 KASSERT(txr->hn_txdesc_avail >= 0 && 2736 txr->hn_txdesc_avail < txr->hn_txdesc_cnt, 2737 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); 2738 txr->hn_txdesc_avail++; 2739 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 2740 mtx_unlock_spin(&txr->hn_txlist_spin); 2741 #else /* HN_USE_TXDESC_BUFRING */ 2742 #ifdef HN_DEBUG 2743 atomic_add_int(&txr->hn_txdesc_avail, 1); 2744 #endif 2745 buf_ring_enqueue(txr->hn_txdesc_br, txd); 2746 #endif /* !HN_USE_TXDESC_BUFRING */ 2747 2748 return 1; 2749 } 2750 2751 static __inline struct hn_txdesc * 2752 hn_txdesc_get(struct hn_tx_ring *txr) 2753 { 2754 struct hn_txdesc *txd; 2755 2756 #ifndef HN_USE_TXDESC_BUFRING 2757 mtx_lock_spin(&txr->hn_txlist_spin); 2758 txd = SLIST_FIRST(&txr->hn_txlist); 2759 if (txd != NULL) { 2760 KASSERT(txr->hn_txdesc_avail > 0, 2761 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); 2762 txr->hn_txdesc_avail--; 2763 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 2764 } 2765 mtx_unlock_spin(&txr->hn_txlist_spin); 2766 #else 2767 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); 2768 #endif 2769 2770 if (txd != NULL) { 2771 #ifdef HN_USE_TXDESC_BUFRING 2772 #ifdef HN_DEBUG 2773 atomic_subtract_int(&txr->hn_txdesc_avail, 1); 2774 #endif 2775 #endif /* HN_USE_TXDESC_BUFRING */ 2776 KASSERT(txd->m == NULL && txd->refs == 0 && 2777 STAILQ_EMPTY(&txd->agg_list) && 2778 txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 2779 txd->chim_size == 0 && 2780 (txd->flags & HN_TXD_FLAG_ONLIST) && 2781 (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && 2782 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); 2783 txd->flags &= ~HN_TXD_FLAG_ONLIST; 2784 txd->refs = 1; 2785 } 2786 return txd; 2787 } 2788 2789 static __inline void 2790 hn_txdesc_hold(struct hn_txdesc *txd) 2791 { 2792 2793 /* 0->1 transition will never work */ 2794 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2795 atomic_add_int(&txd->refs, 1); 2796 } 2797 2798 static __inline void 2799 hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) 2800 { 2801 2802 KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2803 ("recursive aggregation on aggregating txdesc")); 2804 2805 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2806 ("already aggregated")); 2807 KASSERT(STAILQ_EMPTY(&txd->agg_list), 2808 ("recursive aggregation on to-be-aggregated txdesc")); 2809 2810 txd->flags |= HN_TXD_FLAG_ONAGG; 2811 STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); 2812 } 2813 2814 static bool 2815 hn_tx_ring_pending(struct hn_tx_ring *txr) 2816 { 2817 bool pending = false; 2818 2819 #ifndef HN_USE_TXDESC_BUFRING 2820 mtx_lock_spin(&txr->hn_txlist_spin); 2821 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) 2822 pending = true; 2823 mtx_unlock_spin(&txr->hn_txlist_spin); 2824 #else 2825 if (!buf_ring_full(txr->hn_txdesc_br)) 2826 pending = true; 2827 #endif 2828 return (pending); 2829 } 2830 2831 static __inline void 2832 hn_txeof(struct hn_tx_ring *txr) 2833 { 2834 txr->hn_has_txeof = 0; 2835 txr->hn_txeof(txr); 2836 } 2837 2838 static void 2839 hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, 2840 struct vmbus_channel *chan, const void *data __unused, int dlen __unused) 2841 { 2842 struct hn_txdesc *txd = sndc->hn_cbarg; 2843 struct 
hn_tx_ring *txr; 2844 2845 txr = txd->txr; 2846 KASSERT(txr->hn_chan == chan, 2847 ("channel mismatch, on chan%u, should be chan%u", 2848 vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); 2849 2850 txr->hn_has_txeof = 1; 2851 hn_txdesc_put(txr, txd); 2852 2853 ++txr->hn_txdone_cnt; 2854 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { 2855 txr->hn_txdone_cnt = 0; 2856 if (txr->hn_oactive) 2857 hn_txeof(txr); 2858 } 2859 } 2860 2861 static void 2862 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 2863 { 2864 #if defined(INET) || defined(INET6) 2865 tcp_lro_flush_all(&rxr->hn_lro); 2866 #endif 2867 2868 /* 2869 * NOTE: 2870 * 'txr' could be NULL, if multiple channels and 2871 * ifnet.if_start method are enabled. 2872 */ 2873 if (txr == NULL || !txr->hn_has_txeof) 2874 return; 2875 2876 txr->hn_txdone_cnt = 0; 2877 hn_txeof(txr); 2878 } 2879 2880 static __inline uint32_t 2881 hn_rndis_pktmsg_offset(uint32_t ofs) 2882 { 2883 2884 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 2885 ("invalid RNDIS packet msg offset %u", ofs)); 2886 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 2887 } 2888 2889 static __inline void * 2890 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 2891 size_t pi_dlen, uint32_t pi_type) 2892 { 2893 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 2894 struct rndis_pktinfo *pi; 2895 2896 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 2897 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 2898 2899 /* 2900 * Per-packet-info does not move; it only grows. 2901 * 2902 * NOTE: 2903 * rm_pktinfooffset in this phase counts from the beginning 2904 * of rndis_packet_msg. 2905 */ 2906 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 2907 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 2908 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 2909 pkt->rm_pktinfolen); 2910 pkt->rm_pktinfolen += pi_size; 2911 2912 pi->rm_size = pi_size; 2913 pi->rm_type = pi_type; 2914 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 2915 2916 return (pi->rm_data); 2917 } 2918 2919 static __inline int 2920 hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) 2921 { 2922 struct hn_txdesc *txd; 2923 struct mbuf *m; 2924 int error, pkts; 2925 2926 txd = txr->hn_agg_txd; 2927 KASSERT(txd != NULL, ("no aggregate txdesc")); 2928 2929 /* 2930 * Since hn_txpkt() will reset this temporary stat, save 2931 * it now, so that oerrors can be updated properly, if 2932 * hn_txpkt() ever fails. 2933 */ 2934 pkts = txr->hn_stat_pkts; 2935 2936 /* 2937 * Since txd's mbuf will _not_ be freed upon hn_txpkt() 2938 * failure, save it for later freeing, if hn_txpkt() ever 2939 * fails. 2940 */ 2941 m = txd->m; 2942 error = hn_txpkt(ifp, txr, txd); 2943 if (__predict_false(error)) { 2944 /* txd is freed, but m is not. */ 2945 m_freem(m); 2946 2947 txr->hn_flush_failed++; 2948 if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); 2949 } 2950 2951 /* Reset all aggregation states. 
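 *
 * This is done whether or not hn_txpkt() succeeded; a failed flush
 * must not leave a stale aggregating txdesc behind for the next
 * packet to append to.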
*/ 2952 txr->hn_agg_txd = NULL; 2953 txr->hn_agg_szleft = 0; 2954 txr->hn_agg_pktleft = 0; 2955 txr->hn_agg_prevpkt = NULL; 2956 2957 return (error); 2958 } 2959 2960 static void * 2961 hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2962 int pktsize) 2963 { 2964 void *chim; 2965 2966 if (txr->hn_agg_txd != NULL) { 2967 if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { 2968 struct hn_txdesc *agg_txd = txr->hn_agg_txd; 2969 struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; 2970 int olen; 2971 2972 /* 2973 * Update the previous RNDIS packet's total length, 2974 * it can be increased due to the mandatory alignment 2975 * padding for this RNDIS packet. And update the 2976 * aggregating txdesc's chimney sending buffer size 2977 * accordingly. 2978 * 2979 * XXX 2980 * Zero-out the padding, as required by the RNDIS spec. 2981 */ 2982 olen = pkt->rm_len; 2983 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 2984 agg_txd->chim_size += pkt->rm_len - olen; 2985 2986 /* Link this txdesc to the parent. */ 2987 hn_txdesc_agg(agg_txd, txd); 2988 2989 chim = (uint8_t *)pkt + pkt->rm_len; 2990 /* Save the current packet for later fixup. */ 2991 txr->hn_agg_prevpkt = chim; 2992 2993 txr->hn_agg_pktleft--; 2994 txr->hn_agg_szleft -= pktsize; 2995 if (txr->hn_agg_szleft <= 2996 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 2997 /* 2998 * Probably can't aggregate more packets, 2999 * flush this aggregating txdesc proactively. 3000 */ 3001 txr->hn_agg_pktleft = 0; 3002 } 3003 /* Done! */ 3004 return (chim); 3005 } 3006 hn_flush_txagg(ifp, txr); 3007 } 3008 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 3009 3010 txr->hn_tx_chimney_tried++; 3011 txd->chim_index = hn_chim_alloc(txr->hn_sc); 3012 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 3013 return (NULL); 3014 txr->hn_tx_chimney++; 3015 3016 chim = txr->hn_sc->hn_chim + 3017 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 3018 3019 if (txr->hn_agg_pktmax > 1 && 3020 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 3021 txr->hn_agg_txd = txd; 3022 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 3023 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 3024 txr->hn_agg_prevpkt = chim; 3025 } 3026 return (chim); 3027 } 3028 3029 /* 3030 * NOTE: 3031 * If this function fails, then both txd and m_head0 will be freed. 
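 *
 * For orientation, hn_encap() chooses between the two send methods
 * roughly as follows (a simplified sketch, not the exact control
 * flow):
 *
 *	if (HN_PKTSIZE(m_head) < txr->hn_chim_size)
 *		copy the RNDIS header and the mbuf data into a
 *		chimney (pre-posted send) buffer slot, possibly
 *		aggregating several packets into one slot;
 *	else
 *		DMA-load the mbuf and hand the host a vmbus_gpa page
 *		list, with the RNDIS header as the first element.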
3032 */ 3033 static int 3034 hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 3035 struct mbuf **m_head0) 3036 { 3037 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 3038 int error, nsegs, i; 3039 struct mbuf *m_head = *m_head0; 3040 struct rndis_packet_msg *pkt; 3041 uint32_t *pi_data; 3042 void *chim = NULL; 3043 int pkt_hlen, pkt_size; 3044 3045 pkt = txd->rndis_pkt; 3046 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 3047 if (pkt_size < txr->hn_chim_size) { 3048 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 3049 if (chim != NULL) 3050 pkt = chim; 3051 } else { 3052 if (txr->hn_agg_txd != NULL) 3053 hn_flush_txagg(ifp, txr); 3054 } 3055 3056 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 3057 pkt->rm_len = m_head->m_pkthdr.len; 3058 pkt->rm_dataoffset = 0; 3059 pkt->rm_datalen = m_head->m_pkthdr.len; 3060 pkt->rm_oobdataoffset = 0; 3061 pkt->rm_oobdatalen = 0; 3062 pkt->rm_oobdataelements = 0; 3063 pkt->rm_pktinfooffset = sizeof(*pkt); 3064 pkt->rm_pktinfolen = 0; 3065 pkt->rm_vchandle = 0; 3066 pkt->rm_reserved = 0; 3067 3068 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 3069 /* 3070 * Set the hash value for this packet, so that the host could 3071 * dispatch the TX done event for this packet back to this TX 3072 * ring's channel. 3073 */ 3074 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3075 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 3076 *pi_data = txr->hn_tx_idx; 3077 } 3078 3079 if (m_head->m_flags & M_VLANTAG) { 3080 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3081 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 3082 *pi_data = NDIS_VLAN_INFO_MAKE( 3083 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 3084 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 3085 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 3086 } 3087 3088 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3089 #if defined(INET6) || defined(INET) 3090 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3091 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 3092 #ifdef INET 3093 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 3094 *pi_data = NDIS_LSO2_INFO_MAKEIPV4( 3095 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3096 m_head->m_pkthdr.tso_segsz); 3097 } 3098 #endif 3099 #if defined(INET6) && defined(INET) 3100 else 3101 #endif 3102 #ifdef INET6 3103 { 3104 *pi_data = NDIS_LSO2_INFO_MAKEIPV6( 3105 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3106 m_head->m_pkthdr.tso_segsz); 3107 } 3108 #endif 3109 #endif /* INET6 || INET */ 3110 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 3111 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3112 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 3113 if (m_head->m_pkthdr.csum_flags & 3114 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 3115 *pi_data = NDIS_TXCSUM_INFO_IPV6; 3116 } else { 3117 *pi_data = NDIS_TXCSUM_INFO_IPV4; 3118 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3119 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 3120 } 3121 3122 if (m_head->m_pkthdr.csum_flags & 3123 (CSUM_IP_TCP | CSUM_IP6_TCP)) { 3124 *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS( 3125 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3126 } else if (m_head->m_pkthdr.csum_flags & 3127 (CSUM_IP_UDP | CSUM_IP6_UDP)) { 3128 *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS( 3129 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3130 } 3131 } 3132 3133 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 3134 /* Fixup RNDIS packet message total length */ 3135 pkt->rm_len += pkt_hlen; 3136 /* Convert RNDIS packet message offsets */ 3137 pkt->rm_dataoffset = 
hn_rndis_pktmsg_offset(pkt_hlen); 3138 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 3139 3140 /* 3141 * Fast path: Chimney sending. 3142 */ 3143 if (chim != NULL) { 3144 struct hn_txdesc *tgt_txd = txd; 3145 3146 if (txr->hn_agg_txd != NULL) { 3147 tgt_txd = txr->hn_agg_txd; 3148 #ifdef INVARIANTS 3149 *m_head0 = NULL; 3150 #endif 3151 } 3152 3153 KASSERT(pkt == chim, 3154 ("RNDIS pkt not in chimney sending buffer")); 3155 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 3156 ("chimney sending buffer is not used")); 3157 tgt_txd->chim_size += pkt->rm_len; 3158 3159 m_copydata(m_head, 0, m_head->m_pkthdr.len, 3160 ((uint8_t *)chim) + pkt_hlen); 3161 3162 txr->hn_gpa_cnt = 0; 3163 txr->hn_sendpkt = hn_txpkt_chim; 3164 goto done; 3165 } 3166 3167 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 3168 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 3169 ("chimney buffer is used")); 3170 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 3171 3172 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 3173 if (__predict_false(error)) { 3174 int freed; 3175 3176 /* 3177 * This mbuf is not linked w/ the txd yet, so free it now. 3178 */ 3179 m_freem(m_head); 3180 *m_head0 = NULL; 3181 3182 freed = hn_txdesc_put(txr, txd); 3183 KASSERT(freed != 0, 3184 ("fail to free txd upon txdma error")); 3185 3186 txr->hn_txdma_failed++; 3187 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 3188 return error; 3189 } 3190 *m_head0 = m_head; 3191 3192 /* +1 RNDIS packet message */ 3193 txr->hn_gpa_cnt = nsegs + 1; 3194 3195 /* send packet with page buffer */ 3196 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 3197 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 3198 txr->hn_gpa[0].gpa_len = pkt_hlen; 3199 3200 /* 3201 * Fill the page buffers with mbuf info after the page 3202 * buffer for RNDIS packet message. 3203 */ 3204 for (i = 0; i < nsegs; ++i) { 3205 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 3206 3207 gpa->gpa_page = atop(segs[i].ds_addr); 3208 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 3209 gpa->gpa_len = segs[i].ds_len; 3210 } 3211 3212 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 3213 txd->chim_size = 0; 3214 txr->hn_sendpkt = hn_txpkt_sglist; 3215 done: 3216 txd->m = m_head; 3217 3218 /* Set the completion routine */ 3219 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 3220 3221 /* Update temporary stats for later use. */ 3222 txr->hn_stat_pkts++; 3223 txr->hn_stat_size += m_head->m_pkthdr.len; 3224 if (m_head->m_flags & M_MCAST) 3225 txr->hn_stat_mcasts++; 3226 3227 return 0; 3228 } 3229 3230 /* 3231 * NOTE: 3232 * If this function fails, then txd will be freed, but the mbuf 3233 * associated w/ the txd will _not_ be freed. 3234 */ 3235 static int 3236 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 3237 { 3238 int error, send_failed = 0, has_bpf; 3239 3240 again: 3241 has_bpf = bpf_peers_present(ifp->if_bpf); 3242 if (has_bpf) { 3243 /* 3244 * Make sure that this txd and any aggregated txds are not 3245 * freed before ETHER_BPF_MTAP. 
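 *
 * hn_txdesc_hold() takes an extra reference here and the matching
 * hn_txdesc_put() below drops it once the taps are done, so a TX
 * completion running concurrently cannot recycle the descriptor
 * (and free txd->m) underneath ETHER_BPF_MTAP().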
3246 */ 3247 hn_txdesc_hold(txd); 3248 } 3249 error = txr->hn_sendpkt(txr, txd); 3250 if (!error) { 3251 if (has_bpf) { 3252 const struct hn_txdesc *tmp_txd; 3253 3254 ETHER_BPF_MTAP(ifp, txd->m); 3255 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 3256 ETHER_BPF_MTAP(ifp, tmp_txd->m); 3257 } 3258 3259 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 3260 #ifdef HN_IFSTART_SUPPORT 3261 if (!hn_use_if_start) 3262 #endif 3263 { 3264 if_inc_counter(ifp, IFCOUNTER_OBYTES, 3265 txr->hn_stat_size); 3266 if (txr->hn_stat_mcasts != 0) { 3267 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 3268 txr->hn_stat_mcasts); 3269 } 3270 } 3271 txr->hn_pkts += txr->hn_stat_pkts; 3272 txr->hn_sends++; 3273 } 3274 if (has_bpf) 3275 hn_txdesc_put(txr, txd); 3276 3277 if (__predict_false(error)) { 3278 int freed; 3279 3280 /* 3281 * This should "really rarely" happen. 3282 * 3283 * XXX Too many RX to be acked or too many sideband 3284 * commands to run? Ask netvsc_channel_rollup() 3285 * to kick start later. 3286 */ 3287 txr->hn_has_txeof = 1; 3288 if (!send_failed) { 3289 txr->hn_send_failed++; 3290 send_failed = 1; 3291 /* 3292 * Try sending again after set hn_has_txeof; 3293 * in case that we missed the last 3294 * netvsc_channel_rollup(). 3295 */ 3296 goto again; 3297 } 3298 if_printf(ifp, "send failed\n"); 3299 3300 /* 3301 * Caller will perform further processing on the 3302 * associated mbuf, so don't free it in hn_txdesc_put(); 3303 * only unload it from the DMA map in hn_txdesc_put(), 3304 * if it was loaded. 3305 */ 3306 txd->m = NULL; 3307 freed = hn_txdesc_put(txr, txd); 3308 KASSERT(freed != 0, 3309 ("fail to free txd upon send error")); 3310 3311 txr->hn_send_failed++; 3312 } 3313 3314 /* Reset temporary stats, after this sending is done. */ 3315 txr->hn_stat_size = 0; 3316 txr->hn_stat_pkts = 0; 3317 txr->hn_stat_mcasts = 0; 3318 3319 return (error); 3320 } 3321 3322 /* 3323 * Append the specified data to the indicated mbuf chain, 3324 * Extend the mbuf chain if the new data does not fit in 3325 * existing space. 3326 * 3327 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 3328 * There should be an equivalent in the kernel mbuf code, 3329 * but there does not appear to be one yet. 3330 * 3331 * Differs from m_append() in that additional mbufs are 3332 * allocated with cluster size MJUMPAGESIZE, and filled 3333 * accordingly. 3334 * 3335 * Return 1 if able to complete the job; otherwise 0. 3336 */ 3337 static int 3338 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 3339 { 3340 struct mbuf *m, *n; 3341 int remainder, space; 3342 3343 for (m = m0; m->m_next != NULL; m = m->m_next) 3344 ; 3345 remainder = len; 3346 space = M_TRAILINGSPACE(m); 3347 if (space > 0) { 3348 /* 3349 * Copy into available space. 3350 */ 3351 if (space > remainder) 3352 space = remainder; 3353 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 3354 m->m_len += space; 3355 cp += space; 3356 remainder -= space; 3357 } 3358 while (remainder > 0) { 3359 /* 3360 * Allocate a new mbuf; could check space 3361 * and allocate a cluster instead. 
3362 */ 3363 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 3364 if (n == NULL) 3365 break; 3366 n->m_len = min(MJUMPAGESIZE, remainder); 3367 bcopy(cp, mtod(n, caddr_t), n->m_len); 3368 cp += n->m_len; 3369 remainder -= n->m_len; 3370 m->m_next = n; 3371 m = n; 3372 } 3373 if (m0->m_flags & M_PKTHDR) 3374 m0->m_pkthdr.len += len - remainder; 3375 3376 return (remainder == 0); 3377 } 3378 3379 #if defined(INET) || defined(INET6) 3380 static __inline int 3381 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 3382 { 3383 #if __FreeBSD_version >= 1100095 3384 if (hn_lro_mbufq_depth) { 3385 tcp_lro_queue_mbuf(lc, m); 3386 return 0; 3387 } 3388 #endif 3389 return tcp_lro_rx(lc, m, 0); 3390 } 3391 #endif 3392 3393 static int 3394 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 3395 const struct hn_rxinfo *info) 3396 { 3397 struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; 3398 struct mbuf *m_new; 3399 int size, do_lro = 0, do_csum = 1, is_vf = 0; 3400 int hash_type = M_HASHTYPE_NONE; 3401 int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE; 3402 3403 ifp = hn_ifp; 3404 if (rxr->hn_rxvf_ifp != NULL) { 3405 /* 3406 * Non-transparent mode VF; pretend this packet is from 3407 * the VF. 3408 */ 3409 ifp = rxr->hn_rxvf_ifp; 3410 is_vf = 1; 3411 } else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) { 3412 /* Transparent mode VF. */ 3413 is_vf = 1; 3414 } 3415 3416 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 3417 /* 3418 * NOTE: 3419 * See the NOTE of hn_rndis_init_fixat(). This 3420 * function can be reached, immediately after the 3421 * RNDIS is initialized but before the ifnet is 3422 * setup on the hn_attach() path; drop the unexpected 3423 * packets. 3424 */ 3425 return (0); 3426 } 3427 3428 if (__predict_false(dlen < ETHER_HDR_LEN)) { 3429 if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); 3430 return (0); 3431 } 3432 3433 if (dlen <= MHLEN) { 3434 m_new = m_gethdr(M_NOWAIT, MT_DATA); 3435 if (m_new == NULL) { 3436 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3437 return (0); 3438 } 3439 memcpy(mtod(m_new, void *), data, dlen); 3440 m_new->m_pkthdr.len = m_new->m_len = dlen; 3441 rxr->hn_small_pkts++; 3442 } else { 3443 /* 3444 * Get an mbuf with a cluster. For packets 2K or less, 3445 * get a standard 2K cluster. For anything larger, get a 3446 * 4K cluster. Any buffers larger than 4K can cause problems 3447 * if looped around to the Hyper-V TX channel, so avoid them. 
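 *
 * In short, three sizes are used on the receive copy path: dlen <=
 * MHLEN fits in the plain header mbuf handled above, dlen <=
 * MCLBYTES gets a 2K cluster, and anything larger gets a 4K
 * (MJUMPAGESIZE) cluster, with hv_m_append() chaining more 4K
 * clusters if the data still does not fit.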
3448 */ 3449 size = MCLBYTES; 3450 if (dlen > MCLBYTES) { 3451 /* 4096 */ 3452 size = MJUMPAGESIZE; 3453 } 3454 3455 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 3456 if (m_new == NULL) { 3457 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3458 return (0); 3459 } 3460 3461 hv_m_append(m_new, dlen, data); 3462 } 3463 m_new->m_pkthdr.rcvif = ifp; 3464 3465 if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) 3466 do_csum = 0; 3467 3468 /* receive side checksum offload */ 3469 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 3470 /* IP csum offload */ 3471 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 3472 m_new->m_pkthdr.csum_flags |= 3473 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3474 rxr->hn_csum_ip++; 3475 } 3476 3477 /* TCP/UDP csum offload */ 3478 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 3479 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 3480 m_new->m_pkthdr.csum_flags |= 3481 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3482 m_new->m_pkthdr.csum_data = 0xffff; 3483 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 3484 rxr->hn_csum_tcp++; 3485 else 3486 rxr->hn_csum_udp++; 3487 } 3488 3489 /* 3490 * XXX 3491 * As of this write (Oct 28th, 2016), host side will turn 3492 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 3493 * the do_lro setting here is actually _not_ accurate. We 3494 * depend on the RSS hash type check to reset do_lro. 3495 */ 3496 if ((info->csum_info & 3497 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 3498 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 3499 do_lro = 1; 3500 } else { 3501 hn_rxpkt_proto(m_new, &l3proto, &l4proto); 3502 if (l3proto == ETHERTYPE_IP) { 3503 if (l4proto == IPPROTO_TCP) { 3504 if (do_csum && 3505 (rxr->hn_trust_hcsum & 3506 HN_TRUST_HCSUM_TCP)) { 3507 rxr->hn_csum_trusted++; 3508 m_new->m_pkthdr.csum_flags |= 3509 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3510 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3511 m_new->m_pkthdr.csum_data = 0xffff; 3512 } 3513 do_lro = 1; 3514 } else if (l4proto == IPPROTO_UDP) { 3515 if (do_csum && 3516 (rxr->hn_trust_hcsum & 3517 HN_TRUST_HCSUM_UDP)) { 3518 rxr->hn_csum_trusted++; 3519 m_new->m_pkthdr.csum_flags |= 3520 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3521 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3522 m_new->m_pkthdr.csum_data = 0xffff; 3523 } 3524 } else if (l4proto != IPPROTO_DONE && do_csum && 3525 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 3526 rxr->hn_csum_trusted++; 3527 m_new->m_pkthdr.csum_flags |= 3528 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3529 } 3530 } 3531 } 3532 3533 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 3534 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 3535 NDIS_VLAN_INFO_ID(info->vlan_info), 3536 NDIS_VLAN_INFO_PRI(info->vlan_info), 3537 NDIS_VLAN_INFO_CFI(info->vlan_info)); 3538 m_new->m_flags |= M_VLANTAG; 3539 } 3540 3541 /* 3542 * If VF is activated (tranparent/non-transparent mode does not 3543 * matter here). 3544 * 3545 * - Disable LRO 3546 * 3547 * hn(4) will only receive broadcast packets, multicast packets, 3548 * TCP SYN and SYN|ACK (in Azure), LRO is useless for these 3549 * packet types. 3550 * 3551 * For non-transparent, we definitely _cannot_ enable LRO at 3552 * all, since the LRO flush will use hn(4) as the receiving 3553 * interface; i.e. hn_ifp->if_input(hn_ifp, m). 3554 */ 3555 if (is_vf) 3556 do_lro = 0; 3557 3558 /* 3559 * If VF is activated (tranparent/non-transparent mode does not 3560 * matter here), do _not_ mess with unsupported hash types or 3561 * functions. 
3562 */ 3563 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 3564 rxr->hn_rss_pkts++; 3565 m_new->m_pkthdr.flowid = info->hash_value; 3566 if (!is_vf) 3567 hash_type = M_HASHTYPE_OPAQUE_HASH; 3568 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 3569 NDIS_HASH_FUNCTION_TOEPLITZ) { 3570 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK & 3571 rxr->hn_mbuf_hash); 3572 3573 /* 3574 * NOTE: 3575 * do_lro is resetted, if the hash types are not TCP 3576 * related. See the comment in the above csum_flags 3577 * setup section. 3578 */ 3579 switch (type) { 3580 case NDIS_HASH_IPV4: 3581 hash_type = M_HASHTYPE_RSS_IPV4; 3582 do_lro = 0; 3583 break; 3584 3585 case NDIS_HASH_TCP_IPV4: 3586 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 3587 if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) { 3588 int def_htype = M_HASHTYPE_OPAQUE_HASH; 3589 3590 if (is_vf) 3591 def_htype = M_HASHTYPE_NONE; 3592 3593 /* 3594 * UDP 4-tuple hash is delivered as 3595 * TCP 4-tuple hash. 3596 */ 3597 if (l3proto == ETHERTYPE_MAX) { 3598 hn_rxpkt_proto(m_new, 3599 &l3proto, &l4proto); 3600 } 3601 if (l3proto == ETHERTYPE_IP) { 3602 if (l4proto == IPPROTO_UDP && 3603 (rxr->hn_mbuf_hash & 3604 NDIS_HASH_UDP_IPV4_X)) { 3605 hash_type = 3606 M_HASHTYPE_RSS_UDP_IPV4; 3607 do_lro = 0; 3608 } else if (l4proto != 3609 IPPROTO_TCP) { 3610 hash_type = def_htype; 3611 do_lro = 0; 3612 } 3613 } else { 3614 hash_type = def_htype; 3615 do_lro = 0; 3616 } 3617 } 3618 break; 3619 3620 case NDIS_HASH_IPV6: 3621 hash_type = M_HASHTYPE_RSS_IPV6; 3622 do_lro = 0; 3623 break; 3624 3625 case NDIS_HASH_IPV6_EX: 3626 hash_type = M_HASHTYPE_RSS_IPV6_EX; 3627 do_lro = 0; 3628 break; 3629 3630 case NDIS_HASH_TCP_IPV6: 3631 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 3632 break; 3633 3634 case NDIS_HASH_TCP_IPV6_EX: 3635 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 3636 break; 3637 } 3638 } 3639 } else if (!is_vf) { 3640 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 3641 hash_type = M_HASHTYPE_OPAQUE; 3642 } 3643 M_HASHTYPE_SET(m_new, hash_type); 3644 3645 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 3646 if (hn_ifp != ifp) { 3647 const struct ether_header *eh; 3648 3649 /* 3650 * Non-transparent mode VF is activated. 3651 */ 3652 3653 /* 3654 * Allow tapping on hn(4). 3655 */ 3656 ETHER_BPF_MTAP(hn_ifp, m_new); 3657 3658 /* 3659 * Update hn(4)'s stats. 3660 */ 3661 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 3662 if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len); 3663 /* Checked at the beginning of this function. */ 3664 KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame")); 3665 eh = mtod(m_new, struct ether_header *); 3666 if (ETHER_IS_MULTICAST(eh->ether_dhost)) 3667 if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1); 3668 } 3669 rxr->hn_pkts++; 3670 3671 if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) { 3672 #if defined(INET) || defined(INET6) 3673 struct lro_ctrl *lro = &rxr->hn_lro; 3674 3675 if (lro->lro_cnt) { 3676 rxr->hn_lro_tried++; 3677 if (hn_lro_rx(lro, m_new) == 0) { 3678 /* DONE! 
*/ 3679 return 0; 3680 } 3681 } 3682 #endif 3683 } 3684 ifp->if_input(ifp, m_new); 3685 3686 return (0); 3687 } 3688 3689 static int 3690 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 3691 { 3692 struct hn_softc *sc = ifp->if_softc; 3693 struct ifreq *ifr = (struct ifreq *)data, ifr_vf; 3694 struct ifnet *vf_ifp; 3695 int mask, error = 0; 3696 struct ifrsskey *ifrk; 3697 struct ifrsshash *ifrh; 3698 uint32_t mtu; 3699 3700 switch (cmd) { 3701 case SIOCSIFMTU: 3702 if (ifr->ifr_mtu > HN_MTU_MAX) { 3703 error = EINVAL; 3704 break; 3705 } 3706 3707 HN_LOCK(sc); 3708 3709 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3710 HN_UNLOCK(sc); 3711 break; 3712 } 3713 3714 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 3715 /* Can't change MTU */ 3716 HN_UNLOCK(sc); 3717 error = EOPNOTSUPP; 3718 break; 3719 } 3720 3721 if (ifp->if_mtu == ifr->ifr_mtu) { 3722 HN_UNLOCK(sc); 3723 break; 3724 } 3725 3726 if (hn_xpnt_vf_isready(sc)) { 3727 vf_ifp = sc->hn_vf_ifp; 3728 ifr_vf = *ifr; 3729 strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname, 3730 sizeof(ifr_vf.ifr_name)); 3731 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, 3732 (caddr_t)&ifr_vf); 3733 if (error) { 3734 HN_UNLOCK(sc); 3735 if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n", 3736 vf_ifp->if_xname, ifr->ifr_mtu, error); 3737 break; 3738 } 3739 } 3740 3741 /* 3742 * Suspend this interface before the synthetic parts 3743 * are ripped. 3744 */ 3745 hn_suspend(sc); 3746 3747 /* 3748 * Detach the synthetics parts, i.e. NVS and RNDIS. 3749 */ 3750 hn_synth_detach(sc); 3751 3752 /* 3753 * Reattach the synthetic parts, i.e. NVS and RNDIS, 3754 * with the new MTU setting. 3755 */ 3756 error = hn_synth_attach(sc, ifr->ifr_mtu); 3757 if (error) { 3758 HN_UNLOCK(sc); 3759 break; 3760 } 3761 3762 error = hn_rndis_get_mtu(sc, &mtu); 3763 if (error) 3764 mtu = ifr->ifr_mtu; 3765 else if (bootverbose) 3766 if_printf(ifp, "RNDIS mtu %u\n", mtu); 3767 3768 /* 3769 * Commit the requested MTU, after the synthetic parts 3770 * have been successfully attached. 3771 */ 3772 if (mtu >= ifr->ifr_mtu) { 3773 mtu = ifr->ifr_mtu; 3774 } else { 3775 if_printf(ifp, "fixup mtu %d -> %u\n", 3776 ifr->ifr_mtu, mtu); 3777 } 3778 ifp->if_mtu = mtu; 3779 3780 /* 3781 * Synthetic parts' reattach may change the chimney 3782 * sending size; update it. 3783 */ 3784 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 3785 hn_set_chim_size(sc, sc->hn_chim_szmax); 3786 3787 /* 3788 * Make sure that various parameters based on MTU are 3789 * still valid, after the MTU change. 3790 */ 3791 hn_mtu_change_fixup(sc); 3792 3793 /* 3794 * All done! Resume the interface now. 3795 */ 3796 hn_resume(sc); 3797 3798 if ((sc->hn_flags & HN_FLAG_RXVF) || 3799 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 3800 /* 3801 * Since we have reattached the NVS part, 3802 * change the datapath to VF again; in case 3803 * that it is lost, after the NVS was detached. 3804 */ 3805 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 3806 } 3807 3808 HN_UNLOCK(sc); 3809 break; 3810 3811 case SIOCSIFFLAGS: 3812 HN_LOCK(sc); 3813 3814 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3815 HN_UNLOCK(sc); 3816 break; 3817 } 3818 3819 if (hn_xpnt_vf_isready(sc)) 3820 hn_xpnt_vf_saveifflags(sc); 3821 3822 if (ifp->if_flags & IFF_UP) { 3823 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3824 /* 3825 * Caller meight hold mutex, e.g. 3826 * bpf; use busy-wait for the RNDIS 3827 * reply. 
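 *
 * HN_NO_SLEEPING() switches the softc into busy-wait mode for the
 * RNDIS/NVS exchanges done by hn_rxfilter_config(), and
 * HN_SLEEPING_OK() afterwards restores the normal sleeping
 * behavior.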
3828 */ 3829 HN_NO_SLEEPING(sc); 3830 hn_rxfilter_config(sc); 3831 HN_SLEEPING_OK(sc); 3832 3833 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 3834 error = hn_xpnt_vf_iocsetflags(sc); 3835 } else { 3836 hn_init_locked(sc); 3837 } 3838 } else { 3839 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3840 hn_stop(sc, false); 3841 } 3842 sc->hn_if_flags = ifp->if_flags; 3843 3844 HN_UNLOCK(sc); 3845 break; 3846 3847 case SIOCSIFCAP: 3848 HN_LOCK(sc); 3849 3850 if (hn_xpnt_vf_isready(sc)) { 3851 ifr_vf = *ifr; 3852 strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname, 3853 sizeof(ifr_vf.ifr_name)); 3854 error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf); 3855 HN_UNLOCK(sc); 3856 break; 3857 } 3858 3859 /* 3860 * Fix up requested capabilities w/ supported capabilities, 3861 * since the supported capabilities could have been changed. 3862 */ 3863 mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ 3864 ifp->if_capenable; 3865 3866 if (mask & IFCAP_TXCSUM) { 3867 ifp->if_capenable ^= IFCAP_TXCSUM; 3868 if (ifp->if_capenable & IFCAP_TXCSUM) 3869 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 3870 else 3871 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 3872 } 3873 if (mask & IFCAP_TXCSUM_IPV6) { 3874 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 3875 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 3876 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 3877 else 3878 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 3879 } 3880 3881 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 3882 if (mask & IFCAP_RXCSUM) 3883 ifp->if_capenable ^= IFCAP_RXCSUM; 3884 #ifdef foo 3885 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 3886 if (mask & IFCAP_RXCSUM_IPV6) 3887 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 3888 #endif 3889 3890 if (mask & IFCAP_LRO) 3891 ifp->if_capenable ^= IFCAP_LRO; 3892 3893 if (mask & IFCAP_TSO4) { 3894 ifp->if_capenable ^= IFCAP_TSO4; 3895 if (ifp->if_capenable & IFCAP_TSO4) 3896 ifp->if_hwassist |= CSUM_IP_TSO; 3897 else 3898 ifp->if_hwassist &= ~CSUM_IP_TSO; 3899 } 3900 if (mask & IFCAP_TSO6) { 3901 ifp->if_capenable ^= IFCAP_TSO6; 3902 if (ifp->if_capenable & IFCAP_TSO6) 3903 ifp->if_hwassist |= CSUM_IP6_TSO; 3904 else 3905 ifp->if_hwassist &= ~CSUM_IP6_TSO; 3906 } 3907 3908 HN_UNLOCK(sc); 3909 break; 3910 3911 case SIOCADDMULTI: 3912 case SIOCDELMULTI: 3913 HN_LOCK(sc); 3914 3915 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3916 HN_UNLOCK(sc); 3917 break; 3918 } 3919 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3920 /* 3921 * Multicast uses mutex; use busy-wait for 3922 * the RNDIS reply. 3923 */ 3924 HN_NO_SLEEPING(sc); 3925 hn_rxfilter_config(sc); 3926 HN_SLEEPING_OK(sc); 3927 } 3928 3929 /* XXX vlan(4) style mcast addr maintenance */ 3930 if (hn_xpnt_vf_isready(sc)) { 3931 int old_if_flags; 3932 3933 old_if_flags = sc->hn_vf_ifp->if_flags; 3934 hn_xpnt_vf_saveifflags(sc); 3935 3936 if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && 3937 ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & 3938 IFF_ALLMULTI)) 3939 error = hn_xpnt_vf_iocsetflags(sc); 3940 } 3941 3942 HN_UNLOCK(sc); 3943 break; 3944 3945 case SIOCSIFMEDIA: 3946 case SIOCGIFMEDIA: 3947 HN_LOCK(sc); 3948 if (hn_xpnt_vf_isready(sc)) { 3949 /* 3950 * SIOCGIFMEDIA expects ifmediareq, so don't 3951 * create and pass ifr_vf to the VF here; just 3952 * replace the ifr_name. 3953 */ 3954 vf_ifp = sc->hn_vf_ifp; 3955 strlcpy(ifr->ifr_name, vf_ifp->if_xname, 3956 sizeof(ifr->ifr_name)); 3957 error = vf_ifp->if_ioctl(vf_ifp, cmd, data); 3958 /* Restore the ifr_name. 
*/ 3959 strlcpy(ifr->ifr_name, ifp->if_xname, 3960 sizeof(ifr->ifr_name)); 3961 HN_UNLOCK(sc); 3962 break; 3963 } 3964 HN_UNLOCK(sc); 3965 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 3966 break; 3967 3968 case SIOCGIFRSSHASH: 3969 ifrh = (struct ifrsshash *)data; 3970 HN_LOCK(sc); 3971 if (sc->hn_rx_ring_inuse == 1) { 3972 HN_UNLOCK(sc); 3973 ifrh->ifrh_func = RSS_FUNC_NONE; 3974 ifrh->ifrh_types = 0; 3975 break; 3976 } 3977 3978 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 3979 ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; 3980 else 3981 ifrh->ifrh_func = RSS_FUNC_PRIVATE; 3982 ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash); 3983 HN_UNLOCK(sc); 3984 break; 3985 3986 case SIOCGIFRSSKEY: 3987 ifrk = (struct ifrsskey *)data; 3988 HN_LOCK(sc); 3989 if (sc->hn_rx_ring_inuse == 1) { 3990 HN_UNLOCK(sc); 3991 ifrk->ifrk_func = RSS_FUNC_NONE; 3992 ifrk->ifrk_keylen = 0; 3993 break; 3994 } 3995 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 3996 ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; 3997 else 3998 ifrk->ifrk_func = RSS_FUNC_PRIVATE; 3999 ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ; 4000 memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key, 4001 NDIS_HASH_KEYSIZE_TOEPLITZ); 4002 HN_UNLOCK(sc); 4003 break; 4004 4005 default: 4006 error = ether_ioctl(ifp, cmd, data); 4007 break; 4008 } 4009 return (error); 4010 } 4011 4012 static void 4013 hn_stop(struct hn_softc *sc, bool detaching) 4014 { 4015 struct ifnet *ifp = sc->hn_ifp; 4016 int i; 4017 4018 HN_LOCK_ASSERT(sc); 4019 4020 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 4021 ("synthetic parts were not attached")); 4022 4023 /* Clear RUNNING bit ASAP. */ 4024 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4025 4026 /* Disable polling. */ 4027 hn_polling(sc, 0); 4028 4029 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 4030 KASSERT(sc->hn_vf_ifp != NULL, 4031 ("%s: VF is not attached", ifp->if_xname)); 4032 4033 /* Mark transparent mode VF as disabled. */ 4034 hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */); 4035 4036 /* 4037 * NOTE: 4038 * Datapath setting must happen _before_ bringing 4039 * the VF down. 4040 */ 4041 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 4042 4043 /* 4044 * Bring the VF down. 4045 */ 4046 hn_xpnt_vf_saveifflags(sc); 4047 sc->hn_vf_ifp->if_flags &= ~IFF_UP; 4048 hn_xpnt_vf_iocsetflags(sc); 4049 } 4050 4051 /* Suspend data transfers. */ 4052 hn_suspend_data(sc); 4053 4054 /* Clear OACTIVE bit. */ 4055 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4056 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4057 sc->hn_tx_ring[i].hn_oactive = 0; 4058 4059 /* 4060 * If the non-transparent mode VF is active, make sure 4061 * that the RX filter still allows packet reception. 4062 */ 4063 if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) 4064 hn_rxfilter_config(sc); 4065 } 4066 4067 static void 4068 hn_init_locked(struct hn_softc *sc) 4069 { 4070 struct ifnet *ifp = sc->hn_ifp; 4071 int i; 4072 4073 HN_LOCK_ASSERT(sc); 4074 4075 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 4076 return; 4077 4078 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 4079 return; 4080 4081 /* Configure RX filter */ 4082 hn_rxfilter_config(sc); 4083 4084 /* Clear OACTIVE bit. */ 4085 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4086 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4087 sc->hn_tx_ring[i].hn_oactive = 0; 4088 4089 /* Clear TX 'suspended' bit. */ 4090 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 4091 4092 if (hn_xpnt_vf_isready(sc)) { 4093 /* Initialize transparent VF. 
*/ 4094 hn_xpnt_vf_init(sc); 4095 } 4096 4097 /* Everything is ready; unleash! */ 4098 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4099 4100 /* Re-enable polling if requested. */ 4101 if (sc->hn_pollhz > 0) 4102 hn_polling(sc, sc->hn_pollhz); 4103 } 4104 4105 static void 4106 hn_init(void *xsc) 4107 { 4108 struct hn_softc *sc = xsc; 4109 4110 HN_LOCK(sc); 4111 hn_init_locked(sc); 4112 HN_UNLOCK(sc); 4113 } 4114 4115 #if __FreeBSD_version >= 1100099 4116 4117 static int 4118 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 4119 { 4120 struct hn_softc *sc = arg1; 4121 unsigned int lenlim; 4122 int error; 4123 4124 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 4125 error = sysctl_handle_int(oidp, &lenlim, 0, req); 4126 if (error || req->newptr == NULL) 4127 return error; 4128 4129 HN_LOCK(sc); 4130 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 4131 lenlim > TCP_LRO_LENGTH_MAX) { 4132 HN_UNLOCK(sc); 4133 return EINVAL; 4134 } 4135 hn_set_lro_lenlim(sc, lenlim); 4136 HN_UNLOCK(sc); 4137 4138 return 0; 4139 } 4140 4141 static int 4142 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 4143 { 4144 struct hn_softc *sc = arg1; 4145 int ackcnt, error, i; 4146 4147 /* 4148 * lro_ackcnt_lim is append count limit, 4149 * +1 to turn it into aggregation limit. 4150 */ 4151 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 4152 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 4153 if (error || req->newptr == NULL) 4154 return error; 4155 4156 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 4157 return EINVAL; 4158 4159 /* 4160 * Convert aggregation limit back to append 4161 * count limit. 4162 */ 4163 --ackcnt; 4164 HN_LOCK(sc); 4165 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 4166 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 4167 HN_UNLOCK(sc); 4168 return 0; 4169 } 4170 4171 #endif 4172 4173 static int 4174 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 4175 { 4176 struct hn_softc *sc = arg1; 4177 int hcsum = arg2; 4178 int on, error, i; 4179 4180 on = 0; 4181 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 4182 on = 1; 4183 4184 error = sysctl_handle_int(oidp, &on, 0, req); 4185 if (error || req->newptr == NULL) 4186 return error; 4187 4188 HN_LOCK(sc); 4189 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4190 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4191 4192 if (on) 4193 rxr->hn_trust_hcsum |= hcsum; 4194 else 4195 rxr->hn_trust_hcsum &= ~hcsum; 4196 } 4197 HN_UNLOCK(sc); 4198 return 0; 4199 } 4200 4201 static int 4202 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 4203 { 4204 struct hn_softc *sc = arg1; 4205 int chim_size, error; 4206 4207 chim_size = sc->hn_tx_ring[0].hn_chim_size; 4208 error = sysctl_handle_int(oidp, &chim_size, 0, req); 4209 if (error || req->newptr == NULL) 4210 return error; 4211 4212 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 4213 return EINVAL; 4214 4215 HN_LOCK(sc); 4216 hn_set_chim_size(sc, chim_size); 4217 HN_UNLOCK(sc); 4218 return 0; 4219 } 4220 4221 #if __FreeBSD_version < 1100095 4222 static int 4223 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 4224 { 4225 struct hn_softc *sc = arg1; 4226 int ofs = arg2, i, error; 4227 struct hn_rx_ring *rxr; 4228 uint64_t stat; 4229 4230 stat = 0; 4231 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4232 rxr = &sc->hn_rx_ring[i]; 4233 stat += *((int *)((uint8_t *)rxr + ofs)); 4234 } 4235 4236 error = sysctl_handle_64(oidp, &stat, 0, req); 4237 if (error || req->newptr == NULL) 4238 return error; 4239 4240 /* Zero out this stat. 
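 * Reads return the sum of this counter across all RX rings; any
 * write through the sysctl resets the counter on every ring.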
*/ 4241 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4242 rxr = &sc->hn_rx_ring[i]; 4243 *((int *)((uint8_t *)rxr + ofs)) = 0; 4244 } 4245 return 0; 4246 } 4247 #else 4248 static int 4249 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 4250 { 4251 struct hn_softc *sc = arg1; 4252 int ofs = arg2, i, error; 4253 struct hn_rx_ring *rxr; 4254 uint64_t stat; 4255 4256 stat = 0; 4257 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4258 rxr = &sc->hn_rx_ring[i]; 4259 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 4260 } 4261 4262 error = sysctl_handle_64(oidp, &stat, 0, req); 4263 if (error || req->newptr == NULL) 4264 return error; 4265 4266 /* Zero out this stat. */ 4267 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4268 rxr = &sc->hn_rx_ring[i]; 4269 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 4270 } 4271 return 0; 4272 } 4273 4274 #endif 4275 4276 static int 4277 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4278 { 4279 struct hn_softc *sc = arg1; 4280 int ofs = arg2, i, error; 4281 struct hn_rx_ring *rxr; 4282 u_long stat; 4283 4284 stat = 0; 4285 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4286 rxr = &sc->hn_rx_ring[i]; 4287 stat += *((u_long *)((uint8_t *)rxr + ofs)); 4288 } 4289 4290 error = sysctl_handle_long(oidp, &stat, 0, req); 4291 if (error || req->newptr == NULL) 4292 return error; 4293 4294 /* Zero out this stat. */ 4295 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4296 rxr = &sc->hn_rx_ring[i]; 4297 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 4298 } 4299 return 0; 4300 } 4301 4302 static int 4303 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4304 { 4305 struct hn_softc *sc = arg1; 4306 int ofs = arg2, i, error; 4307 struct hn_tx_ring *txr; 4308 u_long stat; 4309 4310 stat = 0; 4311 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4312 txr = &sc->hn_tx_ring[i]; 4313 stat += *((u_long *)((uint8_t *)txr + ofs)); 4314 } 4315 4316 error = sysctl_handle_long(oidp, &stat, 0, req); 4317 if (error || req->newptr == NULL) 4318 return error; 4319 4320 /* Zero out this stat. 
*/ 4321 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4322 txr = &sc->hn_tx_ring[i]; 4323 *((u_long *)((uint8_t *)txr + ofs)) = 0; 4324 } 4325 return 0; 4326 } 4327 4328 static int 4329 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 4330 { 4331 struct hn_softc *sc = arg1; 4332 int ofs = arg2, i, error, conf; 4333 struct hn_tx_ring *txr; 4334 4335 txr = &sc->hn_tx_ring[0]; 4336 conf = *((int *)((uint8_t *)txr + ofs)); 4337 4338 error = sysctl_handle_int(oidp, &conf, 0, req); 4339 if (error || req->newptr == NULL) 4340 return error; 4341 4342 HN_LOCK(sc); 4343 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4344 txr = &sc->hn_tx_ring[i]; 4345 *((int *)((uint8_t *)txr + ofs)) = conf; 4346 } 4347 HN_UNLOCK(sc); 4348 4349 return 0; 4350 } 4351 4352 static int 4353 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 4354 { 4355 struct hn_softc *sc = arg1; 4356 int error, size; 4357 4358 size = sc->hn_agg_size; 4359 error = sysctl_handle_int(oidp, &size, 0, req); 4360 if (error || req->newptr == NULL) 4361 return (error); 4362 4363 HN_LOCK(sc); 4364 sc->hn_agg_size = size; 4365 hn_set_txagg(sc); 4366 HN_UNLOCK(sc); 4367 4368 return (0); 4369 } 4370 4371 static int 4372 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 4373 { 4374 struct hn_softc *sc = arg1; 4375 int error, pkts; 4376 4377 pkts = sc->hn_agg_pkts; 4378 error = sysctl_handle_int(oidp, &pkts, 0, req); 4379 if (error || req->newptr == NULL) 4380 return (error); 4381 4382 HN_LOCK(sc); 4383 sc->hn_agg_pkts = pkts; 4384 hn_set_txagg(sc); 4385 HN_UNLOCK(sc); 4386 4387 return (0); 4388 } 4389 4390 static int 4391 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 4392 { 4393 struct hn_softc *sc = arg1; 4394 int pkts; 4395 4396 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 4397 return (sysctl_handle_int(oidp, &pkts, 0, req)); 4398 } 4399 4400 static int 4401 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 4402 { 4403 struct hn_softc *sc = arg1; 4404 int align; 4405 4406 align = sc->hn_tx_ring[0].hn_agg_align; 4407 return (sysctl_handle_int(oidp, &align, 0, req)); 4408 } 4409 4410 static void 4411 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) 4412 { 4413 if (pollhz == 0) 4414 vmbus_chan_poll_disable(chan); 4415 else 4416 vmbus_chan_poll_enable(chan, pollhz); 4417 } 4418 4419 static void 4420 hn_polling(struct hn_softc *sc, u_int pollhz) 4421 { 4422 int nsubch = sc->hn_rx_ring_inuse - 1; 4423 4424 HN_LOCK_ASSERT(sc); 4425 4426 if (nsubch > 0) { 4427 struct vmbus_channel **subch; 4428 int i; 4429 4430 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 4431 for (i = 0; i < nsubch; ++i) 4432 hn_chan_polling(subch[i], pollhz); 4433 vmbus_subchan_rel(subch, nsubch); 4434 } 4435 hn_chan_polling(sc->hn_prichan, pollhz); 4436 } 4437 4438 static int 4439 hn_polling_sysctl(SYSCTL_HANDLER_ARGS) 4440 { 4441 struct hn_softc *sc = arg1; 4442 int pollhz, error; 4443 4444 pollhz = sc->hn_pollhz; 4445 error = sysctl_handle_int(oidp, &pollhz, 0, req); 4446 if (error || req->newptr == NULL) 4447 return (error); 4448 4449 if (pollhz != 0 && 4450 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) 4451 return (EINVAL); 4452 4453 HN_LOCK(sc); 4454 if (sc->hn_pollhz != pollhz) { 4455 sc->hn_pollhz = pollhz; 4456 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && 4457 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 4458 hn_polling(sc, sc->hn_pollhz); 4459 } 4460 HN_UNLOCK(sc); 4461 4462 return (0); 4463 } 4464 4465 static int 4466 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 4467 { 4468 struct hn_softc *sc = arg1; 4469 char verstr[16]; 4470 4471 snprintf(verstr, sizeof(verstr), "%u.%u", 4472 
HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 4473 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 4474 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 4475 } 4476 4477 static int 4478 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 4479 { 4480 struct hn_softc *sc = arg1; 4481 char caps_str[128]; 4482 uint32_t caps; 4483 4484 HN_LOCK(sc); 4485 caps = sc->hn_caps; 4486 HN_UNLOCK(sc); 4487 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 4488 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 4489 } 4490 4491 static int 4492 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 4493 { 4494 struct hn_softc *sc = arg1; 4495 char assist_str[128]; 4496 uint32_t hwassist; 4497 4498 HN_LOCK(sc); 4499 hwassist = sc->hn_ifp->if_hwassist; 4500 HN_UNLOCK(sc); 4501 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 4502 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 4503 } 4504 4505 static int 4506 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 4507 { 4508 struct hn_softc *sc = arg1; 4509 char filter_str[128]; 4510 uint32_t filter; 4511 4512 HN_LOCK(sc); 4513 filter = sc->hn_rx_filter; 4514 HN_UNLOCK(sc); 4515 snprintf(filter_str, sizeof(filter_str), "%b", filter, 4516 NDIS_PACKET_TYPES); 4517 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 4518 } 4519 4520 #ifndef RSS 4521 4522 static int 4523 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 4524 { 4525 struct hn_softc *sc = arg1; 4526 int error; 4527 4528 HN_LOCK(sc); 4529 4530 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4531 if (error || req->newptr == NULL) 4532 goto back; 4533 4534 if ((sc->hn_flags & HN_FLAG_RXVF) || 4535 (hn_xpnt_vf && sc->hn_vf_ifp != NULL)) { 4536 /* 4537 * RSS key is synchronized w/ VF's, don't allow users 4538 * to change it. 4539 */ 4540 error = EBUSY; 4541 goto back; 4542 } 4543 4544 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4545 if (error) 4546 goto back; 4547 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 4548 4549 if (sc->hn_rx_ring_inuse > 1) { 4550 error = hn_rss_reconfig(sc); 4551 } else { 4552 /* Not RSS capable, at least for now; just save the RSS key. */ 4553 error = 0; 4554 } 4555 back: 4556 HN_UNLOCK(sc); 4557 return (error); 4558 } 4559 4560 static int 4561 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 4562 { 4563 struct hn_softc *sc = arg1; 4564 int error; 4565 4566 HN_LOCK(sc); 4567 4568 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4569 if (error || req->newptr == NULL) 4570 goto back; 4571 4572 /* 4573 * Don't allow RSS indirect table change, if this interface is not 4574 * RSS capable currently. 
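 * A single RX ring in use means RSS is effectively disabled, which
 * is also what SIOCGIFRSSHASH reports for this interface.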
4575 */ 4576 if (sc->hn_rx_ring_inuse == 1) { 4577 error = EOPNOTSUPP; 4578 goto back; 4579 } 4580 4581 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4582 if (error) 4583 goto back; 4584 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 4585 4586 hn_rss_ind_fixup(sc); 4587 error = hn_rss_reconfig(sc); 4588 back: 4589 HN_UNLOCK(sc); 4590 return (error); 4591 } 4592 4593 #endif /* !RSS */ 4594 4595 static int 4596 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 4597 { 4598 struct hn_softc *sc = arg1; 4599 char hash_str[128]; 4600 uint32_t hash; 4601 4602 HN_LOCK(sc); 4603 hash = sc->hn_rss_hash; 4604 HN_UNLOCK(sc); 4605 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4606 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4607 } 4608 4609 static int 4610 hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS) 4611 { 4612 struct hn_softc *sc = arg1; 4613 char hash_str[128]; 4614 uint32_t hash; 4615 4616 HN_LOCK(sc); 4617 hash = sc->hn_rss_hcap; 4618 HN_UNLOCK(sc); 4619 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4620 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4621 } 4622 4623 static int 4624 hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS) 4625 { 4626 struct hn_softc *sc = arg1; 4627 char hash_str[128]; 4628 uint32_t hash; 4629 4630 HN_LOCK(sc); 4631 hash = sc->hn_rx_ring[0].hn_mbuf_hash; 4632 HN_UNLOCK(sc); 4633 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4634 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4635 } 4636 4637 static int 4638 hn_vf_sysctl(SYSCTL_HANDLER_ARGS) 4639 { 4640 struct hn_softc *sc = arg1; 4641 char vf_name[IFNAMSIZ + 1]; 4642 struct ifnet *vf_ifp; 4643 4644 HN_LOCK(sc); 4645 vf_name[0] = '\0'; 4646 vf_ifp = sc->hn_vf_ifp; 4647 if (vf_ifp != NULL) 4648 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4649 HN_UNLOCK(sc); 4650 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4651 } 4652 4653 static int 4654 hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) 4655 { 4656 struct hn_softc *sc = arg1; 4657 char vf_name[IFNAMSIZ + 1]; 4658 struct ifnet *vf_ifp; 4659 4660 HN_LOCK(sc); 4661 vf_name[0] = '\0'; 4662 vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; 4663 if (vf_ifp != NULL) 4664 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4665 HN_UNLOCK(sc); 4666 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4667 } 4668 4669 static int 4670 hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) 4671 { 4672 struct rm_priotracker pt; 4673 struct sbuf *sb; 4674 int error, i; 4675 bool first; 4676 4677 error = sysctl_wire_old_buffer(req, 0); 4678 if (error != 0) 4679 return (error); 4680 4681 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4682 if (sb == NULL) 4683 return (ENOMEM); 4684 4685 rm_rlock(&hn_vfmap_lock, &pt); 4686 4687 first = true; 4688 for (i = 0; i < hn_vfmap_size; ++i) { 4689 struct ifnet *ifp; 4690 4691 if (hn_vfmap[i] == NULL) 4692 continue; 4693 4694 ifp = ifnet_byindex(i); 4695 if (ifp != NULL) { 4696 if (first) 4697 sbuf_printf(sb, "%s", ifp->if_xname); 4698 else 4699 sbuf_printf(sb, " %s", ifp->if_xname); 4700 first = false; 4701 } 4702 } 4703 4704 rm_runlock(&hn_vfmap_lock, &pt); 4705 4706 error = sbuf_finish(sb); 4707 sbuf_delete(sb); 4708 return (error); 4709 } 4710 4711 static int 4712 hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) 4713 { 4714 struct rm_priotracker pt; 4715 struct sbuf *sb; 4716 int error, i; 4717 bool first; 4718 4719 error = sysctl_wire_old_buffer(req, 0); 4720 if (error != 0) 4721 return (error); 4722 4723 sb = 
sbuf_new_for_sysctl(NULL, NULL, 128, req); 4724 if (sb == NULL) 4725 return (ENOMEM); 4726 4727 rm_rlock(&hn_vfmap_lock, &pt); 4728 4729 first = true; 4730 for (i = 0; i < hn_vfmap_size; ++i) { 4731 struct ifnet *ifp, *hn_ifp; 4732 4733 hn_ifp = hn_vfmap[i]; 4734 if (hn_ifp == NULL) 4735 continue; 4736 4737 ifp = ifnet_byindex(i); 4738 if (ifp != NULL) { 4739 if (first) { 4740 sbuf_printf(sb, "%s:%s", ifp->if_xname, 4741 hn_ifp->if_xname); 4742 } else { 4743 sbuf_printf(sb, " %s:%s", ifp->if_xname, 4744 hn_ifp->if_xname); 4745 } 4746 first = false; 4747 } 4748 } 4749 4750 rm_runlock(&hn_vfmap_lock, &pt); 4751 4752 error = sbuf_finish(sb); 4753 sbuf_delete(sb); 4754 return (error); 4755 } 4756 4757 static int 4758 hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS) 4759 { 4760 struct hn_softc *sc = arg1; 4761 int error, onoff = 0; 4762 4763 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) 4764 onoff = 1; 4765 error = sysctl_handle_int(oidp, &onoff, 0, req); 4766 if (error || req->newptr == NULL) 4767 return (error); 4768 4769 HN_LOCK(sc); 4770 /* NOTE: hn_vf_lock for hn_transmit() */ 4771 rm_wlock(&sc->hn_vf_lock); 4772 if (onoff) 4773 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 4774 else 4775 sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF; 4776 rm_wunlock(&sc->hn_vf_lock); 4777 HN_UNLOCK(sc); 4778 4779 return (0); 4780 } 4781 4782 static int 4783 hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS) 4784 { 4785 struct hn_softc *sc = arg1; 4786 int enabled = 0; 4787 4788 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 4789 enabled = 1; 4790 return (sysctl_handle_int(oidp, &enabled, 0, req)); 4791 } 4792 4793 static int 4794 hn_check_iplen(const struct mbuf *m, int hoff) 4795 { 4796 const struct ip *ip; 4797 int len, iphlen, iplen; 4798 const struct tcphdr *th; 4799 int thoff; /* TCP data offset */ 4800 4801 len = hoff + sizeof(struct ip); 4802 4803 /* The packet must be at least the size of an IP header. */ 4804 if (m->m_pkthdr.len < len) 4805 return IPPROTO_DONE; 4806 4807 /* The fixed IP header must reside completely in the first mbuf. */ 4808 if (m->m_len < len) 4809 return IPPROTO_DONE; 4810 4811 ip = mtodo(m, hoff); 4812 4813 /* Bound check the packet's stated IP header length. */ 4814 iphlen = ip->ip_hl << 2; 4815 if (iphlen < sizeof(struct ip)) /* minimum header length */ 4816 return IPPROTO_DONE; 4817 4818 /* The full IP header must reside completely in the one mbuf. */ 4819 if (m->m_len < hoff + iphlen) 4820 return IPPROTO_DONE; 4821 4822 iplen = ntohs(ip->ip_len); 4823 4824 /* 4825 * Check that the amount of data in the buffers is 4826 * at least as much as the IP header would have us expect. 4827 */ 4828 if (m->m_pkthdr.len < hoff + iplen) 4829 return IPPROTO_DONE; 4830 4831 /* 4832 * Ignore IP fragments. 4833 */ 4834 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 4835 return IPPROTO_DONE; 4836 4837 /* 4838 * The TCP/IP or UDP/IP header must be entirely contained within 4839 * the first fragment of a packet.
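 * When all of these checks pass, the IP protocol number is returned;
 * IPPROTO_DONE tells the caller that the packet could not be
 * validated and must not be trusted.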
4840 */ 4841 switch (ip->ip_p) { 4842 case IPPROTO_TCP: 4843 if (iplen < iphlen + sizeof(struct tcphdr)) 4844 return IPPROTO_DONE; 4845 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 4846 return IPPROTO_DONE; 4847 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 4848 thoff = th->th_off << 2; 4849 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 4850 return IPPROTO_DONE; 4851 if (m->m_len < hoff + iphlen + thoff) 4852 return IPPROTO_DONE; 4853 break; 4854 case IPPROTO_UDP: 4855 if (iplen < iphlen + sizeof(struct udphdr)) 4856 return IPPROTO_DONE; 4857 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 4858 return IPPROTO_DONE; 4859 break; 4860 default: 4861 if (iplen < iphlen) 4862 return IPPROTO_DONE; 4863 break; 4864 } 4865 return ip->ip_p; 4866 } 4867 4868 static void 4869 hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto) 4870 { 4871 const struct ether_header *eh; 4872 uint16_t etype; 4873 int hoff; 4874 4875 hoff = sizeof(*eh); 4876 /* Checked at the beginning of this function. */ 4877 KASSERT(m_new->m_len >= hoff, ("not ethernet frame")); 4878 4879 eh = mtod(m_new, const struct ether_header *); 4880 etype = ntohs(eh->ether_type); 4881 if (etype == ETHERTYPE_VLAN) { 4882 const struct ether_vlan_header *evl; 4883 4884 hoff = sizeof(*evl); 4885 if (m_new->m_len < hoff) 4886 return; 4887 evl = mtod(m_new, const struct ether_vlan_header *); 4888 etype = ntohs(evl->evl_proto); 4889 } 4890 *l3proto = etype; 4891 4892 if (etype == ETHERTYPE_IP) 4893 *l4proto = hn_check_iplen(m_new, hoff); 4894 else 4895 *l4proto = IPPROTO_DONE; 4896 } 4897 4898 static int 4899 hn_create_rx_data(struct hn_softc *sc, int ring_cnt) 4900 { 4901 struct sysctl_oid_list *child; 4902 struct sysctl_ctx_list *ctx; 4903 device_t dev = sc->hn_dev; 4904 #if defined(INET) || defined(INET6) 4905 #if __FreeBSD_version >= 1100095 4906 int lroent_cnt; 4907 #endif 4908 #endif 4909 int i; 4910 4911 /* 4912 * Create RXBUF for reception. 4913 * 4914 * NOTE: 4915 * - It is shared by all channels. 4916 * - A large enough buffer is allocated, certain version of NVSes 4917 * may further limit the usable space. 
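 * The buffer itself is a single page-aligned, zeroed allocation of
 * HN_RXBUF_SIZE bytes, DMA-mapped once for the whole device.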
4918 */ 4919 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4920 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 4921 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4922 if (sc->hn_rxbuf == NULL) { 4923 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 4924 return (ENOMEM); 4925 } 4926 4927 sc->hn_rx_ring_cnt = ring_cnt; 4928 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 4929 4930 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 4931 M_DEVBUF, M_WAITOK | M_ZERO); 4932 4933 #if defined(INET) || defined(INET6) 4934 #if __FreeBSD_version >= 1100095 4935 lroent_cnt = hn_lro_entry_count; 4936 if (lroent_cnt < TCP_LRO_ENTRIES) 4937 lroent_cnt = TCP_LRO_ENTRIES; 4938 if (bootverbose) 4939 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 4940 #endif 4941 #endif /* INET || INET6 */ 4942 4943 ctx = device_get_sysctl_ctx(dev); 4944 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 4945 4946 /* Create dev.hn.UNIT.rx sysctl tree */ 4947 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 4948 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4949 4950 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4951 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4952 4953 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4954 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 4955 &rxr->hn_br_dma, BUS_DMA_WAITOK); 4956 if (rxr->hn_br == NULL) { 4957 device_printf(dev, "allocate bufring failed\n"); 4958 return (ENOMEM); 4959 } 4960 4961 if (hn_trust_hosttcp) 4962 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 4963 if (hn_trust_hostudp) 4964 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 4965 if (hn_trust_hostip) 4966 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 4967 rxr->hn_mbuf_hash = NDIS_HASH_ALL; 4968 rxr->hn_ifp = sc->hn_ifp; 4969 if (i < sc->hn_tx_ring_cnt) 4970 rxr->hn_txr = &sc->hn_tx_ring[i]; 4971 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 4972 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 4973 rxr->hn_rx_idx = i; 4974 rxr->hn_rxbuf = sc->hn_rxbuf; 4975 4976 /* 4977 * Initialize LRO. 
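 * Where supported, the per-ring length and ACK-append limits are
 * applied below and can later be tuned through the lro_length_lim
 * and lro_ackcnt_lim sysctls.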
*/ 4979 #if defined(INET) || defined(INET6) 4980 #if __FreeBSD_version >= 1100095 4981 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 4982 hn_lro_mbufq_depth); 4983 #else 4984 tcp_lro_init(&rxr->hn_lro); 4985 rxr->hn_lro.ifp = sc->hn_ifp; 4986 #endif 4987 #if __FreeBSD_version >= 1100099 4988 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 4989 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 4990 #endif 4991 #endif /* INET || INET6 */ 4992 4993 if (sc->hn_rx_sysctl_tree != NULL) { 4994 char name[16]; 4995 4996 /* 4997 * Create per RX ring sysctl tree: 4998 * dev.hn.UNIT.rx.RINGID 4999 */ 5000 snprintf(name, sizeof(name), "%d", i); 5001 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 5002 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 5003 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5004 5005 if (rxr->hn_rx_sysctl_tree != NULL) { 5006 SYSCTL_ADD_ULONG(ctx, 5007 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5008 OID_AUTO, "packets", CTLFLAG_RW, 5009 &rxr->hn_pkts, "# of packets received"); 5010 SYSCTL_ADD_ULONG(ctx, 5011 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5012 OID_AUTO, "rss_pkts", CTLFLAG_RW, 5013 &rxr->hn_rss_pkts, 5014 "# of packets w/ RSS info received"); 5015 SYSCTL_ADD_INT(ctx, 5016 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5017 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 5018 &rxr->hn_pktbuf_len, 0, 5019 "Temporary channel packet buffer length"); 5020 } 5021 } 5022 } 5023 5024 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 5025 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5026 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 5027 #if __FreeBSD_version < 1100095 5028 hn_rx_stat_int_sysctl, 5029 #else 5030 hn_rx_stat_u64_sysctl, 5031 #endif 5032 "LU", "LRO queued"); 5033 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 5034 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5035 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 5036 #if __FreeBSD_version < 1100095 5037 hn_rx_stat_int_sysctl, 5038 #else 5039 hn_rx_stat_u64_sysctl, 5040 #endif 5041 "LU", "LRO flushed"); 5042 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 5043 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5044 __offsetof(struct hn_rx_ring, hn_lro_tried), 5045 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 5046 #if __FreeBSD_version >= 1100099 5047 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 5048 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5049 hn_lro_lenlim_sysctl, "IU", 5050 "Max # of data bytes to be aggregated by LRO"); 5051 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 5052 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5053 hn_lro_ackcnt_sysctl, "I", 5054 "Max # of ACKs to be aggregated by LRO"); 5055 #endif 5056 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 5057 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 5058 hn_trust_hcsum_sysctl, "I", 5059 "Trust tcp segment verification on host side, " 5060 "when csum info is missing"); 5061 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 5062 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 5063 hn_trust_hcsum_sysctl, "I", 5064 "Trust udp datagram verification on host side, " 5065 "when csum info is missing"); 5066 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 5067 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 5068 hn_trust_hcsum_sysctl, "I", 5069 "Trust ip packet verification on host side, " 5070 "when csum info is missing"); 5071 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 5072 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5073
__offsetof(struct hn_rx_ring, hn_csum_ip), 5074 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 5075 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 5076 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5077 __offsetof(struct hn_rx_ring, hn_csum_tcp), 5078 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 5079 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 5080 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5081 __offsetof(struct hn_rx_ring, hn_csum_udp), 5082 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 5083 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 5084 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5085 __offsetof(struct hn_rx_ring, hn_csum_trusted), 5086 hn_rx_stat_ulong_sysctl, "LU", 5087 "# of packets that we trust host's csum verification"); 5088 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 5089 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5090 __offsetof(struct hn_rx_ring, hn_small_pkts), 5091 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 5092 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 5093 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5094 __offsetof(struct hn_rx_ring, hn_ack_failed), 5095 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 5096 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 5097 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 5098 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 5099 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 5100 5101 return (0); 5102 } 5103 5104 static void 5105 hn_destroy_rx_data(struct hn_softc *sc) 5106 { 5107 int i; 5108 5109 if (sc->hn_rxbuf != NULL) { 5110 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) 5111 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 5112 else 5113 device_printf(sc->hn_dev, "RXBUF is referenced\n"); 5114 sc->hn_rxbuf = NULL; 5115 } 5116 5117 if (sc->hn_rx_ring_cnt == 0) 5118 return; 5119 5120 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 5121 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 5122 5123 if (rxr->hn_br == NULL) 5124 continue; 5125 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { 5126 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 5127 } else { 5128 device_printf(sc->hn_dev, 5129 "%dth channel bufring is referenced", i); 5130 } 5131 rxr->hn_br = NULL; 5132 5133 #if defined(INET) || defined(INET6) 5134 tcp_lro_free(&rxr->hn_lro); 5135 #endif 5136 free(rxr->hn_pktbuf, M_DEVBUF); 5137 } 5138 free(sc->hn_rx_ring, M_DEVBUF); 5139 sc->hn_rx_ring = NULL; 5140 5141 sc->hn_rx_ring_cnt = 0; 5142 sc->hn_rx_ring_inuse = 0; 5143 } 5144 5145 static int 5146 hn_tx_ring_create(struct hn_softc *sc, int id) 5147 { 5148 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 5149 device_t dev = sc->hn_dev; 5150 bus_dma_tag_t parent_dtag; 5151 int error, i; 5152 5153 txr->hn_sc = sc; 5154 txr->hn_tx_idx = id; 5155 5156 #ifndef HN_USE_TXDESC_BUFRING 5157 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 5158 #endif 5159 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 5160 5161 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 5162 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 5163 M_DEVBUF, M_WAITOK | M_ZERO); 5164 #ifndef HN_USE_TXDESC_BUFRING 5165 SLIST_INIT(&txr->hn_txlist); 5166 #else 5167 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 5168 M_WAITOK, &txr->hn_tx_lock); 5169 #endif 5170 5171 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { 5172 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( 5173 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); 5174 } else { 5175 txr->hn_tx_taskq = 
sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; 5176 } 5177 5178 #ifdef HN_IFSTART_SUPPORT 5179 if (hn_use_if_start) { 5180 txr->hn_txeof = hn_start_txeof; 5181 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 5182 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 5183 } else 5184 #endif 5185 { 5186 int br_depth; 5187 5188 txr->hn_txeof = hn_xmit_txeof; 5189 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 5190 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 5191 5192 br_depth = hn_get_txswq_depth(txr); 5193 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 5194 M_WAITOK, &txr->hn_tx_lock); 5195 } 5196 5197 txr->hn_direct_tx_size = hn_direct_tx_size; 5198 5199 /* 5200 * Always schedule transmission instead of trying to do direct 5201 * transmission. This one gives the best performance so far. 5202 */ 5203 txr->hn_sched_tx = 1; 5204 5205 parent_dtag = bus_get_dma_tag(dev); 5206 5207 /* DMA tag for RNDIS packet messages. */ 5208 error = bus_dma_tag_create(parent_dtag, /* parent */ 5209 HN_RNDIS_PKT_ALIGN, /* alignment */ 5210 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 5211 BUS_SPACE_MAXADDR, /* lowaddr */ 5212 BUS_SPACE_MAXADDR, /* highaddr */ 5213 NULL, NULL, /* filter, filterarg */ 5214 HN_RNDIS_PKT_LEN, /* maxsize */ 5215 1, /* nsegments */ 5216 HN_RNDIS_PKT_LEN, /* maxsegsize */ 5217 0, /* flags */ 5218 NULL, /* lockfunc */ 5219 NULL, /* lockfuncarg */ 5220 &txr->hn_tx_rndis_dtag); 5221 if (error) { 5222 device_printf(dev, "failed to create rndis dmatag\n"); 5223 return error; 5224 } 5225 5226 /* DMA tag for data. */ 5227 error = bus_dma_tag_create(parent_dtag, /* parent */ 5228 1, /* alignment */ 5229 HN_TX_DATA_BOUNDARY, /* boundary */ 5230 BUS_SPACE_MAXADDR, /* lowaddr */ 5231 BUS_SPACE_MAXADDR, /* highaddr */ 5232 NULL, NULL, /* filter, filterarg */ 5233 HN_TX_DATA_MAXSIZE, /* maxsize */ 5234 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 5235 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 5236 0, /* flags */ 5237 NULL, /* lockfunc */ 5238 NULL, /* lockfuncarg */ 5239 &txr->hn_tx_data_dtag); 5240 if (error) { 5241 device_printf(dev, "failed to create data dmatag\n"); 5242 return error; 5243 } 5244 5245 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 5246 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 5247 5248 txd->txr = txr; 5249 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 5250 STAILQ_INIT(&txd->agg_list); 5251 5252 /* 5253 * Allocate and load RNDIS packet message. 5254 */ 5255 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 5256 (void **)&txd->rndis_pkt, 5257 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 5258 &txd->rndis_pkt_dmap); 5259 if (error) { 5260 device_printf(dev, 5261 "failed to allocate rndis_packet_msg, %d\n", i); 5262 return error; 5263 } 5264 5265 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 5266 txd->rndis_pkt_dmap, 5267 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 5268 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 5269 BUS_DMA_NOWAIT); 5270 if (error) { 5271 device_printf(dev, 5272 "failed to load rndis_packet_msg, %d\n", i); 5273 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5274 txd->rndis_pkt, txd->rndis_pkt_dmap); 5275 return error; 5276 } 5277 5278 /* DMA map for TX data. 
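 * The map is created unloaded here; it is expected to be loaded
 * with the packet's mbuf data on the transmit path.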
*/ 5279 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 5280 &txd->data_dmap); 5281 if (error) { 5282 device_printf(dev, 5283 "failed to allocate tx data dmamap\n"); 5284 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 5285 txd->rndis_pkt_dmap); 5286 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5287 txd->rndis_pkt, txd->rndis_pkt_dmap); 5288 return error; 5289 } 5290 5291 /* All set, put it to list */ 5292 txd->flags |= HN_TXD_FLAG_ONLIST; 5293 #ifndef HN_USE_TXDESC_BUFRING 5294 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 5295 #else 5296 buf_ring_enqueue(txr->hn_txdesc_br, txd); 5297 #endif 5298 } 5299 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 5300 5301 if (sc->hn_tx_sysctl_tree != NULL) { 5302 struct sysctl_oid_list *child; 5303 struct sysctl_ctx_list *ctx; 5304 char name[16]; 5305 5306 /* 5307 * Create per TX ring sysctl tree: 5308 * dev.hn.UNIT.tx.RINGID 5309 */ 5310 ctx = device_get_sysctl_ctx(dev); 5311 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 5312 5313 snprintf(name, sizeof(name), "%d", id); 5314 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 5315 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5316 5317 if (txr->hn_tx_sysctl_tree != NULL) { 5318 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 5319 5320 #ifdef HN_DEBUG 5321 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 5322 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 5323 "# of available TX descs"); 5324 #endif 5325 #ifdef HN_IFSTART_SUPPORT 5326 if (!hn_use_if_start) 5327 #endif 5328 { 5329 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 5330 CTLFLAG_RD, &txr->hn_oactive, 0, 5331 "over active"); 5332 } 5333 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 5334 CTLFLAG_RW, &txr->hn_pkts, 5335 "# of packets transmitted"); 5336 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 5337 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 5338 } 5339 } 5340 5341 return 0; 5342 } 5343 5344 static void 5345 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 5346 { 5347 struct hn_tx_ring *txr = txd->txr; 5348 5349 KASSERT(txd->m == NULL, ("still has mbuf installed")); 5350 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 5351 5352 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 5353 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 5354 txd->rndis_pkt_dmap); 5355 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 5356 } 5357 5358 static void 5359 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) 5360 { 5361 5362 KASSERT(txd->refs == 0 || txd->refs == 1, 5363 ("invalid txd refs %d", txd->refs)); 5364 5365 /* Aggregated txds will be freed by their aggregating txd. */ 5366 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { 5367 int freed; 5368 5369 freed = hn_txdesc_put(txr, txd); 5370 KASSERT(freed, ("can't free txdesc")); 5371 } 5372 } 5373 5374 static void 5375 hn_tx_ring_destroy(struct hn_tx_ring *txr) 5376 { 5377 int i; 5378 5379 if (txr->hn_txdesc == NULL) 5380 return; 5381 5382 /* 5383 * NOTE: 5384 * Because the freeing of aggregated txds will be deferred 5385 * to the aggregating txd, two passes are used here: 5386 * - The first pass GCes any pending txds. This GC is necessary, 5387 * since if the channels are revoked, hypervisor will not 5388 * deliver send-done for all pending txds. 5389 * - The second pass frees the busdma stuffs, i.e. after all txds 5390 * were freed. 
5391 */ 5392 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5393 hn_txdesc_gc(txr, &txr->hn_txdesc[i]); 5394 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5395 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); 5396 5397 if (txr->hn_tx_data_dtag != NULL) 5398 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 5399 if (txr->hn_tx_rndis_dtag != NULL) 5400 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 5401 5402 #ifdef HN_USE_TXDESC_BUFRING 5403 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 5404 #endif 5405 5406 free(txr->hn_txdesc, M_DEVBUF); 5407 txr->hn_txdesc = NULL; 5408 5409 if (txr->hn_mbuf_br != NULL) 5410 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 5411 5412 #ifndef HN_USE_TXDESC_BUFRING 5413 mtx_destroy(&txr->hn_txlist_spin); 5414 #endif 5415 mtx_destroy(&txr->hn_tx_lock); 5416 } 5417 5418 static int 5419 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 5420 { 5421 struct sysctl_oid_list *child; 5422 struct sysctl_ctx_list *ctx; 5423 int i; 5424 5425 /* 5426 * Create TXBUF for chimney sending. 5427 * 5428 * NOTE: It is shared by all channels. 5429 */ 5430 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 5431 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 5432 BUS_DMA_WAITOK | BUS_DMA_ZERO); 5433 if (sc->hn_chim == NULL) { 5434 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 5435 return (ENOMEM); 5436 } 5437 5438 sc->hn_tx_ring_cnt = ring_cnt; 5439 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 5440 5441 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 5442 M_DEVBUF, M_WAITOK | M_ZERO); 5443 5444 ctx = device_get_sysctl_ctx(sc->hn_dev); 5445 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 5446 5447 /* Create dev.hn.UNIT.tx sysctl tree */ 5448 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 5449 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5450 5451 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 5452 int error; 5453 5454 error = hn_tx_ring_create(sc, i); 5455 if (error) 5456 return error; 5457 } 5458 5459 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 5460 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5461 __offsetof(struct hn_tx_ring, hn_no_txdescs), 5462 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 5463 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 5464 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5465 __offsetof(struct hn_tx_ring, hn_send_failed), 5466 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 5467 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 5468 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5469 __offsetof(struct hn_tx_ring, hn_txdma_failed), 5470 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 5471 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 5472 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5473 __offsetof(struct hn_tx_ring, hn_flush_failed), 5474 hn_tx_stat_ulong_sysctl, "LU", 5475 "# of packet transmission aggregation flush failure"); 5476 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 5477 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5478 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 5479 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 5480 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 5481 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5482 __offsetof(struct hn_tx_ring, hn_tx_chimney), 5483 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 5484 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 5485 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5486 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 5487 
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 5488 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 5489 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 5490 "# of total TX descs"); 5491 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 5492 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 5493 "Chimney send packet size upper boundary"); 5494 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 5495 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5496 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 5497 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 5498 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5499 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 5500 hn_tx_conf_int_sysctl, "I", 5501 "Size of the packet for direct transmission"); 5502 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 5503 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5504 __offsetof(struct hn_tx_ring, hn_sched_tx), 5505 hn_tx_conf_int_sysctl, "I", 5506 "Always schedule transmission " 5507 "instead of doing direct transmission"); 5508 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 5509 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 5510 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 5511 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 5512 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 5513 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 5514 "Applied packet transmission aggregation size"); 5515 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 5516 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5517 hn_txagg_pktmax_sysctl, "I", 5518 "Applied packet transmission aggregation packets"); 5519 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 5520 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5521 hn_txagg_align_sysctl, "I", 5522 "Applied packet transmission aggregation alignment"); 5523 5524 return 0; 5525 } 5526 5527 static void 5528 hn_set_chim_size(struct hn_softc *sc, int chim_size) 5529 { 5530 int i; 5531 5532 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5533 sc->hn_tx_ring[i].hn_chim_size = chim_size; 5534 } 5535 5536 static void 5537 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 5538 { 5539 struct ifnet *ifp = sc->hn_ifp; 5540 u_int hw_tsomax; 5541 int tso_minlen; 5542 5543 HN_LOCK_ASSERT(sc); 5544 5545 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 5546 return; 5547 5548 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 5549 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 5550 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 5551 5552 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 5553 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 5554 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 5555 5556 if (tso_maxlen < tso_minlen) 5557 tso_maxlen = tso_minlen; 5558 else if (tso_maxlen > IP_MAXPACKET) 5559 tso_maxlen = IP_MAXPACKET; 5560 if (tso_maxlen > sc->hn_ndis_tso_szmax) 5561 tso_maxlen = sc->hn_ndis_tso_szmax; 5562 hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 5563 5564 if (hn_xpnt_vf_isready(sc)) { 5565 if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) 5566 hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; 5567 } 5568 ifp->if_hw_tsomax = hw_tsomax; 5569 if (bootverbose) 5570 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 5571 } 5572 5573 static void 5574 hn_fixup_tx_data(struct hn_softc *sc) 5575 { 5576 uint64_t csum_assist; 5577 int i; 5578 5579 hn_set_chim_size(sc, sc->hn_chim_szmax); 5580 if (hn_tx_chimney_size > 0 && 5581 hn_tx_chimney_size < sc->hn_chim_szmax) 5582 hn_set_chim_size(sc, 
hn_tx_chimney_size); 5583 5584 csum_assist = 0; 5585 if (sc->hn_caps & HN_CAP_IPCS) 5586 csum_assist |= CSUM_IP; 5587 if (sc->hn_caps & HN_CAP_TCP4CS) 5588 csum_assist |= CSUM_IP_TCP; 5589 if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs) 5590 csum_assist |= CSUM_IP_UDP; 5591 if (sc->hn_caps & HN_CAP_TCP6CS) 5592 csum_assist |= CSUM_IP6_TCP; 5593 if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs) 5594 csum_assist |= CSUM_IP6_UDP; 5595 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5596 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 5597 5598 if (sc->hn_caps & HN_CAP_HASHVAL) { 5599 /* 5600 * Support HASHVAL pktinfo on TX path. 5601 */ 5602 if (bootverbose) 5603 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 5604 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5605 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 5606 } 5607 } 5608 5609 static void 5610 hn_fixup_rx_data(struct hn_softc *sc) 5611 { 5612 5613 if (sc->hn_caps & HN_CAP_UDPHASH) { 5614 int i; 5615 5616 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 5617 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH; 5618 } 5619 } 5620 5621 static void 5622 hn_destroy_tx_data(struct hn_softc *sc) 5623 { 5624 int i; 5625 5626 if (sc->hn_chim != NULL) { 5627 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { 5628 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 5629 } else { 5630 device_printf(sc->hn_dev, 5631 "chimney sending buffer is referenced"); 5632 } 5633 sc->hn_chim = NULL; 5634 } 5635 5636 if (sc->hn_tx_ring_cnt == 0) 5637 return; 5638 5639 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5640 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 5641 5642 free(sc->hn_tx_ring, M_DEVBUF); 5643 sc->hn_tx_ring = NULL; 5644 5645 sc->hn_tx_ring_cnt = 0; 5646 sc->hn_tx_ring_inuse = 0; 5647 } 5648 5649 #ifdef HN_IFSTART_SUPPORT 5650 5651 static void 5652 hn_start_taskfunc(void *xtxr, int pending __unused) 5653 { 5654 struct hn_tx_ring *txr = xtxr; 5655 5656 mtx_lock(&txr->hn_tx_lock); 5657 hn_start_locked(txr, 0); 5658 mtx_unlock(&txr->hn_tx_lock); 5659 } 5660 5661 static int 5662 hn_start_locked(struct hn_tx_ring *txr, int len) 5663 { 5664 struct hn_softc *sc = txr->hn_sc; 5665 struct ifnet *ifp = sc->hn_ifp; 5666 int sched = 0; 5667 5668 KASSERT(hn_use_if_start, 5669 ("hn_start_locked is called, when if_start is disabled")); 5670 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5671 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5672 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5673 5674 if (__predict_false(txr->hn_suspended)) 5675 return (0); 5676 5677 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 5678 IFF_DRV_RUNNING) 5679 return (0); 5680 5681 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 5682 struct hn_txdesc *txd; 5683 struct mbuf *m_head; 5684 int error; 5685 5686 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 5687 if (m_head == NULL) 5688 break; 5689 5690 if (len > 0 && m_head->m_pkthdr.len > len) { 5691 /* 5692 * This sending could be time consuming; let callers 5693 * dispatch this packet sending (and sending of any 5694 * following up packets) to tx taskqueue. 
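 * Returning non-zero makes the caller enqueue hn_tx_task on this
 * ring's taskqueue to continue the transmission.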
5695 */ 5696 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5697 sched = 1; 5698 break; 5699 } 5700 5701 #if defined(INET6) || defined(INET) 5702 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 5703 m_head = hn_tso_fixup(m_head); 5704 if (__predict_false(m_head == NULL)) { 5705 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5706 continue; 5707 } 5708 } else if (m_head->m_pkthdr.csum_flags & 5709 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 5710 m_head = hn_set_hlen(m_head); 5711 if (__predict_false(m_head == NULL)) { 5712 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5713 continue; 5714 } 5715 } 5716 #endif 5717 5718 txd = hn_txdesc_get(txr); 5719 if (txd == NULL) { 5720 txr->hn_no_txdescs++; 5721 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5722 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5723 break; 5724 } 5725 5726 error = hn_encap(ifp, txr, txd, &m_head); 5727 if (error) { 5728 /* Both txd and m_head are freed */ 5729 KASSERT(txr->hn_agg_txd == NULL, 5730 ("encap failed w/ pending aggregating txdesc")); 5731 continue; 5732 } 5733 5734 if (txr->hn_agg_pktleft == 0) { 5735 if (txr->hn_agg_txd != NULL) { 5736 KASSERT(m_head == NULL, 5737 ("pending mbuf for aggregating txdesc")); 5738 error = hn_flush_txagg(ifp, txr); 5739 if (__predict_false(error)) { 5740 atomic_set_int(&ifp->if_drv_flags, 5741 IFF_DRV_OACTIVE); 5742 break; 5743 } 5744 } else { 5745 KASSERT(m_head != NULL, ("mbuf was freed")); 5746 error = hn_txpkt(ifp, txr, txd); 5747 if (__predict_false(error)) { 5748 /* txd is freed, but m_head is not */ 5749 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5750 atomic_set_int(&ifp->if_drv_flags, 5751 IFF_DRV_OACTIVE); 5752 break; 5753 } 5754 } 5755 } 5756 #ifdef INVARIANTS 5757 else { 5758 KASSERT(txr->hn_agg_txd != NULL, 5759 ("no aggregating txdesc")); 5760 KASSERT(m_head == NULL, 5761 ("pending mbuf for aggregating txdesc")); 5762 } 5763 #endif 5764 } 5765 5766 /* Flush pending aggregated transmission. */ 5767 if (txr->hn_agg_txd != NULL) 5768 hn_flush_txagg(ifp, txr); 5769 return (sched); 5770 } 5771 5772 static void 5773 hn_start(struct ifnet *ifp) 5774 { 5775 struct hn_softc *sc = ifp->if_softc; 5776 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 5777 5778 if (txr->hn_sched_tx) 5779 goto do_sched; 5780 5781 if (mtx_trylock(&txr->hn_tx_lock)) { 5782 int sched; 5783 5784 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5785 mtx_unlock(&txr->hn_tx_lock); 5786 if (!sched) 5787 return; 5788 } 5789 do_sched: 5790 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 5791 } 5792 5793 static void 5794 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 5795 { 5796 struct hn_tx_ring *txr = xtxr; 5797 5798 mtx_lock(&txr->hn_tx_lock); 5799 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 5800 hn_start_locked(txr, 0); 5801 mtx_unlock(&txr->hn_tx_lock); 5802 } 5803 5804 static void 5805 hn_start_txeof(struct hn_tx_ring *txr) 5806 { 5807 struct hn_softc *sc = txr->hn_sc; 5808 struct ifnet *ifp = sc->hn_ifp; 5809 5810 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5811 5812 if (txr->hn_sched_tx) 5813 goto do_sched; 5814 5815 if (mtx_trylock(&txr->hn_tx_lock)) { 5816 int sched; 5817 5818 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5819 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5820 mtx_unlock(&txr->hn_tx_lock); 5821 if (sched) { 5822 taskqueue_enqueue(txr->hn_tx_taskq, 5823 &txr->hn_tx_task); 5824 } 5825 } else { 5826 do_sched: 5827 /* 5828 * Release the OACTIVE earlier, in the hope that 5829 * others could catch up.
The task will clear the 5830 * flag again with the hn_tx_lock to avoid possible 5831 * races. 5832 */ 5833 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5834 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 5835 } 5836 } 5837 5838 #endif /* HN_IFSTART_SUPPORT */ 5839 5840 static int 5841 hn_xmit(struct hn_tx_ring *txr, int len) 5842 { 5843 struct hn_softc *sc = txr->hn_sc; 5844 struct ifnet *ifp = sc->hn_ifp; 5845 struct mbuf *m_head; 5846 int sched = 0; 5847 5848 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5849 #ifdef HN_IFSTART_SUPPORT 5850 KASSERT(hn_use_if_start == 0, 5851 ("hn_xmit is called, when if_start is enabled")); 5852 #endif 5853 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5854 5855 if (__predict_false(txr->hn_suspended)) 5856 return (0); 5857 5858 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 5859 return (0); 5860 5861 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 5862 struct hn_txdesc *txd; 5863 int error; 5864 5865 if (len > 0 && m_head->m_pkthdr.len > len) { 5866 /* 5867 * This sending could be time consuming; let callers 5868 * dispatch this packet sending (and sending of any 5869 * following up packets) to tx taskqueue. 5870 */ 5871 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5872 sched = 1; 5873 break; 5874 } 5875 5876 txd = hn_txdesc_get(txr); 5877 if (txd == NULL) { 5878 txr->hn_no_txdescs++; 5879 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5880 txr->hn_oactive = 1; 5881 break; 5882 } 5883 5884 error = hn_encap(ifp, txr, txd, &m_head); 5885 if (error) { 5886 /* Both txd and m_head are freed; discard */ 5887 KASSERT(txr->hn_agg_txd == NULL, 5888 ("encap failed w/ pending aggregating txdesc")); 5889 drbr_advance(ifp, txr->hn_mbuf_br); 5890 continue; 5891 } 5892 5893 if (txr->hn_agg_pktleft == 0) { 5894 if (txr->hn_agg_txd != NULL) { 5895 KASSERT(m_head == NULL, 5896 ("pending mbuf for aggregating txdesc")); 5897 error = hn_flush_txagg(ifp, txr); 5898 if (__predict_false(error)) { 5899 txr->hn_oactive = 1; 5900 break; 5901 } 5902 } else { 5903 KASSERT(m_head != NULL, ("mbuf was freed")); 5904 error = hn_txpkt(ifp, txr, txd); 5905 if (__predict_false(error)) { 5906 /* txd is freed, but m_head is not */ 5907 drbr_putback(ifp, txr->hn_mbuf_br, 5908 m_head); 5909 txr->hn_oactive = 1; 5910 break; 5911 } 5912 } 5913 } 5914 #ifdef INVARIANTS 5915 else { 5916 KASSERT(txr->hn_agg_txd != NULL, 5917 ("no aggregating txdesc")); 5918 KASSERT(m_head == NULL, 5919 ("pending mbuf for aggregating txdesc")); 5920 } 5921 #endif 5922 5923 /* Sent */ 5924 drbr_advance(ifp, txr->hn_mbuf_br); 5925 } 5926 5927 /* Flush pending aggregated transmission. */ 5928 if (txr->hn_agg_txd != NULL) 5929 hn_flush_txagg(ifp, txr); 5930 return (sched); 5931 } 5932 5933 static int 5934 hn_transmit(struct ifnet *ifp, struct mbuf *m) 5935 { 5936 struct hn_softc *sc = ifp->if_softc; 5937 struct hn_tx_ring *txr; 5938 int error, idx = 0; 5939 5940 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 5941 struct rm_priotracker pt; 5942 5943 rm_rlock(&sc->hn_vf_lock, &pt); 5944 if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 5945 struct mbuf *m_bpf = NULL; 5946 int obytes, omcast; 5947 5948 obytes = m->m_pkthdr.len; 5949 omcast = (m->m_flags & M_MCAST) != 0; 5950 5951 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) { 5952 if (bpf_peers_present(ifp->if_bpf)) { 5953 m_bpf = m_copypacket(m, M_NOWAIT); 5954 if (m_bpf == NULL) { 5955 /* 5956 * Failed to grab a shallow 5957 * copy; tap now.
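 * Normally the copy is tapped only after the VF's if_transmit()
 * succeeds, so BPF does not record packets that were never sent.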
5958 */ 5959 ETHER_BPF_MTAP(ifp, m); 5960 } 5961 } 5962 } else { 5963 ETHER_BPF_MTAP(ifp, m); 5964 } 5965 5966 error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m); 5967 rm_runlock(&sc->hn_vf_lock, &pt); 5968 5969 if (m_bpf != NULL) { 5970 if (!error) 5971 ETHER_BPF_MTAP(ifp, m_bpf); 5972 m_freem(m_bpf); 5973 } 5974 5975 if (error == ENOBUFS) { 5976 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 5977 } else if (error) { 5978 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5979 } else { 5980 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 5981 if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes); 5982 if (omcast) { 5983 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 5984 omcast); 5985 } 5986 } 5987 return (error); 5988 } 5989 rm_runlock(&sc->hn_vf_lock, &pt); 5990 } 5991 5992 #if defined(INET6) || defined(INET) 5993 /* 5994 * Perform TSO packet header fixup or get l2/l3 header length now, 5995 * since packet headers should be cache-hot. 5996 */ 5997 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 5998 m = hn_tso_fixup(m); 5999 if (__predict_false(m == NULL)) { 6000 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6001 return EIO; 6002 } 6003 } else if (m->m_pkthdr.csum_flags & 6004 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 6005 m = hn_set_hlen(m); 6006 if (__predict_false(m == NULL)) { 6007 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6008 return EIO; 6009 } 6010 } 6011 #endif 6012 6013 /* 6014 * Select the TX ring based on flowid 6015 */ 6016 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 6017 #ifdef RSS 6018 uint32_t bid; 6019 6020 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 6021 &bid) == 0) 6022 idx = bid % sc->hn_tx_ring_inuse; 6023 else 6024 #endif 6025 { 6026 #if defined(INET6) || defined(INET) 6027 int tcpsyn = 0; 6028 6029 if (m->m_pkthdr.len < 128 && 6030 (m->m_pkthdr.csum_flags & 6031 (CSUM_IP_TCP | CSUM_IP6_TCP)) && 6032 (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 6033 m = hn_check_tcpsyn(m, &tcpsyn); 6034 if (__predict_false(m == NULL)) { 6035 if_inc_counter(ifp, 6036 IFCOUNTER_OERRORS, 1); 6037 return (EIO); 6038 } 6039 } 6040 #else 6041 const int tcpsyn = 0; 6042 #endif 6043 if (tcpsyn) 6044 idx = 0; 6045 else 6046 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 6047 } 6048 } 6049 txr = &sc->hn_tx_ring[idx]; 6050 6051 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 6052 if (error) { 6053 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 6054 return error; 6055 } 6056 6057 if (txr->hn_oactive) 6058 return 0; 6059 6060 if (txr->hn_sched_tx) 6061 goto do_sched; 6062 6063 if (mtx_trylock(&txr->hn_tx_lock)) { 6064 int sched; 6065 6066 sched = hn_xmit(txr, txr->hn_direct_tx_size); 6067 mtx_unlock(&txr->hn_tx_lock); 6068 if (!sched) 6069 return 0; 6070 } 6071 do_sched: 6072 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 6073 return 0; 6074 } 6075 6076 static void 6077 hn_tx_ring_qflush(struct hn_tx_ring *txr) 6078 { 6079 struct mbuf *m; 6080 6081 mtx_lock(&txr->hn_tx_lock); 6082 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 6083 m_freem(m); 6084 mtx_unlock(&txr->hn_tx_lock); 6085 } 6086 6087 static void 6088 hn_xmit_qflush(struct ifnet *ifp) 6089 { 6090 struct hn_softc *sc = ifp->if_softc; 6091 struct rm_priotracker pt; 6092 int i; 6093 6094 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 6095 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 6096 if_qflush(ifp); 6097 6098 rm_rlock(&sc->hn_vf_lock, &pt); 6099 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 6100 sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp); 6101 rm_runlock(&sc->hn_vf_lock, &pt); 6102 } 6103 6104 static void 6105 hn_xmit_txeof(struct 

static void
hn_xmit_txeof(struct hn_tx_ring *txr)
{

	if (txr->hn_sched_tx)
		goto do_sched;

	if (mtx_trylock(&txr->hn_tx_lock)) {
		int sched;

		txr->hn_oactive = 0;
		sched = hn_xmit(txr, txr->hn_direct_tx_size);
		mtx_unlock(&txr->hn_tx_lock);
		if (sched) {
			taskqueue_enqueue(txr->hn_tx_taskq,
			    &txr->hn_tx_task);
		}
	} else {
do_sched:
		/*
		 * Release oactive earlier, in the hope that others
		 * could catch up.  The task will clear oactive again
		 * with the hn_tx_lock held to avoid possible races.
		 */
		txr->hn_oactive = 0;
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_xmit_taskfunc(void *xtxr, int pending __unused)
{
	struct hn_tx_ring *txr = xtxr;

	mtx_lock(&txr->hn_tx_lock);
	hn_xmit(txr, 0);
	mtx_unlock(&txr->hn_tx_lock);
}

static void
hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
{
	struct hn_tx_ring *txr = xtxr;

	mtx_lock(&txr->hn_tx_lock);
	txr->hn_oactive = 0;
	hn_xmit(txr, 0);
	mtx_unlock(&txr->hn_tx_lock);
}

static int
hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
{
	struct vmbus_chan_br cbr;
	struct hn_rx_ring *rxr;
	struct hn_tx_ring *txr = NULL;
	int idx, error;

	idx = vmbus_chan_subidx(chan);

	/*
	 * Link this channel to RX/TX ring.
	 */
	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
	    ("invalid channel index %d, should be >= 0 && < %d",
	     idx, sc->hn_rx_ring_inuse));
	rxr = &sc->hn_rx_ring[idx];
	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
	    ("RX ring %d already attached", idx));
	rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
	rxr->hn_chan = chan;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
		    idx, vmbus_chan_id(chan));
	}

	if (idx < sc->hn_tx_ring_inuse) {
		txr = &sc->hn_tx_ring[idx];
		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
		    ("TX ring %d already attached", idx));
		txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;

		txr->hn_chan = chan;
		if (bootverbose) {
			if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
			    idx, vmbus_chan_id(chan));
		}
	}

	/* Bind this channel to a proper CPU. */
	vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));

	/*
	 * Open this channel.
	 */
	cbr.cbr = rxr->hn_br;
	cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
	cbr.cbr_txsz = HN_TXBR_SIZE;
	cbr.cbr_rxsz = HN_RXBR_SIZE;
	error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
	if (error) {
		if (error == EISCONN) {
			if_printf(sc->hn_ifp, "bufring is connected after "
			    "chan%u open failure\n", vmbus_chan_id(chan));
			rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
		} else {
			if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
			    vmbus_chan_id(chan), error);
		}
	}
	return (error);
}
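
/*
 * Counterpart of hn_chan_attach(): unlink the channel from its RX/TX
 * ring and close it.  If the hypervisor still holds a reference to the
 * bufring (EISCONN), record that via HN_RX_FLAG_BR_REF so later code
 * knows the bufring is still in use.
 */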

static void
hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
{
	struct hn_rx_ring *rxr;
	int idx, error;

	idx = vmbus_chan_subidx(chan);

	/*
	 * Unlink this channel from the RX/TX ring.
	 */
	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
	    ("invalid channel index %d, should be >= 0 && < %d",
	     idx, sc->hn_rx_ring_inuse));
	rxr = &sc->hn_rx_ring[idx];
	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
	    ("RX ring %d is not attached", idx));
	rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;

	if (idx < sc->hn_tx_ring_inuse) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];

		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
		    ("TX ring %d is not attached", idx));
		txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
	}

	/*
	 * Close this channel.
	 *
	 * NOTE:
	 * Channel closing does _not_ destroy the target channel.
	 */
	error = vmbus_chan_close_direct(chan);
	if (error == EISCONN) {
		if_printf(sc->hn_ifp, "chan%u bufring is connected "
		    "after being closed\n", vmbus_chan_id(chan));
		rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
	} else if (error) {
		if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
		    vmbus_chan_id(chan), error);
	}
}

static int
hn_attach_subchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i, error = 0;

	KASSERT(subchan_cnt > 0, ("no sub-channels"));

	/* Attach the sub-channels. */
	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
	for (i = 0; i < subchan_cnt; ++i) {
		int error1;

		error1 = hn_chan_attach(sc, subchans[i]);
		if (error1) {
			error = error1;
			/* Move on; all channels will be detached later. */
		}
	}
	vmbus_subchan_rel(subchans, subchan_cnt);

	if (error) {
		if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
	} else {
		if (bootverbose) {
			if_printf(sc->hn_ifp, "%d sub-channels attached\n",
			    subchan_cnt);
		}
	}
	return (error);
}

static void
hn_detach_allchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i;

	if (subchan_cnt == 0)
		goto back;

	/* Detach the sub-channels. */
	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
	for (i = 0; i < subchan_cnt; ++i)
		hn_chan_detach(sc, subchans[i]);
	vmbus_subchan_rel(subchans, subchan_cnt);

back:
	/*
	 * Detach the primary channel, _after_ all sub-channels
	 * are detached.
	 */
	hn_chan_detach(sc, sc->hn_prichan);

	/* Wait for sub-channels to be destroyed, if any. */
	vmbus_subchan_drain(sc->hn_prichan);

#ifdef INVARIANTS
	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
		    HN_RX_FLAG_ATTACHED) == 0,
		    ("%dth RX ring is still attached", i));
	}
	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
		KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
		    HN_TX_FLAG_ATTACHED) == 0,
		    ("%dth TX ring is still attached", i));
	}
#endif
}
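
/*
 * Figure out how many sub-channels to use and ask NVS to allocate them.
 * The request is capped by the RSS capabilities reported by RNDIS; on
 * any failure the driver simply falls back to the primary channel only
 * (*nsubch = 0).
 */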

static int
hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
{
	struct vmbus_channel **subchans;
	int nchan, rxr_cnt, error;

	nchan = *nsubch + 1;
	if (nchan == 1) {
		/*
		 * Multiple RX/TX rings are not requested.
		 */
		*nsubch = 0;
		return (0);
	}

	/*
	 * Query RSS capabilities, e.g. # of RX rings, and # of indirect
	 * table entries.
	 */
	error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
	if (error) {
		/* No RSS; this is benign. */
		*nsubch = 0;
		return (0);
	}
	if (bootverbose) {
		if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
		    rxr_cnt, nchan);
	}

	if (nchan > rxr_cnt)
		nchan = rxr_cnt;
	if (nchan == 1) {
		if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
		*nsubch = 0;
		return (0);
	}

	/*
	 * Allocate sub-channels from NVS.
	 */
	*nsubch = nchan - 1;
	error = hn_nvs_alloc_subchans(sc, nsubch);
	if (error || *nsubch == 0) {
		/* Failed to allocate sub-channels. */
		*nsubch = 0;
		return (0);
	}

	/*
	 * Wait for all sub-channels to become ready before moving on.
	 */
	subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
	vmbus_subchan_rel(subchans, *nsubch);
	return (0);
}

static bool
hn_synth_attachable(const struct hn_softc *sc)
{
	int i;

	if (sc->hn_flags & HN_FLAG_ERRORS)
		return (false);

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];

		if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF)
			return (false);
	}
	return (true);
}

/*
 * Make sure that the RX filter is zero after the successful
 * RNDIS initialization.
 *
 * NOTE:
 * Under certain conditions on certain versions of Hyper-V,
 * the RNDIS rxfilter is _not_ zero on the hypervisor side
 * after the successful RNDIS initialization, which breaks
 * the assumption of any following code (well, it breaks the
 * RNDIS API contract actually).  Clear the RNDIS rxfilter
 * explicitly, drain packets sneaking through, and drain the
 * interrupt taskqueues scheduled due to the stealth packets.
 */
static void
hn_rndis_init_fixat(struct hn_softc *sc, int nchan)
{

	hn_disable_rx(sc);
	hn_drain_rxtx(sc, nchan);
}

static int
hn_synth_attach(struct hn_softc *sc, int mtu)
{
#define ATTACHED_NVS		0x0002
#define ATTACHED_RNDIS		0x0004

	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int error, nsubch, nchan = 1, i, rndis_inited;
	uint32_t old_caps, attached = 0;

	KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
	    ("synthetic parts were attached"));

	if (!hn_synth_attachable(sc))
		return (ENXIO);

	/* Save capabilities for later verification. */
	old_caps = sc->hn_caps;
	sc->hn_caps = 0;

	/* Clear RSS state. */
	sc->hn_rss_ind_size = 0;
	sc->hn_rss_hash = 0;
	sc->hn_rss_hcap = 0;

	/*
	 * Attach the primary channel _before_ attaching NVS and RNDIS.
	 */
	error = hn_chan_attach(sc, sc->hn_prichan);
	if (error)
		goto failed;

	/*
	 * Attach NVS.
	 */
	error = hn_nvs_attach(sc, mtu);
	if (error)
		goto failed;
	attached |= ATTACHED_NVS;

	/*
	 * Attach RNDIS _after_ NVS is attached.
	 */
	error = hn_rndis_attach(sc, mtu, &rndis_inited);
	if (rndis_inited)
		attached |= ATTACHED_RNDIS;
	if (error)
		goto failed;

	/*
	 * Make sure capabilities are not changed.
	 */
	if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
		if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
		    old_caps, sc->hn_caps);
		error = ENXIO;
		goto failed;
	}

	/*
	 * Allocate sub-channels for multi-TX/RX rings.
	 *
	 * NOTE:
	 * The # of RX rings that can be used is equivalent to the # of
	 * channels to be requested.
	 */
	nsubch = sc->hn_rx_ring_cnt - 1;
	error = hn_synth_alloc_subchans(sc, &nsubch);
	if (error)
		goto failed;
	/* NOTE: _Full_ synthetic parts detach is required now. */
	sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;

	/*
	 * Set the # of TX/RX rings that could be used according to
	 * the # of channels that NVS offered.
	 */
	nchan = nsubch + 1;
	hn_set_ring_inuse(sc, nchan);
	if (nchan == 1) {
		/* Only the primary channel can be used; done */
		goto back;
	}

	/*
	 * Attach the sub-channels.
	 *
	 * NOTE: hn_set_ring_inuse() _must_ have been called.
	 */
	error = hn_attach_subchans(sc);
	if (error)
		goto failed;

	/*
	 * Configure RSS key and indirect table _after_ all sub-channels
	 * are attached.
	 */
	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
		/*
		 * RSS key is not set yet; set it to the default RSS key.
		 */
		if (bootverbose)
			if_printf(sc->hn_ifp, "setup default RSS key\n");
#ifdef RSS
		rss_getkey(rss->rss_key);
#else
		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
#endif
		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
	}

	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
		/*
		 * RSS indirect table is not set yet; set it up in round-
		 * robin fashion.
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "setup default RSS indirect "
			    "table\n");
		}
		for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
			uint32_t subidx;

#ifdef RSS
			subidx = rss_get_indirection_to_bucket(i);
#else
			subidx = i;
#endif
			rss->rss_ind[i] = subidx % nchan;
		}
		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
	} else {
		/*
		 * # of usable channels may be changed, so we have to
		 * make sure that all entries in RSS indirect table
		 * are valid.
		 *
		 * NOTE: hn_set_ring_inuse() _must_ have been called.
		 */
		hn_rss_ind_fixup(sc);
	}

	sc->hn_rss_hash = sc->hn_rss_hcap;
	if ((sc->hn_flags & HN_FLAG_RXVF) ||
	    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
		/* NOTE: Don't reconfigure RSS here; it is done right below. */
		hn_vf_rss_fixup(sc, false);
	}
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error)
		goto failed;
back:
	/*
	 * Fixup transmission aggregation setup.
	 */
	hn_set_txagg(sc);
	hn_rndis_init_fixat(sc, nchan);
	return (0);

failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
		hn_rndis_init_fixat(sc, nchan);
		hn_synth_detach(sc);
	} else {
		if (attached & ATTACHED_RNDIS) {
			hn_rndis_init_fixat(sc, nchan);
			hn_rndis_detach(sc);
		}
		if (attached & ATTACHED_NVS)
			hn_nvs_detach(sc);
		hn_chan_detach(sc, sc->hn_prichan);
		/* Restore old capabilities. */
		sc->hn_caps = old_caps;
	}
	return (error);

#undef ATTACHED_RNDIS
#undef ATTACHED_NVS
}

/*
 * NOTE:
 * The interface must have been suspended through hn_suspend() before
 * this function gets called.
 */
static void
hn_synth_detach(struct hn_softc *sc)
{

	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
	    ("synthetic parts were not attached"));

	/* Detach the RNDIS first. */
	hn_rndis_detach(sc);

	/* Detach NVS. */
	hn_nvs_detach(sc);

	/* Detach all of the channels. */
	hn_detach_allchans(sc);

	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}

static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{
	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

#ifdef RSS
	if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) {
		if_printf(sc->hn_ifp, "# of RX rings (%d) does not match "
		    "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse,
		    rss_getnumbuckets());
	}
#endif

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}

static void
hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
{

	/*
	 * NOTE:
	 * The TX bufring will not be drained by the hypervisor
	 * if the primary channel is revoked.
	 */
	while (!vmbus_chan_rx_empty(chan) ||
	    (!vmbus_chan_is_revoked(sc->hn_prichan) &&
	     !vmbus_chan_tx_empty(chan)))
		pause("waitch", 1);
	vmbus_chan_intr_drain(chan);
}

static void
hn_disable_rx(struct hn_softc *sc)
{

	/*
	 * Disable RX by clearing RX filter forcefully.
	 */
	sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
	hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */

	/*
	 * Give RNDIS enough time to flush all pending data packets.
	 */
	pause("waitrx", (200 * hz) / 1000);
}

/*
 * NOTE:
 * RX/TX _must_ have been suspended/disabled before this function
 * is called.
 */
static void
hn_drain_rxtx(struct hn_softc *sc, int nchan)
{
	struct vmbus_channel **subch = NULL;
	int nsubch;

	/*
	 * Drain RX/TX bufrings and interrupts.
	 */
	nsubch = nchan - 1;
	if (nsubch > 0)
		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);

	if (subch != NULL) {
		int i;

		for (i = 0; i < nsubch; ++i)
			hn_chan_drain(sc, subch[i]);
	}
	hn_chan_drain(sc, sc->hn_prichan);

	if (subch != NULL)
		vmbus_subchan_rel(subch, nsubch);
}
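
/*
 * Quiesce the data path: mark every in-use TX ring suspended, wait for
 * in-flight sends to complete (unless the primary channel has been
 * revoked), disable RX, drain the channel bufrings and, finally, drain
 * the TX taskqueues.
 */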

static void
hn_suspend_data(struct hn_softc *sc)
{
	struct hn_tx_ring *txr;
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Suspend TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 1;
		mtx_unlock(&txr->hn_tx_lock);
		/* No one can send more packets now. */

		/*
		 * Wait for all pending sends to finish.
		 *
		 * NOTE:
		 * We will _not_ receive all pending send-dones if the
		 * primary channel is revoked.
		 */
		while (hn_tx_ring_pending(txr) &&
		    !vmbus_chan_is_revoked(sc->hn_prichan))
			pause("hnwtx", 1 /* 1 tick */);
	}

	/*
	 * Disable RX.
	 */
	hn_disable_rx(sc);

	/*
	 * Drain RX/TX.
	 */
	hn_drain_rxtx(sc, sc->hn_rx_ring_inuse);

	/*
	 * Drain any pending TX tasks.
	 *
	 * NOTE:
	 * hn_drain_rxtx() above can dispatch TX tasks, so the TX tasks
	 * must be drained _after_ it.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{

	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}

static void
hn_suspend_mgmt(struct hn_softc *sc)
{
	struct task task;

	HN_LOCK_ASSERT(sc);

	/*
	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
	 * through hn_mgmt_taskq.
	 */
	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
	vmbus_chan_run_task(sc->hn_prichan, &task);

	/*
	 * Make sure that all pending management tasks are completed.
	 */
	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
	taskqueue_drain_all(sc->hn_mgmt_taskq0);
}

static void
hn_suspend(struct hn_softc *sc)
{

	/* Disable polling. */
	hn_polling(sc, 0);

	/*
	 * If the non-transparent mode VF is activated, the synthetic
	 * device is receiving packets, so the data path of the
	 * synthetic device must be suspended.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_RXVF))
		hn_suspend_data(sc);
	hn_suspend_mgmt(sc);
}

static void
hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
{
	int i;

	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
	    ("invalid TX ring count %d", tx_ring_cnt));

	for (i = 0; i < tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 0;
		mtx_unlock(&txr->hn_tx_lock);
	}
}

static void
hn_resume_data(struct hn_softc *sc)
{
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Re-enable RX.
	 */
	hn_rxfilter_config(sc);

	/*
	 * Make sure to clear suspend status on "all" TX rings,
	 * since hn_tx_ring_inuse can be changed after
	 * hn_suspend_data().
	 */
	hn_resume_tx(sc, sc->hn_tx_ring_cnt);

#ifdef HN_IFSTART_SUPPORT
	if (!hn_use_if_start)
#endif
	{
		/*
		 * Flush unused drbrs, since hn_tx_ring_inuse may be
		 * reduced.
		 */
		for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
			hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
	}

	/*
	 * Kick start TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		/*
		 * Use the txeof task, so that any pending oactive can be
		 * cleared properly.
		 */
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_resume_mgmt(struct hn_softc *sc)
{

	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;

	/*
	 * Kick off network change detection, if it was pending.
	 * If no network change was pending, start link status
	 * checks, which are more lightweight than network change
	 * detection.
	 */
	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		hn_change_network(sc);
	else
		hn_update_link_status(sc);
}

static void
hn_resume(struct hn_softc *sc)
{

	/*
	 * If the non-transparent mode VF is activated, the synthetic
	 * device has to receive packets, so the data path of the
	 * synthetic device must be resumed.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_RXVF))
		hn_resume_data(sc);

	/*
	 * Don't resume link status change if VF is attached/activated.
	 * - In the non-transparent VF mode, the synthetic device marks
	 *   link down until the VF is deactivated; i.e. VF is down.
	 * - In transparent VF mode, VF's media status is used until
	 *   the VF is detached.
	 */
	if ((sc->hn_flags & HN_FLAG_RXVF) == 0 &&
	    !(hn_xpnt_vf && sc->hn_vf_ifp != NULL))
		hn_resume_mgmt(sc);

	/*
	 * Re-enable polling if this interface is running and
	 * the polling is requested.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
		hn_polling(sc, sc->hn_pollhz);
}

static void
hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
{
	const struct rndis_status_msg *msg;
	int ofs;

	if (dlen < sizeof(*msg)) {
		if_printf(sc->hn_ifp, "invalid RNDIS status\n");
		return;
	}
	msg = data;

	switch (msg->rm_status) {
	case RNDIS_STATUS_MEDIA_CONNECT:
	case RNDIS_STATUS_MEDIA_DISCONNECT:
		hn_update_link_status(sc);
		break;

	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
	case RNDIS_STATUS_LINK_SPEED_CHANGE:
		/* Not really useful; ignore. */
		break;

	case RNDIS_STATUS_NETWORK_CHANGE:
		ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
		if (dlen < ofs + msg->rm_stbuflen ||
		    msg->rm_stbuflen < sizeof(uint32_t)) {
			if_printf(sc->hn_ifp, "network changed\n");
		} else {
			uint32_t change;

			memcpy(&change, ((const uint8_t *)msg) + ofs,
			    sizeof(change));
			if_printf(sc->hn_ifp, "network changed, change %u\n",
			    change);
		}
		hn_change_network(sc);
		break;

	default:
		if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
		    msg->rm_status);
		break;
	}
}
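
/*
 * Walk the RNDIS per-packet-info list and extract the pieces the RX
 * path cares about: VLAN tag, RX checksum result and hash value/info.
 * Each element is length- and alignment-checked before it is used.
 */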

static int
hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (__predict_false(info_dlen < sizeof(*pi)))
			return (EINVAL);
		if (__predict_false(info_dlen < pi->rm_size))
			return (EINVAL);
		info_dlen -= pi->rm_size;

		if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return (EINVAL);
		if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
			return (EINVAL);
		dlen = pi->rm_size - pi->rm_pktinfooffset;
		data = pi->rm_data;

		switch (pi->rm_type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
				return (EINVAL);
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
				return (EINVAL);
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHVAL:
			if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
				return (EINVAL);
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHINF:
			if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
				return (EINVAL);
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL) {
			/* All found; done */
			break;
		}
next:
		pi = (const struct rndis_pktinfo *)
		    ((const uint8_t *)pi + pi->rm_size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if ((mask & HN_RXINFO_HASHVAL) == 0)
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return (0);
}

static __inline bool
hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
{

	if (off < check_off) {
		if (__predict_true(off + len <= check_off))
			return (false);
	} else if (off > check_off) {
		if (__predict_true(check_off + check_len <= off))
			return (false);
	}
	return (true);
}

static void
hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_packet_msg *pkt;
	struct hn_rxinfo info;
	int data_off, pktinfo_off, data_len, pktinfo_len;

	/*
	 * Check length.
	 */
	if (__predict_false(dlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
		return;
	}
	pkt = data;

	if (__predict_false(dlen < pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
		return;
	}
	if (__predict_false(pkt->rm_len <
	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
		    "msglen %u, data %u, oob %u, pktinfo %u\n",
		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
		    pkt->rm_pktinfolen);
		return;
	}
	if (__predict_false(pkt->rm_datalen == 0)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
		return;
	}

	/*
	 * Check offsets.
	 */
#define IS_OFFSET_INVALID(ofs)			\
	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))

	/* XXX Hyper-V does not meet data offset alignment requirement */
	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data offset %u\n", pkt->rm_dataoffset);
		return;
	}
	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "oob offset %u\n", pkt->rm_oobdataoffset);
		return;
	}
	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
		return;
	}

#undef IS_OFFSET_INVALID

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
	data_len = pkt->rm_datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
	pktinfo_len = pkt->rm_pktinfolen;

	/*
	 * Check OOB coverage.
	 */
	if (__predict_false(pkt->rm_oobdatalen != 0)) {
		int oob_off, oob_len;

		if_printf(rxr->hn_ifp, "got oobdata\n");
		oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
		oob_len = pkt->rm_oobdatalen;

		if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overflow, msglen %u, oob abs %d len %d\n",
			    pkt->rm_len, oob_off, oob_len);
			return;
		}

		/*
		 * Check against data.
		 */
		if (hn_rndis_check_overlap(oob_off, oob_len,
		    data_off, data_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps data, oob abs %d len %d, "
			    "data abs %d len %d\n",
			    oob_off, oob_len, data_off, data_len);
			return;
		}

		/*
		 * Check against pktinfo.
		 */
		if (pktinfo_len != 0 &&
		    hn_rndis_check_overlap(oob_off, oob_len,
		    pktinfo_off, pktinfo_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps pktinfo, oob abs %d len %d, "
			    "pktinfo abs %d len %d\n",
			    oob_off, oob_len, pktinfo_off, pktinfo_len);
			return;
		}
	}

	/*
	 * Check per-packet-info coverage and find useful per-packet-info.
	 */
	info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
	info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
	info.hash_info = HN_NDIS_HASH_INFO_INVALID;
	if (__predict_true(pktinfo_len != 0)) {
		bool overlap;
		int error;

		if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overflow, msglen %u, "
			    "pktinfo abs %d len %d\n",
			    pkt->rm_len, pktinfo_off, pktinfo_len);
			return;
		}

		/*
		 * Check packet info coverage.
		 */
		overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
		    data_off, data_len);
		if (__predict_false(overlap)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overlaps data, pktinfo abs %d len %d, "
			    "data abs %d len %d\n",
			    pktinfo_off, pktinfo_len, data_off, data_len);
			return;
		}

		/*
		 * Find useful per-packet-info.
		 */
		error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
		    pktinfo_len, &info);
		if (__predict_false(error)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
			    "pktinfo\n");
			return;
		}
	}

	if (__predict_false(data_off + data_len > pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data overflow, msglen %u, data abs %d len %d\n",
		    pkt->rm_len, data_off, data_len);
		return;
	}
	hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
}
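
/*
 * Dispatch an inbound RNDIS message: data packets take the hot path
 * into hn_rndis_rx_data(), status indications update link/network
 * state, and everything else is handed to the RNDIS control path.
 */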

static __inline void
hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_msghdr *hdr;

	if (__predict_false(dlen < sizeof(*hdr))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
		return;
	}
	hdr = data;

	if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
		/* Hot data path. */
		hn_rndis_rx_data(rxr, data, dlen);
		/* Done! */
		return;
	}

	if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
		hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
	else
		hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
}

static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{
	const struct hn_nvs_hdr *hdr;

	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
		if_printf(sc->hn_ifp, "invalid nvs notify\n");
		return;
	}
	hdr = VMBUS_CHANPKT_CONST_DATA(pkt);

	if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
		/* Useless; ignore */
		return;
	}
	if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
}

static void
hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkt)
{
	struct hn_nvs_sendctx *sndc;

	sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
	sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
	    VMBUS_CHANPKT_DATALEN(pkt));
	/*
	 * NOTE:
	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
	 * its callback.
	 */
}

static void
hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkthdr)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr;
	int count, i, hlen;

	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
		return;
	}
	nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);

	/* Make sure that this is an RNDIS message. */
	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
		    nvs_hdr->nvs_type);
		return;
	}

	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
	if (__predict_false(hlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
		return;
	}
	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;

	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
		    pkt->cp_rxbuf_id);
		return;
	}

	count = pkt->cp_rxbuf_cnt;
	if (__predict_false(hlen <
	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
		return;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		int ofs, len;

		ofs = pkt->cp_rxbuf[i].rb_ofs;
		len = pkt->cp_rxbuf[i].rb_len;
		if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
			    "ofs %d, len %d\n", i, ofs, len);
			continue;
		}
		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
	}

	/*
	 * Ack the consumed RXBUF associated w/ this channel packet,
	 * so that this RXBUF can be recycled by the hypervisor.
	 */
	hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
}
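
/*
 * Complete the RXBUF channel packet so the hypervisor can recycle the
 * receive buffer.  The completion normally succeeds immediately; on
 * EAGAIN it is retried a bounded number of times before the buffer is
 * reported as leaked.
 */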

static void
hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    uint64_t tid)
{
	struct hn_nvs_rndis_ack ack;
	int retries, error;

	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
	ack.nvs_status = HN_NVS_STATUS_OK;

	retries = 0;
again:
	error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
	    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
	if (__predict_false(error == EAGAIN)) {
		/*
		 * NOTE:
		 * This should _not_ happen in the real world, since the
		 * consumption of the TX bufring from the TX path is
		 * controlled.
		 */
		if (rxr->hn_ack_failed == 0)
			if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
		rxr->hn_ack_failed++;
		retries++;
		if (retries < 10) {
			DELAY(100);
			goto again;
		}
		/* RXBUF leaks! */
		if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
	}
}

static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
	struct hn_rx_ring *rxr = xrxr;
	struct hn_softc *sc = rxr->hn_ifp->if_softc;

	for (;;) {
		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
		int error, pktlen;

		pktlen = rxr->hn_pktbuf_len;
		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
		if (__predict_false(error == ENOBUFS)) {
			void *nbuf;
			int nlen;

			/*
			 * Expand channel packet buffer.
			 *
			 * XXX
			 * Use M_WAITOK here, since allocation failure
			 * is fatal.
			 */
			nlen = rxr->hn_pktbuf_len * 2;
			while (nlen < pktlen)
				nlen *= 2;
			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);

			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
			    rxr->hn_pktbuf_len, nlen);

			free(rxr->hn_pktbuf, M_DEVBUF);
			rxr->hn_pktbuf = nbuf;
			rxr->hn_pktbuf_len = nlen;
			/* Retry! */
			continue;
		} else if (__predict_false(error == EAGAIN)) {
			/* No more channel packets; done! */
			break;
		}
		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			hn_nvs_handle_comp(sc, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(rxr, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(sc, pkt);
			break;

		default:
			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
			    pkt->cph_type);
			break;
		}
	}
	hn_chan_rollup(rxr, rxr->hn_txr);
}

static void
hn_sysinit(void *arg __unused)
{
	int i;

	hn_udpcs_fixup = counter_u64_alloc(M_WAITOK);

#ifdef HN_IFSTART_SUPPORT
	/*
	 * Don't use ifnet.if_start if transparent VF mode is requested;
	 * mainly due to the IFF_DRV_OACTIVE flag.
	 */
	if (hn_xpnt_vf && hn_use_if_start) {
		hn_use_if_start = 0;
		printf("hn: transparent VF mode, if_transmit will be used, "
		    "instead of if_start\n");
	}
#endif
	if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) {
		printf("hn: invalid transparent VF attach routing "
		    "wait timeout %d, reset to %d\n",
		    hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN);
		hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
	}
7479 */ 7480 rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE); 7481 hn_vfmap_size = HN_VFMAP_SIZE_DEF; 7482 hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF, 7483 M_WAITOK | M_ZERO); 7484 7485 /* 7486 * Fix the # of TX taskqueues. 7487 */ 7488 if (hn_tx_taskq_cnt <= 0) 7489 hn_tx_taskq_cnt = 1; 7490 else if (hn_tx_taskq_cnt > mp_ncpus) 7491 hn_tx_taskq_cnt = mp_ncpus; 7492 7493 /* 7494 * Fix the TX taskqueue mode. 7495 */ 7496 switch (hn_tx_taskq_mode) { 7497 case HN_TX_TASKQ_M_INDEP: 7498 case HN_TX_TASKQ_M_GLOBAL: 7499 case HN_TX_TASKQ_M_EVTTQ: 7500 break; 7501 default: 7502 hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; 7503 break; 7504 } 7505 7506 if (vm_guest != VM_GUEST_HV) 7507 return; 7508 7509 if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL) 7510 return; 7511 7512 hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 7513 M_DEVBUF, M_WAITOK); 7514 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 7515 hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK, 7516 taskqueue_thread_enqueue, &hn_tx_taskque[i]); 7517 taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET, 7518 "hn tx%d", i); 7519 } 7520 } 7521 SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL); 7522 7523 static void 7524 hn_sysuninit(void *arg __unused) 7525 { 7526 7527 if (hn_tx_taskque != NULL) { 7528 int i; 7529 7530 for (i = 0; i < hn_tx_taskq_cnt; ++i) 7531 taskqueue_free(hn_tx_taskque[i]); 7532 free(hn_tx_taskque, M_DEVBUF); 7533 } 7534 7535 if (hn_vfmap != NULL) 7536 free(hn_vfmap, M_DEVBUF); 7537 rm_destroy(&hn_vfmap_lock); 7538 7539 counter_u64_free(hn_udpcs_fixup); 7540 } 7541 SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL); 7542