1 /*- 2 * Copyright (c) 2010-2012 Citrix Inc. 3 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. 4 * Copyright (c) 2012 NetApp Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 2004-2006 Kip Macy 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. 
Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 
53 */ 54 55 #include <sys/cdefs.h> 56 __FBSDID("$FreeBSD$"); 57 58 #include "opt_hn.h" 59 #include "opt_inet6.h" 60 #include "opt_inet.h" 61 #include "opt_rss.h" 62 63 #include <sys/param.h> 64 #include <sys/systm.h> 65 #include <sys/bus.h> 66 #include <sys/counter.h> 67 #include <sys/kernel.h> 68 #include <sys/limits.h> 69 #include <sys/malloc.h> 70 #include <sys/mbuf.h> 71 #include <sys/module.h> 72 #include <sys/queue.h> 73 #include <sys/lock.h> 74 #include <sys/rmlock.h> 75 #include <sys/sbuf.h> 76 #include <sys/smp.h> 77 #include <sys/socket.h> 78 #include <sys/sockio.h> 79 #include <sys/sx.h> 80 #include <sys/sysctl.h> 81 #include <sys/taskqueue.h> 82 #include <sys/buf_ring.h> 83 #include <sys/eventhandler.h> 84 85 #include <machine/atomic.h> 86 #include <machine/in_cksum.h> 87 88 #include <net/bpf.h> 89 #include <net/ethernet.h> 90 #include <net/if.h> 91 #include <net/if_dl.h> 92 #include <net/if_media.h> 93 #include <net/if_types.h> 94 #include <net/if_var.h> 95 #include <net/rndis.h> 96 #ifdef RSS 97 #include <net/rss_config.h> 98 #endif 99 100 #include <netinet/in_systm.h> 101 #include <netinet/in.h> 102 #include <netinet/ip.h> 103 #include <netinet/ip6.h> 104 #include <netinet/tcp.h> 105 #include <netinet/tcp_lro.h> 106 #include <netinet/udp.h> 107 108 #include <dev/hyperv/include/hyperv.h> 109 #include <dev/hyperv/include/hyperv_busdma.h> 110 #include <dev/hyperv/include/vmbus.h> 111 #include <dev/hyperv/include/vmbus_xact.h> 112 113 #include <dev/hyperv/netvsc/ndis.h> 114 #include <dev/hyperv/netvsc/if_hnreg.h> 115 #include <dev/hyperv/netvsc/if_hnvar.h> 116 #include <dev/hyperv/netvsc/hn_nvs.h> 117 #include <dev/hyperv/netvsc/hn_rndis.h> 118 119 #include "vmbus_if.h" 120 121 #define HN_IFSTART_SUPPORT 122 123 #define HN_RING_CNT_DEF_MAX 8 124 125 #define HN_VFMAP_SIZE_DEF 8 126 127 #define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */ 128 129 /* YYY should get it from the underlying channel */ 130 #define HN_TX_DESC_CNT 512 131 132 #define HN_RNDIS_PKT_LEN \ 
133 (sizeof(struct rndis_packet_msg) + \ 134 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ 135 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ 136 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ 137 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) 138 #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE 139 #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE 140 141 #define HN_TX_DATA_BOUNDARY PAGE_SIZE 142 #define HN_TX_DATA_MAXSIZE IP_MAXPACKET 143 #define HN_TX_DATA_SEGSIZE PAGE_SIZE 144 /* -1 for RNDIS packet message */ 145 #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) 146 147 #define HN_DIRECT_TX_SIZE_DEF 128 148 149 #define HN_EARLY_TXEOF_THRESH 8 150 151 #define HN_PKTBUF_LEN_DEF (16 * 1024) 152 153 #define HN_LROENT_CNT_DEF 128 154 155 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) 156 #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) 157 /* YYY 2*MTU is a bit rough, but should be good enough. */ 158 #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) 159 160 #define HN_LRO_ACKCNT_DEF 1 161 162 #define HN_LOCK_INIT(sc) \ 163 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) 164 #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) 165 #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) 166 #define HN_LOCK(sc) \ 167 do { \ 168 while (sx_try_xlock(&(sc)->hn_lock) == 0) \ 169 DELAY(1000); \ 170 } while (0) 171 #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) 172 173 #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) 174 #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) 175 #define HN_CSUM_IP_HWASSIST(sc) \ 176 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) 177 #define HN_CSUM_IP6_HWASSIST(sc) \ 178 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) 179 180 #define HN_PKTSIZE_MIN(align) \ 181 roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ 182 HN_RNDIS_PKT_LEN, (align)) 183 #define HN_PKTSIZE(m, align) \ 184 roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) 185 186 #ifdef RSS 187 #define HN_RING_IDX2CPU(sc, 
idx) rss_getcpu((idx) % rss_getnumbuckets()) 188 #else 189 #define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus) 190 #endif 191 192 struct hn_txdesc { 193 #ifndef HN_USE_TXDESC_BUFRING 194 SLIST_ENTRY(hn_txdesc) link; 195 #endif 196 STAILQ_ENTRY(hn_txdesc) agg_link; 197 198 /* Aggregated txdescs, in sending order. */ 199 STAILQ_HEAD(, hn_txdesc) agg_list; 200 201 /* The oldest packet, if transmission aggregation happens. */ 202 struct mbuf *m; 203 struct hn_tx_ring *txr; 204 int refs; 205 uint32_t flags; /* HN_TXD_FLAG_ */ 206 struct hn_nvs_sendctx send_ctx; 207 uint32_t chim_index; 208 int chim_size; 209 210 bus_dmamap_t data_dmap; 211 212 bus_addr_t rndis_pkt_paddr; 213 struct rndis_packet_msg *rndis_pkt; 214 bus_dmamap_t rndis_pkt_dmap; 215 }; 216 217 #define HN_TXD_FLAG_ONLIST 0x0001 218 #define HN_TXD_FLAG_DMAMAP 0x0002 219 #define HN_TXD_FLAG_ONAGG 0x0004 220 221 struct hn_rxinfo { 222 uint32_t vlan_info; 223 uint32_t csum_info; 224 uint32_t hash_info; 225 uint32_t hash_value; 226 }; 227 228 struct hn_rxvf_setarg { 229 struct hn_rx_ring *rxr; 230 struct ifnet *vf_ifp; 231 }; 232 233 #define HN_RXINFO_VLAN 0x0001 234 #define HN_RXINFO_CSUM 0x0002 235 #define HN_RXINFO_HASHINF 0x0004 236 #define HN_RXINFO_HASHVAL 0x0008 237 #define HN_RXINFO_ALL \ 238 (HN_RXINFO_VLAN | \ 239 HN_RXINFO_CSUM | \ 240 HN_RXINFO_HASHINF | \ 241 HN_RXINFO_HASHVAL) 242 243 #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff 244 #define HN_NDIS_RXCSUM_INFO_INVALID 0 245 #define HN_NDIS_HASH_INFO_INVALID 0 246 247 static int hn_probe(device_t); 248 static int hn_attach(device_t); 249 static int hn_detach(device_t); 250 static int hn_shutdown(device_t); 251 static void hn_chan_callback(struct vmbus_channel *, 252 void *); 253 254 static void hn_init(void *); 255 static int hn_ioctl(struct ifnet *, u_long, caddr_t); 256 #ifdef HN_IFSTART_SUPPORT 257 static void hn_start(struct ifnet *); 258 #endif 259 static int hn_transmit(struct ifnet *, struct mbuf *); 260 static void 
hn_xmit_qflush(struct ifnet *); 261 static int hn_ifmedia_upd(struct ifnet *); 262 static void hn_ifmedia_sts(struct ifnet *, 263 struct ifmediareq *); 264 265 static void hn_ifnet_event(void *, struct ifnet *, int); 266 static void hn_ifaddr_event(void *, struct ifnet *); 267 static void hn_ifnet_attevent(void *, struct ifnet *); 268 static void hn_ifnet_detevent(void *, struct ifnet *); 269 static void hn_ifnet_lnkevent(void *, struct ifnet *, int); 270 271 static bool hn_ismyvf(const struct hn_softc *, 272 const struct ifnet *); 273 static void hn_rxvf_change(struct hn_softc *, 274 struct ifnet *, bool); 275 static void hn_rxvf_set(struct hn_softc *, struct ifnet *); 276 static void hn_rxvf_set_task(void *, int); 277 static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *); 278 static int hn_xpnt_vf_iocsetflags(struct hn_softc *); 279 static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, 280 struct ifreq *); 281 static void hn_xpnt_vf_saveifflags(struct hn_softc *); 282 static bool hn_xpnt_vf_isready(struct hn_softc *); 283 static void hn_xpnt_vf_setready(struct hn_softc *); 284 static void hn_xpnt_vf_init_taskfunc(void *, int); 285 static void hn_xpnt_vf_init(struct hn_softc *); 286 static void hn_xpnt_vf_setenable(struct hn_softc *); 287 static void hn_xpnt_vf_setdisable(struct hn_softc *, bool); 288 static void hn_vf_rss_fixup(struct hn_softc *, bool); 289 static void hn_vf_rss_restore(struct hn_softc *); 290 291 static int hn_rndis_rxinfo(const void *, int, 292 struct hn_rxinfo *); 293 static void hn_rndis_rx_data(struct hn_rx_ring *, 294 const void *, int); 295 static void hn_rndis_rx_status(struct hn_softc *, 296 const void *, int); 297 static void hn_rndis_init_fixat(struct hn_softc *, int); 298 299 static void hn_nvs_handle_notify(struct hn_softc *, 300 const struct vmbus_chanpkt_hdr *); 301 static void hn_nvs_handle_comp(struct hn_softc *, 302 struct vmbus_channel *, 303 const struct vmbus_chanpkt_hdr *); 304 static void hn_nvs_handle_rxbuf(struct 
hn_rx_ring *, 305 struct vmbus_channel *, 306 const struct vmbus_chanpkt_hdr *); 307 static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, 308 struct vmbus_channel *, uint64_t); 309 310 #if __FreeBSD_version >= 1100099 311 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); 312 static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); 313 #endif 314 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); 315 static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); 316 #if __FreeBSD_version < 1100095 317 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); 318 #else 319 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); 320 #endif 321 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 322 static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 323 static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); 324 static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); 325 static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); 326 static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); 327 static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); 328 #ifndef RSS 329 static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); 330 static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); 331 #endif 332 static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); 333 static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS); 334 static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS); 335 static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); 336 static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); 337 static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); 338 static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); 339 static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS); 340 static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS); 341 static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS); 342 static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS); 343 static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS); 344 static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS); 345 static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS); 346 347 static void 
hn_stop(struct hn_softc *, bool); 348 static void hn_init_locked(struct hn_softc *); 349 static int hn_chan_attach(struct hn_softc *, 350 struct vmbus_channel *); 351 static void hn_chan_detach(struct hn_softc *, 352 struct vmbus_channel *); 353 static int hn_attach_subchans(struct hn_softc *); 354 static void hn_detach_allchans(struct hn_softc *); 355 static void hn_chan_rollup(struct hn_rx_ring *, 356 struct hn_tx_ring *); 357 static void hn_set_ring_inuse(struct hn_softc *, int); 358 static int hn_synth_attach(struct hn_softc *, int); 359 static void hn_synth_detach(struct hn_softc *); 360 static int hn_synth_alloc_subchans(struct hn_softc *, 361 int *); 362 static bool hn_synth_attachable(const struct hn_softc *); 363 static void hn_suspend(struct hn_softc *); 364 static void hn_suspend_data(struct hn_softc *); 365 static void hn_suspend_mgmt(struct hn_softc *); 366 static void hn_resume(struct hn_softc *); 367 static void hn_resume_data(struct hn_softc *); 368 static void hn_resume_mgmt(struct hn_softc *); 369 static void hn_suspend_mgmt_taskfunc(void *, int); 370 static void hn_chan_drain(struct hn_softc *, 371 struct vmbus_channel *); 372 static void hn_disable_rx(struct hn_softc *); 373 static void hn_drain_rxtx(struct hn_softc *, int); 374 static void hn_polling(struct hn_softc *, u_int); 375 static void hn_chan_polling(struct vmbus_channel *, u_int); 376 static void hn_mtu_change_fixup(struct hn_softc *); 377 378 static void hn_update_link_status(struct hn_softc *); 379 static void hn_change_network(struct hn_softc *); 380 static void hn_link_taskfunc(void *, int); 381 static void hn_netchg_init_taskfunc(void *, int); 382 static void hn_netchg_status_taskfunc(void *, int); 383 static void hn_link_status(struct hn_softc *); 384 385 static int hn_create_rx_data(struct hn_softc *, int); 386 static void hn_destroy_rx_data(struct hn_softc *); 387 static int hn_check_iplen(const struct mbuf *, int); 388 static void hn_rxpkt_proto(const struct mbuf *, int *, int 
*); 389 static int hn_set_rxfilter(struct hn_softc *, uint32_t); 390 static int hn_rxfilter_config(struct hn_softc *); 391 static int hn_rss_reconfig(struct hn_softc *); 392 static void hn_rss_ind_fixup(struct hn_softc *); 393 static void hn_rss_mbuf_hash(struct hn_softc *, uint32_t); 394 static int hn_rxpkt(struct hn_rx_ring *, const void *, 395 int, const struct hn_rxinfo *); 396 static uint32_t hn_rss_type_fromndis(uint32_t); 397 static uint32_t hn_rss_type_tondis(uint32_t); 398 399 static int hn_tx_ring_create(struct hn_softc *, int); 400 static void hn_tx_ring_destroy(struct hn_tx_ring *); 401 static int hn_create_tx_data(struct hn_softc *, int); 402 static void hn_fixup_tx_data(struct hn_softc *); 403 static void hn_fixup_rx_data(struct hn_softc *); 404 static void hn_destroy_tx_data(struct hn_softc *); 405 static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); 406 static void hn_txdesc_gc(struct hn_tx_ring *, 407 struct hn_txdesc *); 408 static int hn_encap(struct ifnet *, struct hn_tx_ring *, 409 struct hn_txdesc *, struct mbuf **); 410 static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, 411 struct hn_txdesc *); 412 static void hn_set_chim_size(struct hn_softc *, int); 413 static void hn_set_tso_maxsize(struct hn_softc *, int, int); 414 static bool hn_tx_ring_pending(struct hn_tx_ring *); 415 static void hn_tx_ring_qflush(struct hn_tx_ring *); 416 static void hn_resume_tx(struct hn_softc *, int); 417 static void hn_set_txagg(struct hn_softc *); 418 static void *hn_try_txagg(struct ifnet *, 419 struct hn_tx_ring *, struct hn_txdesc *, 420 int); 421 static int hn_get_txswq_depth(const struct hn_tx_ring *); 422 static void hn_txpkt_done(struct hn_nvs_sendctx *, 423 struct hn_softc *, struct vmbus_channel *, 424 const void *, int); 425 static int hn_txpkt_sglist(struct hn_tx_ring *, 426 struct hn_txdesc *); 427 static int hn_txpkt_chim(struct hn_tx_ring *, 428 struct hn_txdesc *); 429 static int hn_xmit(struct hn_tx_ring *, int); 430 static void 
hn_xmit_taskfunc(void *, int); 431 static void hn_xmit_txeof(struct hn_tx_ring *); 432 static void hn_xmit_txeof_taskfunc(void *, int); 433 #ifdef HN_IFSTART_SUPPORT 434 static int hn_start_locked(struct hn_tx_ring *, int); 435 static void hn_start_taskfunc(void *, int); 436 static void hn_start_txeof(struct hn_tx_ring *); 437 static void hn_start_txeof_taskfunc(void *, int); 438 #endif 439 440 SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 441 "Hyper-V network interface"); 442 443 /* Trust tcp segements verification on host side. */ 444 static int hn_trust_hosttcp = 1; 445 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, 446 &hn_trust_hosttcp, 0, 447 "Trust tcp segement verification on host side, " 448 "when csum info is missing (global setting)"); 449 450 /* Trust udp datagrams verification on host side. */ 451 static int hn_trust_hostudp = 1; 452 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, 453 &hn_trust_hostudp, 0, 454 "Trust udp datagram verification on host side, " 455 "when csum info is missing (global setting)"); 456 457 /* Trust ip packets verification on host side. */ 458 static int hn_trust_hostip = 1; 459 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, 460 &hn_trust_hostip, 0, 461 "Trust ip packet verification on host side, " 462 "when csum info is missing (global setting)"); 463 464 /* 465 * Offload UDP/IPv4 checksum. 466 */ 467 static int hn_enable_udp4cs = 1; 468 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN, 469 &hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum"); 470 471 /* 472 * Offload UDP/IPv6 checksum. 473 */ 474 static int hn_enable_udp6cs = 1; 475 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN, 476 &hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum"); 477 478 /* Stats. */ 479 static counter_u64_t hn_udpcs_fixup; 480 SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW, 481 &hn_udpcs_fixup, "# of UDP checksum fixup"); 482 483 /* 484 * See hn_set_hlen(). 
485 * 486 * This value is for Azure. For Hyper-V, set this above 487 * 65536 to disable UDP datagram checksum fixup. 488 */ 489 static int hn_udpcs_fixup_mtu = 1420; 490 SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN, 491 &hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold"); 492 493 /* Limit TSO burst size */ 494 static int hn_tso_maxlen = IP_MAXPACKET; 495 SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, 496 &hn_tso_maxlen, 0, "TSO burst limit"); 497 498 /* Limit chimney send size */ 499 static int hn_tx_chimney_size = 0; 500 SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, 501 &hn_tx_chimney_size, 0, "Chimney send packet size limit"); 502 503 /* Limit the size of packet for direct transmission */ 504 static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; 505 SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, 506 &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); 507 508 /* # of LRO entries per RX ring */ 509 #if defined(INET) || defined(INET6) 510 #if __FreeBSD_version >= 1100095 511 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; 512 SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, 513 &hn_lro_entry_count, 0, "LRO entry count"); 514 #endif 515 #endif 516 517 static int hn_tx_taskq_cnt = 1; 518 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN, 519 &hn_tx_taskq_cnt, 0, "# of TX taskqueues"); 520 521 #define HN_TX_TASKQ_M_INDEP 0 522 #define HN_TX_TASKQ_M_GLOBAL 1 523 #define HN_TX_TASKQ_M_EVTTQ 2 524 525 static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; 526 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN, 527 &hn_tx_taskq_mode, 0, "TX taskqueue modes: " 528 "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs"); 529 530 #ifndef HN_USE_TXDESC_BUFRING 531 static int hn_use_txdesc_bufring = 0; 532 #else 533 static int hn_use_txdesc_bufring = 1; 534 #endif 535 SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, 536 &hn_use_txdesc_bufring, 0, 
"Use buf_ring for TX descriptors"); 537 538 #ifdef HN_IFSTART_SUPPORT 539 /* Use ifnet.if_start instead of ifnet.if_transmit */ 540 static int hn_use_if_start = 0; 541 SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, 542 &hn_use_if_start, 0, "Use if_start TX method"); 543 #endif 544 545 /* # of channels to use */ 546 static int hn_chan_cnt = 0; 547 SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, 548 &hn_chan_cnt, 0, 549 "# of channels to use; each channel has one RX ring and one TX ring"); 550 551 /* # of transmit rings to use */ 552 static int hn_tx_ring_cnt = 0; 553 SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, 554 &hn_tx_ring_cnt, 0, "# of TX rings to use"); 555 556 /* Software TX ring deptch */ 557 static int hn_tx_swq_depth = 0; 558 SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, 559 &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); 560 561 /* Enable sorted LRO, and the depth of the per-channel mbuf queue */ 562 #if __FreeBSD_version >= 1100095 563 static u_int hn_lro_mbufq_depth = 0; 564 SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, 565 &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); 566 #endif 567 568 /* Packet transmission aggregation size limit */ 569 static int hn_tx_agg_size = -1; 570 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN, 571 &hn_tx_agg_size, 0, "Packet transmission aggregation size limit"); 572 573 /* Packet transmission aggregation count limit */ 574 static int hn_tx_agg_pkts = -1; 575 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN, 576 &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit"); 577 578 /* VF list */ 579 SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, CTLFLAG_RD | CTLTYPE_STRING, 580 0, 0, hn_vflist_sysctl, "A", "VF list"); 581 582 /* VF mapping */ 583 SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING, 584 0, 0, hn_vfmap_sysctl, "A", "VF mapping"); 585 586 /* Transparent VF */ 587 static int hn_xpnt_vf = 1; 588 SYSCTL_INT(_hw_hn, OID_AUTO, 
vf_transparent, CTLFLAG_RDTUN, 589 &hn_xpnt_vf, 0, "Transparent VF mod"); 590 591 /* Accurate BPF support for Transparent VF */ 592 static int hn_xpnt_vf_accbpf = 0; 593 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN, 594 &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF"); 595 596 /* Extra wait for transparent VF attach routing; unit seconds. */ 597 static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; 598 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN, 599 &hn_xpnt_vf_attwait, 0, 600 "Extra wait for transparent VF attach routing; unit: seconds"); 601 602 static u_int hn_cpu_index; /* next CPU for channel */ 603 static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */ 604 605 static struct rmlock hn_vfmap_lock; 606 static int hn_vfmap_size; 607 static struct ifnet **hn_vfmap; 608 609 #ifndef RSS 610 static const uint8_t 611 hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 612 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 613 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 614 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 615 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 616 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 617 }; 618 #endif /* !RSS */ 619 620 static const struct hyperv_guid hn_guid = { 621 .hv_guid = { 622 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, 623 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e } 624 }; 625 626 static device_method_t hn_methods[] = { 627 /* Device interface */ 628 DEVMETHOD(device_probe, hn_probe), 629 DEVMETHOD(device_attach, hn_attach), 630 DEVMETHOD(device_detach, hn_detach), 631 DEVMETHOD(device_shutdown, hn_shutdown), 632 DEVMETHOD_END 633 }; 634 635 static driver_t hn_driver = { 636 "hn", 637 hn_methods, 638 sizeof(struct hn_softc) 639 }; 640 641 static devclass_t hn_devclass; 642 643 DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); 644 MODULE_VERSION(hn, 1); 645 MODULE_DEPEND(hn, vmbus, 1, 1, 1); 646 647 #if __FreeBSD_version >= 1100099 648 static void 649 
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) 650 { 651 int i; 652 653 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 654 sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; 655 } 656 #endif 657 658 static int 659 hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) 660 { 661 662 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 663 txd->chim_size == 0, ("invalid rndis sglist txd")); 664 return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, 665 &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); 666 } 667 668 static int 669 hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) 670 { 671 struct hn_nvs_rndis rndis; 672 673 KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && 674 txd->chim_size > 0, ("invalid rndis chim txd")); 675 676 rndis.nvs_type = HN_NVS_TYPE_RNDIS; 677 rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; 678 rndis.nvs_chim_idx = txd->chim_index; 679 rndis.nvs_chim_sz = txd->chim_size; 680 681 return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, 682 &rndis, sizeof(rndis), &txd->send_ctx)); 683 } 684 685 static __inline uint32_t 686 hn_chim_alloc(struct hn_softc *sc) 687 { 688 int i, bmap_cnt = sc->hn_chim_bmap_cnt; 689 u_long *bmap = sc->hn_chim_bmap; 690 uint32_t ret = HN_NVS_CHIM_IDX_INVALID; 691 692 for (i = 0; i < bmap_cnt; ++i) { 693 int idx; 694 695 idx = ffsl(~bmap[i]); 696 if (idx == 0) 697 continue; 698 699 --idx; /* ffsl is 1-based */ 700 KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, 701 ("invalid i %d and idx %d", i, idx)); 702 703 if (atomic_testandset_long(&bmap[i], idx)) 704 continue; 705 706 ret = i * LONG_BIT + idx; 707 break; 708 } 709 return (ret); 710 } 711 712 static __inline void 713 hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) 714 { 715 u_long mask; 716 uint32_t idx; 717 718 idx = chim_idx / LONG_BIT; 719 KASSERT(idx < sc->hn_chim_bmap_cnt, 720 ("invalid chimney index 0x%x", chim_idx)); 721 722 mask = 1UL << (chim_idx % LONG_BIT); 723 KASSERT(sc->hn_chim_bmap[idx] & mask, 724 ("index 
bitmap 0x%lx, chimney index %u, " 725 "bitmap idx %d, bitmask 0x%lx", 726 sc->hn_chim_bmap[idx], chim_idx, idx, mask)); 727 728 atomic_clear_long(&sc->hn_chim_bmap[idx], mask); 729 } 730 731 #if defined(INET6) || defined(INET) 732 733 #define PULLUP_HDR(m, len) \ 734 do { \ 735 if (__predict_false((m)->m_len < (len))) { \ 736 (m) = m_pullup((m), (len)); \ 737 if ((m) == NULL) \ 738 return (NULL); \ 739 } \ 740 } while (0) 741 742 /* 743 * NOTE: If this function failed, the m_head would be freed. 744 */ 745 static __inline struct mbuf * 746 hn_tso_fixup(struct mbuf *m_head) 747 { 748 struct ether_vlan_header *evl; 749 struct tcphdr *th; 750 int ehlen; 751 752 KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); 753 754 PULLUP_HDR(m_head, sizeof(*evl)); 755 evl = mtod(m_head, struct ether_vlan_header *); 756 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 757 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 758 else 759 ehlen = ETHER_HDR_LEN; 760 m_head->m_pkthdr.l2hlen = ehlen; 761 762 #ifdef INET 763 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 764 struct ip *ip; 765 int iphlen; 766 767 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 768 ip = mtodo(m_head, ehlen); 769 iphlen = ip->ip_hl << 2; 770 m_head->m_pkthdr.l3hlen = iphlen; 771 772 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 773 th = mtodo(m_head, ehlen + iphlen); 774 775 ip->ip_len = 0; 776 ip->ip_sum = 0; 777 th->th_sum = in_pseudo(ip->ip_src.s_addr, 778 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 779 } 780 #endif 781 #if defined(INET6) && defined(INET) 782 else 783 #endif 784 #ifdef INET6 785 { 786 struct ip6_hdr *ip6; 787 788 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 789 ip6 = mtodo(m_head, ehlen); 790 if (ip6->ip6_nxt != IPPROTO_TCP) { 791 m_freem(m_head); 792 return (NULL); 793 } 794 m_head->m_pkthdr.l3hlen = sizeof(*ip6); 795 796 PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); 797 th = mtodo(m_head, ehlen + sizeof(*ip6)); 798 799 ip6->ip6_plen = 0; 800 th->th_sum = in6_cksum_pseudo(ip6, 0, 
IPPROTO_TCP, 0); 801 } 802 #endif 803 return (m_head); 804 } 805 806 /* 807 * NOTE: If this function failed, the m_head would be freed. 808 */ 809 static __inline struct mbuf * 810 hn_set_hlen(struct mbuf *m_head) 811 { 812 const struct ether_vlan_header *evl; 813 int ehlen; 814 815 PULLUP_HDR(m_head, sizeof(*evl)); 816 evl = mtod(m_head, const struct ether_vlan_header *); 817 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 818 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 819 else 820 ehlen = ETHER_HDR_LEN; 821 m_head->m_pkthdr.l2hlen = ehlen; 822 823 #ifdef INET 824 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) { 825 const struct ip *ip; 826 int iphlen; 827 828 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 829 ip = mtodo(m_head, ehlen); 830 iphlen = ip->ip_hl << 2; 831 m_head->m_pkthdr.l3hlen = iphlen; 832 833 /* 834 * UDP checksum offload does not work in Azure, if the 835 * following conditions meet: 836 * - sizeof(IP hdr + UDP hdr + payload) > 1420. 837 * - IP_DF is not set in the IP hdr. 838 * 839 * Fallback to software checksum for these UDP datagrams. 
840 */ 841 if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) && 842 m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen && 843 (ntohs(ip->ip_off) & IP_DF) == 0) { 844 uint16_t off = ehlen + iphlen; 845 846 counter_u64_add(hn_udpcs_fixup, 1); 847 PULLUP_HDR(m_head, off + sizeof(struct udphdr)); 848 *(uint16_t *)(m_head->m_data + off + 849 m_head->m_pkthdr.csum_data) = in_cksum_skip( 850 m_head, m_head->m_pkthdr.len, off); 851 m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP; 852 } 853 } 854 #endif 855 #if defined(INET6) && defined(INET) 856 else 857 #endif 858 #ifdef INET6 859 { 860 const struct ip6_hdr *ip6; 861 862 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 863 ip6 = mtodo(m_head, ehlen); 864 if (ip6->ip6_nxt != IPPROTO_TCP) { 865 m_freem(m_head); 866 return (NULL); 867 } 868 m_head->m_pkthdr.l3hlen = sizeof(*ip6); 869 } 870 #endif 871 return (m_head); 872 } 873 874 /* 875 * NOTE: If this function failed, the m_head would be freed. 876 */ 877 static __inline struct mbuf * 878 hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn) 879 { 880 const struct tcphdr *th; 881 int ehlen, iphlen; 882 883 *tcpsyn = 0; 884 ehlen = m_head->m_pkthdr.l2hlen; 885 iphlen = m_head->m_pkthdr.l3hlen; 886 887 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 888 th = mtodo(m_head, ehlen + iphlen); 889 if (th->th_flags & TH_SYN) 890 *tcpsyn = 1; 891 return (m_head); 892 } 893 894 #undef PULLUP_HDR 895 896 #endif /* INET6 || INET */ 897 898 static int 899 hn_set_rxfilter(struct hn_softc *sc, uint32_t filter) 900 { 901 int error = 0; 902 903 HN_LOCK_ASSERT(sc); 904 905 if (sc->hn_rx_filter != filter) { 906 error = hn_rndis_set_rxfilter(sc, filter); 907 if (!error) 908 sc->hn_rx_filter = filter; 909 } 910 return (error); 911 } 912 913 static int 914 hn_rxfilter_config(struct hn_softc *sc) 915 { 916 struct ifnet *ifp = sc->hn_ifp; 917 uint32_t filter; 918 919 HN_LOCK_ASSERT(sc); 920 921 /* 922 * If the non-transparent mode VF is activated, we don't know how 923 * its RX filter is configured, so stick the 
synthetic device in
	 * the promiscuous mode.
	 */
	if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) {
		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
	} else {
		filter = NDIS_PACKET_TYPE_DIRECTED;
		if (ifp->if_flags & IFF_BROADCAST)
			filter |= NDIS_PACKET_TYPE_BROADCAST;
		/* TODO: support multicast list */
		if ((ifp->if_flags & IFF_ALLMULTI) ||
		    !TAILQ_EMPTY(&ifp->if_multiaddrs))
			filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
	}
	return (hn_set_rxfilter(sc, filter));
}

/*
 * Compute the effective TX aggregation limits and store them in every
 * TX ring.
 *
 * The size limit starts from sc->hn_agg_size (a negative value means
 * "no explicit limit") and is clamped by the RNDIS offered size and by
 * the chimney sending buffer size; the packet count limit starts from
 * sc->hn_agg_pkts and is clamped by the RNDIS offered count.  If either
 * result is too small to be useful, or the RNDIS alignment does not fit
 * the per-ring 'short' field, aggregation is disabled by storing 0 for
 * both limits.
 */
static void
hn_set_txagg(struct hn_softc *sc)
{
	uint32_t size, pkts;
	int i;

	/*
	 * Setup aggregation size.
	 */
	if (sc->hn_agg_size < 0)
		size = UINT32_MAX;
	else
		size = sc->hn_agg_size;

	if (sc->hn_rndis_agg_size < size)
		size = sc->hn_rndis_agg_size;

	/* NOTE: We only aggregate packets using chimney sending buffers. */
	if (size > (uint32_t)sc->hn_chim_szmax)
		size = sc->hn_chim_szmax;

	/* Room for no more than two minimum sized packets; not worth it. */
	if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
		/* Disable */
		size = 0;
		pkts = 0;
		goto done;
	}

	/* NOTE: Type of the per TX ring setting is 'int'. */
	if (size > INT_MAX)
		size = INT_MAX;

	/*
	 * Setup aggregation packet count.
	 */
	if (sc->hn_agg_pkts < 0)
		pkts = UINT32_MAX;
	else
		pkts = sc->hn_agg_pkts;

	if (sc->hn_rndis_agg_pkts < pkts)
		pkts = sc->hn_rndis_agg_pkts;

	if (pkts <= 1) {
		/* Disable */
		size = 0;
		pkts = 0;
		goto done;
	}

	/* NOTE: Type of the per TX ring setting is 'short'. */
	if (pkts > SHRT_MAX)
		pkts = SHRT_MAX;

done:
	/* NOTE: Type of the per TX ring setting is 'short'. */
	if (sc->hn_rndis_agg_align > SHRT_MAX) {
		/* Disable */
		size = 0;
		pkts = 0;
	}

	if (bootverbose) {
		if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
		    size, pkts, sc->hn_rndis_agg_align);
	}

	/* Publish the new limits to each TX ring under its own lock. */
	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_agg_szmax = size;
		txr->hn_agg_pktmax = pkts;
		txr->hn_agg_align = sc->hn_rndis_agg_align;
		mtx_unlock(&txr->hn_tx_lock);
	}
}

/*
 * Return the software queue depth to use for 'txr': the larger of
 * hn_tx_swq_depth and the ring's TX descriptor count.
 */
static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{

	KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
	if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
		return txr->hn_txdesc_cnt;
	return hn_tx_swq_depth;
}

/*
 * Reconfigure RSS by disabling it and then enabling it again.
 *
 * Returns ENXIO if the synthetic parts are not attached, the error of
 * the failing hn_rndis_conf_rss() call, or 0 on success.
 */
static int
hn_rss_reconfig(struct hn_softc *sc)
{
	int error;

	HN_LOCK_ASSERT(sc);

	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
		return (ENXIO);

	/*
	 * Disable RSS first.
	 *
	 * NOTE:
	 * Direct reconfiguration by setting the UNCHG flags does
	 * _not_ work properly.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "disable RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS disable failed\n");
		return (error);
	}

	/*
	 * Reenable the RSS w/ the updated RSS key or indirect
	 * table.
1056 */ 1057 if (bootverbose) 1058 if_printf(sc->hn_ifp, "reconfig RSS\n"); 1059 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 1060 if (error) { 1061 if_printf(sc->hn_ifp, "RSS reconfig failed\n"); 1062 return (error); 1063 } 1064 return (0); 1065 } 1066 1067 static void 1068 hn_rss_ind_fixup(struct hn_softc *sc) 1069 { 1070 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 1071 int i, nchan; 1072 1073 nchan = sc->hn_rx_ring_inuse; 1074 KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); 1075 1076 /* 1077 * Check indirect table to make sure that all channels in it 1078 * can be used. 1079 */ 1080 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 1081 if (rss->rss_ind[i] >= nchan) { 1082 if_printf(sc->hn_ifp, 1083 "RSS indirect table %d fixup: %u -> %d\n", 1084 i, rss->rss_ind[i], nchan - 1); 1085 rss->rss_ind[i] = nchan - 1; 1086 } 1087 } 1088 } 1089 1090 static int 1091 hn_ifmedia_upd(struct ifnet *ifp __unused) 1092 { 1093 1094 return EOPNOTSUPP; 1095 } 1096 1097 static void 1098 hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 1099 { 1100 struct hn_softc *sc = ifp->if_softc; 1101 1102 ifmr->ifm_status = IFM_AVALID; 1103 ifmr->ifm_active = IFM_ETHER; 1104 1105 if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { 1106 ifmr->ifm_active |= IFM_NONE; 1107 return; 1108 } 1109 ifmr->ifm_status |= IFM_ACTIVE; 1110 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 1111 } 1112 1113 static void 1114 hn_rxvf_set_task(void *xarg, int pending __unused) 1115 { 1116 struct hn_rxvf_setarg *arg = xarg; 1117 1118 arg->rxr->hn_rxvf_ifp = arg->vf_ifp; 1119 } 1120 1121 static void 1122 hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp) 1123 { 1124 struct hn_rx_ring *rxr; 1125 struct hn_rxvf_setarg arg; 1126 struct task task; 1127 int i; 1128 1129 HN_LOCK_ASSERT(sc); 1130 1131 TASK_INIT(&task, 0, hn_rxvf_set_task, &arg); 1132 1133 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 1134 rxr = &sc->hn_rx_ring[i]; 1135 1136 if (i < sc->hn_rx_ring_inuse) { 1137 arg.rxr = rxr; 1138 arg.vf_ifp 
= vf_ifp;
			vmbus_chan_run_task(rxr->hn_chan, &task);
		} else {
			rxr->hn_rxvf_ifp = vf_ifp;
		}
	}
}

/*
 * Decide whether 'ifp' is the VF paired with this hn(4) instance.
 *
 * Returns true only if ifp is not hn(4)'s own ifnet, was allocated as
 * IFT_ETHER, is not a lagg/vlan interface (by driver name), and has the
 * same link-level address as hn(4)'s ifnet.
 */
static bool
hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp)
{
	const struct ifnet *hn_ifp;

	hn_ifp = sc->hn_ifp;

	if (ifp == hn_ifp)
		return (false);

	if (ifp->if_alloctype != IFT_ETHER)
		return (false);

	/* Ignore lagg/vlan interfaces */
	if (strcmp(ifp->if_dname, "lagg") == 0 ||
	    strcmp(ifp->if_dname, "vlan") == 0)
		return (false);

	if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0)
		return (false);

	return (true);
}

/*
 * Switch the datapath to (rxvf == true) or away from (rxvf == false)
 * the VF 'ifp'.
 *
 * Nothing is done if the synthetic parts are not attached, if ifp is
 * not this device's VF, or if HN_FLAG_RXVF already reflects the
 * requested state.  Otherwise the RX filter, the NVS datapath, the
 * per-ring VF ifnet, the RSS settings and the link status management
 * are updated, and a devctl notification is sent.  Takes and releases
 * the softc lock itself.
 */
static void
hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf)
{
	struct ifnet *hn_ifp;

	HN_LOCK(sc);

	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
		goto out;

	if (!hn_ismyvf(sc, ifp))
		goto out;
	hn_ifp = sc->hn_ifp;

	if (rxvf) {
		/* Already switched to the VF. */
		if (sc->hn_flags & HN_FLAG_RXVF)
			goto out;

		sc->hn_flags |= HN_FLAG_RXVF;
		hn_rxfilter_config(sc);
	} else {
		/* Already switched away from the VF. */
		if (!(sc->hn_flags & HN_FLAG_RXVF))
			goto out;

		sc->hn_flags &= ~HN_FLAG_RXVF;
		if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
			hn_rxfilter_config(sc);
		else
			hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
	}

	hn_nvs_set_datapath(sc,
	    rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH);

	hn_rxvf_set(sc, rxvf ? ifp : NULL);

	if (rxvf) {
		hn_vf_rss_fixup(sc, true);
		hn_suspend_mgmt(sc);
		sc->hn_link_flags &=
		    ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
		if_link_state_change(hn_ifp, LINK_STATE_DOWN);
	} else {
		hn_vf_rss_restore(sc);
		hn_resume_mgmt(sc);
	}

	devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname,
	    rxvf ? "VF_UP" : "VF_DOWN", NULL);

	if (bootverbose) {
		if_printf(hn_ifp, "datapath is switched %s %s\n",
		    rxvf ? "to" : "from", ifp->if_xname);
	}
out:
	HN_UNLOCK(sc);
}

/*
 * ifnet event handler: forward IFNET_EVENT_UP/IFNET_EVENT_DOWN to
 * hn_rxvf_change(); all other events are ignored.  'arg' is passed on
 * as the softc.
 */
static void
hn_ifnet_event(void *arg, struct ifnet *ifp, int event)
{

	if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN)
		return;
	hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP);
}

/*
 * ifaddr event handler: call hn_rxvf_change() with the current IFF_UP
 * state of 'ifp'.  'arg' is passed on as the softc.
 */
static void
hn_ifaddr_event(void *arg, struct ifnet *ifp)
{

	hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP);
}

/*
 * Pass a SIOCSIFCAP request to the VF and mirror the outcome into
 * hn(4)'s if_capenable and if_hwassist.
 *
 * ifr->ifr_reqcap is first masked with hn(4)'s if_capabilities.  The
 * VF ioctl's return value is returned to the caller, but the merge
 * below is done regardless of it.
 */
static int
hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr)
{
	struct ifnet *ifp, *vf_ifp;
	uint64_t tmp;
	int error;

	HN_LOCK_ASSERT(sc);
	ifp = sc->hn_ifp;
	vf_ifp = sc->hn_vf_ifp;

	/*
	 * Fix up requested capabilities w/ supported capabilities,
	 * since the supported capabilities could have been changed.
	 */
	ifr->ifr_reqcap &= ifp->if_capabilities;
	/* Pass SIOCSIFCAP to VF. */
	error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr);

	/*
	 * NOTE:
	 * The error will be propagated to the callers, however, it
	 * is _not_ useful here.
	 */

	/*
	 * Merge VF's enabled capabilities.
1271 */ 1272 ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities; 1273 1274 tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc); 1275 if (ifp->if_capenable & IFCAP_TXCSUM) 1276 ifp->if_hwassist |= tmp; 1277 else 1278 ifp->if_hwassist &= ~tmp; 1279 1280 tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc); 1281 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 1282 ifp->if_hwassist |= tmp; 1283 else 1284 ifp->if_hwassist &= ~tmp; 1285 1286 tmp = vf_ifp->if_hwassist & CSUM_IP_TSO; 1287 if (ifp->if_capenable & IFCAP_TSO4) 1288 ifp->if_hwassist |= tmp; 1289 else 1290 ifp->if_hwassist &= ~tmp; 1291 1292 tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO; 1293 if (ifp->if_capenable & IFCAP_TSO6) 1294 ifp->if_hwassist |= tmp; 1295 else 1296 ifp->if_hwassist &= ~tmp; 1297 1298 return (error); 1299 } 1300 1301 static int 1302 hn_xpnt_vf_iocsetflags(struct hn_softc *sc) 1303 { 1304 struct ifnet *vf_ifp; 1305 struct ifreq ifr; 1306 1307 HN_LOCK_ASSERT(sc); 1308 vf_ifp = sc->hn_vf_ifp; 1309 1310 memset(&ifr, 0, sizeof(ifr)); 1311 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1312 ifr.ifr_flags = vf_ifp->if_flags & 0xffff; 1313 ifr.ifr_flagshigh = vf_ifp->if_flags >> 16; 1314 return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr)); 1315 } 1316 1317 static void 1318 hn_xpnt_vf_saveifflags(struct hn_softc *sc) 1319 { 1320 struct ifnet *ifp = sc->hn_ifp; 1321 int allmulti = 0; 1322 1323 HN_LOCK_ASSERT(sc); 1324 1325 /* XXX vlan(4) style mcast addr maintenance */ 1326 if (!TAILQ_EMPTY(&ifp->if_multiaddrs)) 1327 allmulti = IFF_ALLMULTI; 1328 1329 /* Always set the VF's if_flags */ 1330 sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti; 1331 } 1332 1333 static void 1334 hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m) 1335 { 1336 struct rm_priotracker pt; 1337 struct ifnet *hn_ifp = NULL; 1338 struct mbuf *mn; 1339 1340 /* 1341 * XXX racy, if hn(4) ever detached. 
1342 */ 1343 rm_rlock(&hn_vfmap_lock, &pt); 1344 if (vf_ifp->if_index < hn_vfmap_size) 1345 hn_ifp = hn_vfmap[vf_ifp->if_index]; 1346 rm_runlock(&hn_vfmap_lock, &pt); 1347 1348 if (hn_ifp != NULL) { 1349 for (mn = m; mn != NULL; mn = mn->m_nextpkt) { 1350 /* 1351 * Allow tapping on the VF. 1352 */ 1353 ETHER_BPF_MTAP(vf_ifp, mn); 1354 1355 /* 1356 * Update VF stats. 1357 */ 1358 if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) { 1359 if_inc_counter(vf_ifp, IFCOUNTER_IBYTES, 1360 mn->m_pkthdr.len); 1361 } 1362 /* 1363 * XXX IFCOUNTER_IMCAST 1364 * This stat updating is kinda invasive, since it 1365 * requires two checks on the mbuf: the length check 1366 * and the ethernet header check. As of this write, 1367 * all multicast packets go directly to hn(4), which 1368 * makes imcast stat updating in the VF a try in vian. 1369 */ 1370 1371 /* 1372 * Fix up rcvif and increase hn(4)'s ipackets. 1373 */ 1374 mn->m_pkthdr.rcvif = hn_ifp; 1375 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 1376 } 1377 /* 1378 * Go through hn(4)'s if_input. 1379 */ 1380 hn_ifp->if_input(hn_ifp, m); 1381 } else { 1382 /* 1383 * In the middle of the transition; free this 1384 * mbuf chain. 
1385 */ 1386 while (m != NULL) { 1387 mn = m->m_nextpkt; 1388 m->m_nextpkt = NULL; 1389 m_freem(m); 1390 m = mn; 1391 } 1392 } 1393 } 1394 1395 static void 1396 hn_mtu_change_fixup(struct hn_softc *sc) 1397 { 1398 struct ifnet *ifp; 1399 1400 HN_LOCK_ASSERT(sc); 1401 ifp = sc->hn_ifp; 1402 1403 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 1404 #if __FreeBSD_version >= 1100099 1405 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) 1406 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 1407 #endif 1408 } 1409 1410 static uint32_t 1411 hn_rss_type_fromndis(uint32_t rss_hash) 1412 { 1413 uint32_t types = 0; 1414 1415 if (rss_hash & NDIS_HASH_IPV4) 1416 types |= RSS_TYPE_IPV4; 1417 if (rss_hash & NDIS_HASH_TCP_IPV4) 1418 types |= RSS_TYPE_TCP_IPV4; 1419 if (rss_hash & NDIS_HASH_IPV6) 1420 types |= RSS_TYPE_IPV6; 1421 if (rss_hash & NDIS_HASH_IPV6_EX) 1422 types |= RSS_TYPE_IPV6_EX; 1423 if (rss_hash & NDIS_HASH_TCP_IPV6) 1424 types |= RSS_TYPE_TCP_IPV6; 1425 if (rss_hash & NDIS_HASH_TCP_IPV6_EX) 1426 types |= RSS_TYPE_TCP_IPV6_EX; 1427 if (rss_hash & NDIS_HASH_UDP_IPV4_X) 1428 types |= RSS_TYPE_UDP_IPV4; 1429 return (types); 1430 } 1431 1432 static uint32_t 1433 hn_rss_type_tondis(uint32_t types) 1434 { 1435 uint32_t rss_hash = 0; 1436 1437 KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0, 1438 ("UDP6 and UDP6EX are not supported")); 1439 1440 if (types & RSS_TYPE_IPV4) 1441 rss_hash |= NDIS_HASH_IPV4; 1442 if (types & RSS_TYPE_TCP_IPV4) 1443 rss_hash |= NDIS_HASH_TCP_IPV4; 1444 if (types & RSS_TYPE_IPV6) 1445 rss_hash |= NDIS_HASH_IPV6; 1446 if (types & RSS_TYPE_IPV6_EX) 1447 rss_hash |= NDIS_HASH_IPV6_EX; 1448 if (types & RSS_TYPE_TCP_IPV6) 1449 rss_hash |= NDIS_HASH_TCP_IPV6; 1450 if (types & RSS_TYPE_TCP_IPV6_EX) 1451 rss_hash |= NDIS_HASH_TCP_IPV6_EX; 1452 if (types & RSS_TYPE_UDP_IPV4) 1453 rss_hash |= NDIS_HASH_UDP_IPV4_X; 1454 return (rss_hash); 1455 } 1456 1457 static void 1458 hn_rss_mbuf_hash(struct hn_softc *sc, 
uint32_t mbuf_hash) 1459 { 1460 int i; 1461 1462 HN_LOCK_ASSERT(sc); 1463 1464 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1465 sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash; 1466 } 1467 1468 static void 1469 hn_vf_rss_fixup(struct hn_softc *sc, bool reconf) 1470 { 1471 struct ifnet *ifp, *vf_ifp; 1472 struct ifrsshash ifrh; 1473 struct ifrsskey ifrk; 1474 int error; 1475 uint32_t my_types, diff_types, mbuf_types = 0; 1476 1477 HN_LOCK_ASSERT(sc); 1478 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1479 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1480 1481 if (sc->hn_rx_ring_inuse == 1) { 1482 /* No RSS on synthetic parts; done. */ 1483 return; 1484 } 1485 if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) { 1486 /* Synthetic parts do not support Toeplitz; done. */ 1487 return; 1488 } 1489 1490 ifp = sc->hn_ifp; 1491 vf_ifp = sc->hn_vf_ifp; 1492 1493 /* 1494 * Extract VF's RSS key. Only 40 bytes key for Toeplitz is 1495 * supported. 1496 */ 1497 memset(&ifrk, 0, sizeof(ifrk)); 1498 strlcpy(ifrk.ifrk_name, vf_ifp->if_xname, sizeof(ifrk.ifrk_name)); 1499 error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSKEY, (caddr_t)&ifrk); 1500 if (error) { 1501 if_printf(ifp, "%s SIOCGRSSKEY failed: %d\n", 1502 vf_ifp->if_xname, error); 1503 goto done; 1504 } 1505 if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) { 1506 if_printf(ifp, "%s RSS function %u is not Toeplitz\n", 1507 vf_ifp->if_xname, ifrk.ifrk_func); 1508 goto done; 1509 } 1510 if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) { 1511 if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n", 1512 vf_ifp->if_xname, ifrk.ifrk_keylen); 1513 goto done; 1514 } 1515 1516 /* 1517 * Extract VF's RSS hash. Only Toeplitz is supported. 
1518 */ 1519 memset(&ifrh, 0, sizeof(ifrh)); 1520 strlcpy(ifrh.ifrh_name, vf_ifp->if_xname, sizeof(ifrh.ifrh_name)); 1521 error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSHASH, (caddr_t)&ifrh); 1522 if (error) { 1523 if_printf(ifp, "%s SIOCGRSSHASH failed: %d\n", 1524 vf_ifp->if_xname, error); 1525 goto done; 1526 } 1527 if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) { 1528 if_printf(ifp, "%s RSS function %u is not Toeplitz\n", 1529 vf_ifp->if_xname, ifrh.ifrh_func); 1530 goto done; 1531 } 1532 1533 my_types = hn_rss_type_fromndis(sc->hn_rss_hcap); 1534 if ((ifrh.ifrh_types & my_types) == 0) { 1535 /* This disables RSS; ignore it then */ 1536 if_printf(ifp, "%s intersection of RSS types failed. " 1537 "VF %#x, mine %#x\n", vf_ifp->if_xname, 1538 ifrh.ifrh_types, my_types); 1539 goto done; 1540 } 1541 1542 diff_types = my_types ^ ifrh.ifrh_types; 1543 my_types &= ifrh.ifrh_types; 1544 mbuf_types = my_types; 1545 1546 /* 1547 * Detect RSS hash value/type confliction. 1548 * 1549 * NOTE: 1550 * We don't disable the hash type, but stop delivery the hash 1551 * value/type through mbufs on RX path. 1552 * 1553 * XXX If HN_CAP_UDPHASH is set in hn_caps, then UDP 4-tuple 1554 * hash is delivered with type of TCP_IPV4. This means if 1555 * UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at 1556 * least to hn_mbuf_hash. However, given that _all_ of the 1557 * NICs implement TCP_IPV4, this will _not_ impose any issues 1558 * here. 1559 */ 1560 if ((my_types & RSS_TYPE_IPV4) && 1561 (diff_types & ifrh.ifrh_types & 1562 (RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) { 1563 /* Conflict; disable IPV4 hash type/value delivery. */ 1564 if_printf(ifp, "disable IPV4 mbuf hash delivery\n"); 1565 mbuf_types &= ~RSS_TYPE_IPV4; 1566 } 1567 if ((my_types & RSS_TYPE_IPV6) && 1568 (diff_types & ifrh.ifrh_types & 1569 (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | 1570 RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | 1571 RSS_TYPE_IPV6_EX))) { 1572 /* Conflict; disable IPV6 hash type/value delivery. 
*/ 1573 if_printf(ifp, "disable IPV6 mbuf hash delivery\n"); 1574 mbuf_types &= ~RSS_TYPE_IPV6; 1575 } 1576 if ((my_types & RSS_TYPE_IPV6_EX) && 1577 (diff_types & ifrh.ifrh_types & 1578 (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | 1579 RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | 1580 RSS_TYPE_IPV6))) { 1581 /* Conflict; disable IPV6_EX hash type/value delivery. */ 1582 if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n"); 1583 mbuf_types &= ~RSS_TYPE_IPV6_EX; 1584 } 1585 if ((my_types & RSS_TYPE_TCP_IPV6) && 1586 (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) { 1587 /* Conflict; disable TCP_IPV6 hash type/value delivery. */ 1588 if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n"); 1589 mbuf_types &= ~RSS_TYPE_TCP_IPV6; 1590 } 1591 if ((my_types & RSS_TYPE_TCP_IPV6_EX) && 1592 (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) { 1593 /* Conflict; disable TCP_IPV6_EX hash type/value delivery. */ 1594 if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n"); 1595 mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX; 1596 } 1597 if ((my_types & RSS_TYPE_UDP_IPV6) && 1598 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) { 1599 /* Conflict; disable UDP_IPV6 hash type/value delivery. */ 1600 if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n"); 1601 mbuf_types &= ~RSS_TYPE_UDP_IPV6; 1602 } 1603 if ((my_types & RSS_TYPE_UDP_IPV6_EX) && 1604 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) { 1605 /* Conflict; disable UDP_IPV6_EX hash type/value delivery. */ 1606 if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n"); 1607 mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX; 1608 } 1609 1610 /* 1611 * Indirect table does not matter. 1612 */ 1613 1614 sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) | 1615 hn_rss_type_tondis(my_types); 1616 memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key)); 1617 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 1618 1619 if (reconf) { 1620 error = hn_rss_reconfig(sc); 1621 if (error) { 1622 /* XXX roll-back? 
*/ 1623 if_printf(ifp, "hn_rss_reconfig failed: %d\n", error); 1624 /* XXX keep going. */ 1625 } 1626 } 1627 done: 1628 /* Hash deliverability for mbufs. */ 1629 hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types)); 1630 } 1631 1632 static void 1633 hn_vf_rss_restore(struct hn_softc *sc) 1634 { 1635 1636 HN_LOCK_ASSERT(sc); 1637 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1638 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1639 1640 if (sc->hn_rx_ring_inuse == 1) 1641 goto done; 1642 1643 /* 1644 * Restore hash types. Key does _not_ matter. 1645 */ 1646 if (sc->hn_rss_hash != sc->hn_rss_hcap) { 1647 int error; 1648 1649 sc->hn_rss_hash = sc->hn_rss_hcap; 1650 error = hn_rss_reconfig(sc); 1651 if (error) { 1652 if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n", 1653 error); 1654 /* XXX keep going. */ 1655 } 1656 } 1657 done: 1658 /* Hash deliverability for mbufs. */ 1659 hn_rss_mbuf_hash(sc, NDIS_HASH_ALL); 1660 } 1661 1662 static void 1663 hn_xpnt_vf_setready(struct hn_softc *sc) 1664 { 1665 struct ifnet *ifp, *vf_ifp; 1666 struct ifreq ifr; 1667 1668 HN_LOCK_ASSERT(sc); 1669 ifp = sc->hn_ifp; 1670 vf_ifp = sc->hn_vf_ifp; 1671 1672 /* 1673 * Mark the VF ready. 1674 */ 1675 sc->hn_vf_rdytick = 0; 1676 1677 /* 1678 * Save information for restoration. 1679 */ 1680 sc->hn_saved_caps = ifp->if_capabilities; 1681 sc->hn_saved_tsomax = ifp->if_hw_tsomax; 1682 sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount; 1683 sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize; 1684 1685 /* 1686 * Intersect supported/enabled capabilities. 1687 * 1688 * NOTE: 1689 * if_hwassist is not changed here. 1690 */ 1691 ifp->if_capabilities &= vf_ifp->if_capabilities; 1692 ifp->if_capenable &= ifp->if_capabilities; 1693 1694 /* 1695 * Fix TSO settings. 
*/
	if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax)
		ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax;
	if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount)
		ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount;
	if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize)
		ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize;

	/*
	 * Change VF's enabled capabilities.
	 */
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
	ifr.ifr_reqcap = ifp->if_capenable;
	/* The result is deliberately ignored here. */
	hn_xpnt_vf_iocsetcaps(sc, &ifr);

	if (ifp->if_mtu != ETHERMTU) {
		int error;

		/*
		 * Change VF's MTU.
		 */
		memset(&ifr, 0, sizeof(ifr));
		strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
		ifr.ifr_mtu = ifp->if_mtu;
		error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr);
		if (error) {
			if_printf(ifp, "%s SIOCSIFMTU %u failed\n",
			    vf_ifp->if_xname, ifp->if_mtu);
			/* Fall back to the default MTU on hn(4) itself. */
			if (ifp->if_mtu > ETHERMTU) {
				if_printf(ifp, "change MTU to %d\n", ETHERMTU);

				/*
				 * XXX
				 * No need to adjust the synthetic parts' MTU;
				 * failure of the adjustment will cause us
				 * infinite headache.
				 */
				ifp->if_mtu = ETHERMTU;
				hn_mtu_change_fixup(sc);
			}
		}
	}
}

/*
 * Tell whether the transparent mode VF can be used.
 *
 * Returns false if transparent VF mode is off or no VF is attached.
 * A hn_vf_rdytick of 0 means the VF has already been marked ready.
 * Otherwise the VF becomes ready, through hn_xpnt_vf_setready(), once
 * 'ticks' has reached hn_vf_rdytick.
 *
 * NOTE(review): the plain '>' comparison against 'ticks' does not look
 * wrap-safe -- confirm whether a 'ticks' rollover during the wait
 * matters here.
 */
static bool
hn_xpnt_vf_isready(struct hn_softc *sc)
{

	HN_LOCK_ASSERT(sc);

	if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL)
		return (false);

	if (sc->hn_vf_rdytick == 0)
		return (true);

	if (sc->hn_vf_rdytick > ticks)
		return (false);

	/* Mark VF as ready. */
	hn_xpnt_vf_setready(sc);
	return (true);
}

/*
 * Mark the transparent mode VF as enabled: set HN_XVFFLAG_ENABLED
 * under the hn_vf_lock write lock and set HN_RX_FLAG_XPNT_VF on every
 * RX ring.
 */
static void
hn_xpnt_vf_setenable(struct hn_softc *sc)
{
	int i;

	HN_LOCK_ASSERT(sc);

	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
	rm_wlock(&sc->hn_vf_lock);
	sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED;
	rm_wunlock(&sc->hn_vf_lock);

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
		sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF;
}

/*
 * Mark the transparent mode VF as disabled: clear HN_XVFFLAG_ENABLED
 * (and, if clear_vf is true, sc->hn_vf_ifp) under the hn_vf_lock write
 * lock, then clear HN_RX_FLAG_XPNT_VF on every RX ring.
 */
static void
hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf)
{
	int i;

	HN_LOCK_ASSERT(sc);

	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
	rm_wlock(&sc->hn_vf_lock);
	sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
	if (clear_vf)
		sc->hn_vf_ifp = NULL;
	rm_wunlock(&sc->hn_vf_lock);

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
		sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF;
}

/*
 * Bring the transparent mode VF up and switch to it: copy hn(4)'s
 * interface flags to the VF, set IFF_UP through SIOCSIFFLAGS, switch
 * the NVS datapath to the VF, fix up RSS and finally mark the VF as
 * enabled.  If bringing the VF up fails, the failure is logged and
 * nothing else is changed.
 */
static void
hn_xpnt_vf_init(struct hn_softc *sc)
{
	int error;

	HN_LOCK_ASSERT(sc);

	KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
	    ("%s: transparent VF was enabled", sc->hn_ifp->if_xname));

	if (bootverbose) {
		if_printf(sc->hn_ifp, "try bringing up %s\n",
		    sc->hn_vf_ifp->if_xname);
	}

	/*
	 * Bring the VF up.
	 */
	hn_xpnt_vf_saveifflags(sc);
	sc->hn_vf_ifp->if_flags |= IFF_UP;
	error = hn_xpnt_vf_iocsetflags(sc);
	if (error) {
		if_printf(sc->hn_ifp, "bringing up %s failed: %d\n",
		    sc->hn_vf_ifp->if_xname, error);
		return;
	}

	/*
	 * NOTE:
	 * Datapath setting must happen _after_ bringing the VF up.
	 */
	hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);

	/*
	 * NOTE:
	 * Fixup RSS related bits _after_ the VF is brought up, since
	 * many VFs generate RSS key during its initialization.
	 */
	hn_vf_rss_fixup(sc, true);

	/* Mark transparent mode VF as enabled.
*/
	hn_xpnt_vf_setenable(sc);
}

/*
 * Delayed VF initialization task.  'xsc' is the softc.
 *
 * Does nothing if the synthetic parts are not attached, no VF is
 * attached, or the transparent VF is already enabled.  Otherwise the
 * VF is marked ready if it is not yet, and, if hn(4) is running, it is
 * brought up through hn_xpnt_vf_init().
 */
static void
hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	HN_LOCK(sc);

	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
		goto done;
	if (sc->hn_vf_ifp == NULL)
		goto done;
	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
		goto done;

	if (sc->hn_vf_rdytick != 0) {
		/* Mark VF as ready. */
		hn_xpnt_vf_setready(sc);
	}

	if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) {
		/*
		 * Delayed VF initialization.
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "delayed initialize %s\n",
			    sc->hn_vf_ifp->if_xname);
		}
		hn_xpnt_vf_init(sc);
	}
done:
	HN_UNLOCK(sc);
}

/*
 * ifnet attach event handler.  'xsc' is the softc.
 *
 * If 'ifp' is this device's VF and no VF is recorded yet, map the VF's
 * if_index to hn(4)'s ifnet in hn_vfmap (growing the map if needed) and
 * record the VF in sc->hn_vf_ifp.  In transparent VF mode the VF's
 * if_input is additionally replaced by hn_xpnt_vf_input(), link status
 * management is suspended, and the delayed VF initialization task is
 * scheduled.
 */
static void
hn_ifnet_attevent(void *xsc, struct ifnet *ifp)
{
	struct hn_softc *sc = xsc;

	HN_LOCK(sc);

	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
		goto done;

	if (!hn_ismyvf(sc, ifp))
		goto done;

	if (sc->hn_vf_ifp != NULL) {
		if_printf(sc->hn_ifp, "%s was attached as VF\n",
		    sc->hn_vf_ifp->if_xname);
		goto done;
	}

	if (hn_xpnt_vf && ifp->if_start != NULL) {
		/*
		 * ifnet.if_start is _not_ supported by transparent
		 * mode VF; mainly due to the IFF_DRV_OACTIVE flag.
		 */
		if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported "
		    "in transparent VF mode.\n", ifp->if_xname);
		goto done;
	}

	rm_wlock(&hn_vfmap_lock);

	/* Grow the map so that it can be indexed by this if_index. */
	if (ifp->if_index >= hn_vfmap_size) {
		struct ifnet **newmap;
		int newsize;

		newsize = ifp->if_index + HN_VFMAP_SIZE_DEF;
		newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF,
		    M_WAITOK | M_ZERO);

		memcpy(newmap, hn_vfmap,
		    sizeof(struct ifnet *) * hn_vfmap_size);
		free(hn_vfmap, M_DEVBUF);
		hn_vfmap = newmap;
		hn_vfmap_size = newsize;
	}
	KASSERT(hn_vfmap[ifp->if_index] == NULL,
	    ("%s: ifindex %d was mapped to %s",
	     ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname));
	hn_vfmap[ifp->if_index] = sc->hn_ifp;

	rm_wunlock(&hn_vfmap_lock);

	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
	rm_wlock(&sc->hn_vf_lock);
	KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
	    ("%s: transparent VF was enabled", sc->hn_ifp->if_xname));
	sc->hn_vf_ifp = ifp;
	rm_wunlock(&sc->hn_vf_lock);

	if (hn_xpnt_vf) {
		int wait_ticks;

		/*
		 * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp.
		 * Save vf_ifp's current if_input for later restoration.
		 */
		sc->hn_vf_input = ifp->if_input;
		ifp->if_input = hn_xpnt_vf_input;

		/*
		 * Stop link status management; use the VF's.
		 */
		hn_suspend_mgmt(sc);

		/*
		 * Give the VF some time to complete its attach routine.
		 */
		wait_ticks = hn_xpnt_vf_attwait * hz;
		sc->hn_vf_rdytick = ticks + wait_ticks;

		taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init,
		    wait_ticks);
	}
done:
	HN_UNLOCK(sc);
}

/*
 * ifnet detach event handler.  'xsc' is the softc.
 *
 * If 'ifp' is the recorded VF, undo what hn_ifnet_attevent() and the
 * VF bring-up did: in transparent VF mode drain the delayed
 * initialization task, restore the VF's if_input, switch the datapath
 * back, and restore the saved capabilities, TSO limits, RSS settings
 * and link status management; then mark the VF as disabled and remove
 * it from hn_vfmap.
 */
static void
hn_ifnet_detevent(void *xsc, struct ifnet *ifp)
{
	struct hn_softc *sc = xsc;

	HN_LOCK(sc);

	if (sc->hn_vf_ifp == NULL)
		goto done;

	if (!hn_ismyvf(sc, ifp))
		goto done;

	if (hn_xpnt_vf) {
		/*
		 * Make sure that the delayed initialization is not running.
		 *
		 * NOTE:
		 * - This lock _must_ be released, since the hn_vf_init task
		 *   will try holding this lock.
		 * - It is safe to release this lock here, since the
		 *   hn_ifnet_attevent() is interlocked by the hn_vf_ifp.
		 *
		 * XXX racy, if hn(4) ever detached.
		 */
		HN_UNLOCK(sc);
		taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init);
		HN_LOCK(sc);

		KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved",
		    sc->hn_ifp->if_xname));
		ifp->if_input = sc->hn_vf_input;
		sc->hn_vf_input = NULL;

		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) &&
		    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED))
			hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);

		if (sc->hn_vf_rdytick == 0) {
			/*
			 * The VF was ready; restore some settings.
			 */
			sc->hn_ifp->if_capabilities = sc->hn_saved_caps;
			/*
			 * NOTE:
			 * There is _no_ need to fixup if_capenable and
			 * if_hwassist, since the if_capabilities before
			 * restoration was an intersection of the VF's
			 * if_capabilities and the synthetic device's
			 * if_capabilities.
			 */
			sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax;
			sc->hn_ifp->if_hw_tsomaxsegcount =
			    sc->hn_saved_tsosegcnt;
			sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz;
		}

		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
			/*
			 * Restore RSS settings.
2019 */ 2020 hn_vf_rss_restore(sc); 2021 2022 /* 2023 * Resume link status management, which was suspended 2024 * by hn_ifnet_attevent(). 2025 */ 2026 hn_resume_mgmt(sc); 2027 } 2028 } 2029 2030 /* Mark transparent mode VF as disabled. */ 2031 hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */); 2032 2033 rm_wlock(&hn_vfmap_lock); 2034 2035 KASSERT(ifp->if_index < hn_vfmap_size, 2036 ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size)); 2037 if (hn_vfmap[ifp->if_index] != NULL) { 2038 KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp, 2039 ("%s: ifindex %d was mapped to %s", 2040 ifp->if_xname, ifp->if_index, 2041 hn_vfmap[ifp->if_index]->if_xname)); 2042 hn_vfmap[ifp->if_index] = NULL; 2043 } 2044 2045 rm_wunlock(&hn_vfmap_lock); 2046 done: 2047 HN_UNLOCK(sc); 2048 } 2049 2050 static void 2051 hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state) 2052 { 2053 struct hn_softc *sc = xsc; 2054 2055 if (sc->hn_vf_ifp == ifp) 2056 if_link_state_change(sc->hn_ifp, link_state); 2057 } 2058 2059 static int 2060 hn_probe(device_t dev) 2061 { 2062 2063 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) { 2064 device_set_desc(dev, "Hyper-V Network Interface"); 2065 return BUS_PROBE_DEFAULT; 2066 } 2067 return ENXIO; 2068 } 2069 2070 static int 2071 hn_attach(device_t dev) 2072 { 2073 struct hn_softc *sc = device_get_softc(dev); 2074 struct sysctl_oid_list *child; 2075 struct sysctl_ctx_list *ctx; 2076 uint8_t eaddr[ETHER_ADDR_LEN]; 2077 struct ifnet *ifp = NULL; 2078 int error, ring_cnt, tx_ring_cnt; 2079 uint32_t mtu; 2080 2081 sc->hn_dev = dev; 2082 sc->hn_prichan = vmbus_get_channel(dev); 2083 HN_LOCK_INIT(sc); 2084 rm_init(&sc->hn_vf_lock, "hnvf"); 2085 if (hn_xpnt_vf && hn_xpnt_vf_accbpf) 2086 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 2087 2088 /* 2089 * Initialize these tunables once. 2090 */ 2091 sc->hn_agg_size = hn_tx_agg_size; 2092 sc->hn_agg_pkts = hn_tx_agg_pkts; 2093 2094 /* 2095 * Setup taskqueue for transmission. 
2096 */ 2097 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) { 2098 int i; 2099 2100 sc->hn_tx_taskqs = 2101 malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 2102 M_DEVBUF, M_WAITOK); 2103 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 2104 sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx", 2105 M_WAITOK, taskqueue_thread_enqueue, 2106 &sc->hn_tx_taskqs[i]); 2107 taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET, 2108 "%s tx%d", device_get_nameunit(dev), i); 2109 } 2110 } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) { 2111 sc->hn_tx_taskqs = hn_tx_taskque; 2112 } 2113 2114 /* 2115 * Setup taskqueue for mangement tasks, e.g. link status. 2116 */ 2117 sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, 2118 taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); 2119 taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", 2120 device_get_nameunit(dev)); 2121 TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); 2122 TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); 2123 TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, 2124 hn_netchg_status_taskfunc, sc); 2125 2126 if (hn_xpnt_vf) { 2127 /* 2128 * Setup taskqueue for VF tasks, e.g. delayed VF bringing up. 2129 */ 2130 sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK, 2131 taskqueue_thread_enqueue, &sc->hn_vf_taskq); 2132 taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf", 2133 device_get_nameunit(dev)); 2134 TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0, 2135 hn_xpnt_vf_init_taskfunc, sc); 2136 } 2137 2138 /* 2139 * Allocate ifnet and setup its name earlier, so that if_printf 2140 * can be used by functions, which will be called after 2141 * ether_ifattach(). 2142 */ 2143 ifp = sc->hn_ifp = if_alloc(IFT_ETHER); 2144 ifp->if_softc = sc; 2145 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 2146 2147 /* 2148 * Initialize ifmedia earlier so that it can be unconditionally 2149 * destroyed, if error happened later on. 
2150 */ 2151 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 2152 2153 /* 2154 * Figure out the # of RX rings (ring_cnt) and the # of TX rings 2155 * to use (tx_ring_cnt). 2156 * 2157 * NOTE: 2158 * The # of RX rings to use is same as the # of channels to use. 2159 */ 2160 ring_cnt = hn_chan_cnt; 2161 if (ring_cnt <= 0) { 2162 /* Default */ 2163 ring_cnt = mp_ncpus; 2164 if (ring_cnt > HN_RING_CNT_DEF_MAX) 2165 ring_cnt = HN_RING_CNT_DEF_MAX; 2166 } else if (ring_cnt > mp_ncpus) { 2167 ring_cnt = mp_ncpus; 2168 } 2169 #ifdef RSS 2170 if (ring_cnt > rss_getnumbuckets()) 2171 ring_cnt = rss_getnumbuckets(); 2172 #endif 2173 2174 tx_ring_cnt = hn_tx_ring_cnt; 2175 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) 2176 tx_ring_cnt = ring_cnt; 2177 #ifdef HN_IFSTART_SUPPORT 2178 if (hn_use_if_start) { 2179 /* ifnet.if_start only needs one TX ring. */ 2180 tx_ring_cnt = 1; 2181 } 2182 #endif 2183 2184 /* 2185 * Set the leader CPU for channels. 2186 */ 2187 sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; 2188 2189 /* 2190 * Create enough TX/RX rings, even if only limited number of 2191 * channels can be allocated. 2192 */ 2193 error = hn_create_tx_data(sc, tx_ring_cnt); 2194 if (error) 2195 goto failed; 2196 error = hn_create_rx_data(sc, ring_cnt); 2197 if (error) 2198 goto failed; 2199 2200 /* 2201 * Create transaction context for NVS and RNDIS transactions. 2202 */ 2203 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), 2204 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); 2205 if (sc->hn_xact == NULL) { 2206 error = ENXIO; 2207 goto failed; 2208 } 2209 2210 /* 2211 * Install orphan handler for the revocation of this device's 2212 * primary channel. 2213 * 2214 * NOTE: 2215 * The processing order is critical here: 2216 * Install the orphan handler, _before_ testing whether this 2217 * device's primary channel has been revoked or not. 
2218 */ 2219 vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); 2220 if (vmbus_chan_is_revoked(sc->hn_prichan)) { 2221 error = ENXIO; 2222 goto failed; 2223 } 2224 2225 /* 2226 * Attach the synthetic parts, i.e. NVS and RNDIS. 2227 */ 2228 error = hn_synth_attach(sc, ETHERMTU); 2229 if (error) 2230 goto failed; 2231 2232 error = hn_rndis_get_eaddr(sc, eaddr); 2233 if (error) 2234 goto failed; 2235 2236 error = hn_rndis_get_mtu(sc, &mtu); 2237 if (error) 2238 mtu = ETHERMTU; 2239 else if (bootverbose) 2240 device_printf(dev, "RNDIS mtu %u\n", mtu); 2241 2242 #if __FreeBSD_version >= 1100099 2243 if (sc->hn_rx_ring_inuse > 1) { 2244 /* 2245 * Reduce TCP segment aggregation limit for multiple 2246 * RX rings to increase ACK timeliness. 2247 */ 2248 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); 2249 } 2250 #endif 2251 2252 /* 2253 * Fixup TX/RX stuffs after synthetic parts are attached. 2254 */ 2255 hn_fixup_tx_data(sc); 2256 hn_fixup_rx_data(sc); 2257 2258 ctx = device_get_sysctl_ctx(dev); 2259 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 2260 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, 2261 &sc->hn_nvs_ver, 0, "NVS version"); 2262 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", 2263 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2264 hn_ndis_version_sysctl, "A", "NDIS version"); 2265 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", 2266 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2267 hn_caps_sysctl, "A", "capabilities"); 2268 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", 2269 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2270 hn_hwassist_sysctl, "A", "hwassist"); 2271 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", 2272 CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); 2273 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", 2274 CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, 2275 "max # of TSO segments"); 2276 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", 2277 CTLFLAG_RD, 
&ifp->if_hw_tsomaxsegsize, 0, 2278 "max size of TSO segment"); 2279 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", 2280 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2281 hn_rxfilter_sysctl, "A", "rxfilter"); 2282 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", 2283 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2284 hn_rss_hash_sysctl, "A", "RSS hash"); 2285 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap", 2286 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2287 hn_rss_hcap_sysctl, "A", "RSS hash capabilities"); 2288 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash", 2289 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2290 hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs"); 2291 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", 2292 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); 2293 #ifndef RSS 2294 /* 2295 * Don't allow RSS key/indirect table changes, if RSS is defined. 2296 */ 2297 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", 2298 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2299 hn_rss_key_sysctl, "IU", "RSS key"); 2300 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", 2301 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2302 hn_rss_ind_sysctl, "IU", "RSS indirect table"); 2303 #endif 2304 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", 2305 CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, 2306 "RNDIS offered packet transmission aggregation size limit"); 2307 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", 2308 CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, 2309 "RNDIS offered packet transmission aggregation count limit"); 2310 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", 2311 CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, 2312 "RNDIS packet transmission aggregation alignment"); 2313 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", 2314 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2315 hn_txagg_size_sysctl, "I", 2316 "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); 
2317 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", 2318 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2319 hn_txagg_pkts_sysctl, "I", 2320 "Packet transmission aggregation packets, " 2321 "0 -- disable, -1 -- auto"); 2322 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling", 2323 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2324 hn_polling_sysctl, "I", 2325 "Polling frequency: [100,1000000], 0 disable polling"); 2326 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", 2327 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2328 hn_vf_sysctl, "A", "Virtual Function's name"); 2329 if (!hn_xpnt_vf) { 2330 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", 2331 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2332 hn_rxvf_sysctl, "A", "activated Virtual Function's name"); 2333 } else { 2334 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", 2335 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2336 hn_xpnt_vf_enabled_sysctl, "I", 2337 "Transparent VF enabled"); 2338 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", 2339 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2340 hn_xpnt_vf_accbpf_sysctl, "I", 2341 "Accurate BPF for transparent VF"); 2342 } 2343 2344 /* 2345 * Setup the ifmedia, which has been initialized earlier. 2346 */ 2347 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 2348 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 2349 /* XXX ifmedia_set really should do this for us */ 2350 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 2351 2352 /* 2353 * Setup the ifnet for this interface. 
2354 */ 2355 2356 ifp->if_baudrate = IF_Gbps(10); 2357 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2358 ifp->if_ioctl = hn_ioctl; 2359 ifp->if_init = hn_init; 2360 #ifdef HN_IFSTART_SUPPORT 2361 if (hn_use_if_start) { 2362 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); 2363 2364 ifp->if_start = hn_start; 2365 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); 2366 ifp->if_snd.ifq_drv_maxlen = qdepth - 1; 2367 IFQ_SET_READY(&ifp->if_snd); 2368 } else 2369 #endif 2370 { 2371 ifp->if_transmit = hn_transmit; 2372 ifp->if_qflush = hn_xmit_qflush; 2373 } 2374 2375 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; 2376 #ifdef foo 2377 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2378 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; 2379 #endif 2380 if (sc->hn_caps & HN_CAP_VLAN) { 2381 /* XXX not sure about VLAN_MTU. */ 2382 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; 2383 } 2384 2385 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; 2386 if (ifp->if_hwassist & HN_CSUM_IP_MASK) 2387 ifp->if_capabilities |= IFCAP_TXCSUM; 2388 if (ifp->if_hwassist & HN_CSUM_IP6_MASK) 2389 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; 2390 if (sc->hn_caps & HN_CAP_TSO4) { 2391 ifp->if_capabilities |= IFCAP_TSO4; 2392 ifp->if_hwassist |= CSUM_IP_TSO; 2393 } 2394 if (sc->hn_caps & HN_CAP_TSO6) { 2395 ifp->if_capabilities |= IFCAP_TSO6; 2396 ifp->if_hwassist |= CSUM_IP6_TSO; 2397 } 2398 2399 /* Enable all available capabilities by default. */ 2400 ifp->if_capenable = ifp->if_capabilities; 2401 2402 /* 2403 * Disable IPv6 TSO and TXCSUM by default, they still can 2404 * be enabled through SIOCSIFCAP. 2405 */ 2406 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); 2407 ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); 2408 2409 if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { 2410 /* 2411 * Lock hn_set_tso_maxsize() to simplify its 2412 * internal logic. 
2413 */ 2414 HN_LOCK(sc); 2415 hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); 2416 HN_UNLOCK(sc); 2417 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; 2418 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2419 } 2420 2421 ether_ifattach(ifp, eaddr); 2422 2423 if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { 2424 if_printf(ifp, "TSO segcnt %u segsz %u\n", 2425 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); 2426 } 2427 if (mtu < ETHERMTU) { 2428 if_printf(ifp, "fixup mtu %u -> %u\n", ifp->if_mtu, mtu); 2429 ifp->if_mtu = mtu; 2430 } 2431 2432 /* Inform the upper layer about the long frame support. */ 2433 ifp->if_hdrlen = sizeof(struct ether_vlan_header); 2434 2435 /* 2436 * Kick off link status check. 2437 */ 2438 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 2439 hn_update_link_status(sc); 2440 2441 if (!hn_xpnt_vf) { 2442 sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, 2443 hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); 2444 sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, 2445 hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); 2446 } else { 2447 sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, 2448 hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); 2449 } 2450 2451 /* 2452 * NOTE: 2453 * Subscribe ether_ifattach event, instead of ifnet_arrival event, 2454 * since interface's LLADDR is needed; interface LLADDR is not 2455 * available when ifnet_arrival event is triggered. 
2456 */ 2457 sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event, 2458 hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY); 2459 sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event, 2460 hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY); 2461 2462 return (0); 2463 failed: 2464 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) 2465 hn_synth_detach(sc); 2466 hn_detach(dev); 2467 return (error); 2468 } 2469 2470 static int 2471 hn_detach(device_t dev) 2472 { 2473 struct hn_softc *sc = device_get_softc(dev); 2474 struct ifnet *ifp = sc->hn_ifp, *vf_ifp; 2475 2476 if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { 2477 /* 2478 * In case that the vmbus missed the orphan handler 2479 * installation. 2480 */ 2481 vmbus_xact_ctx_orphan(sc->hn_xact); 2482 } 2483 2484 if (sc->hn_ifaddr_evthand != NULL) 2485 EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand); 2486 if (sc->hn_ifnet_evthand != NULL) 2487 EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand); 2488 if (sc->hn_ifnet_atthand != NULL) { 2489 EVENTHANDLER_DEREGISTER(ether_ifattach_event, 2490 sc->hn_ifnet_atthand); 2491 } 2492 if (sc->hn_ifnet_dethand != NULL) { 2493 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 2494 sc->hn_ifnet_dethand); 2495 } 2496 if (sc->hn_ifnet_lnkhand != NULL) 2497 EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand); 2498 2499 vf_ifp = sc->hn_vf_ifp; 2500 __compiler_membar(); 2501 if (vf_ifp != NULL) 2502 hn_ifnet_detevent(sc, vf_ifp); 2503 2504 if (device_is_attached(dev)) { 2505 HN_LOCK(sc); 2506 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2507 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2508 hn_stop(sc, true); 2509 /* 2510 * NOTE: 2511 * hn_stop() only suspends data, so managment 2512 * stuffs have to be suspended manually here. 
2513 */ 2514 hn_suspend_mgmt(sc); 2515 hn_synth_detach(sc); 2516 } 2517 HN_UNLOCK(sc); 2518 ether_ifdetach(ifp); 2519 } 2520 2521 ifmedia_removeall(&sc->hn_media); 2522 hn_destroy_rx_data(sc); 2523 hn_destroy_tx_data(sc); 2524 2525 if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) { 2526 int i; 2527 2528 for (i = 0; i < hn_tx_taskq_cnt; ++i) 2529 taskqueue_free(sc->hn_tx_taskqs[i]); 2530 free(sc->hn_tx_taskqs, M_DEVBUF); 2531 } 2532 taskqueue_free(sc->hn_mgmt_taskq0); 2533 if (sc->hn_vf_taskq != NULL) 2534 taskqueue_free(sc->hn_vf_taskq); 2535 2536 if (sc->hn_xact != NULL) { 2537 /* 2538 * Uninstall the orphan handler _before_ the xact is 2539 * destructed. 2540 */ 2541 vmbus_chan_unset_orphan(sc->hn_prichan); 2542 vmbus_xact_ctx_destroy(sc->hn_xact); 2543 } 2544 2545 if_free(ifp); 2546 2547 HN_LOCK_DESTROY(sc); 2548 rm_destroy(&sc->hn_vf_lock); 2549 return (0); 2550 } 2551 2552 static int 2553 hn_shutdown(device_t dev) 2554 { 2555 2556 return (0); 2557 } 2558 2559 static void 2560 hn_link_status(struct hn_softc *sc) 2561 { 2562 uint32_t link_status; 2563 int error; 2564 2565 error = hn_rndis_get_linkstatus(sc, &link_status); 2566 if (error) { 2567 /* XXX what to do? */ 2568 return; 2569 } 2570 2571 if (link_status == NDIS_MEDIA_STATE_CONNECTED) 2572 sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; 2573 else 2574 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2575 if_link_state_change(sc->hn_ifp, 2576 (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? 2577 LINK_STATE_UP : LINK_STATE_DOWN); 2578 } 2579 2580 static void 2581 hn_link_taskfunc(void *xsc, int pending __unused) 2582 { 2583 struct hn_softc *sc = xsc; 2584 2585 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 2586 return; 2587 hn_link_status(sc); 2588 } 2589 2590 static void 2591 hn_netchg_init_taskfunc(void *xsc, int pending __unused) 2592 { 2593 struct hn_softc *sc = xsc; 2594 2595 /* Prevent any link status checks from running. 
*/ 2596 sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; 2597 2598 /* 2599 * Fake up a [link down --> link up] state change; 5 seconds 2600 * delay is used, which closely simulates miibus reaction 2601 * upon link down event. 2602 */ 2603 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2604 if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); 2605 taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, 2606 &sc->hn_netchg_status, 5 * hz); 2607 } 2608 2609 static void 2610 hn_netchg_status_taskfunc(void *xsc, int pending __unused) 2611 { 2612 struct hn_softc *sc = xsc; 2613 2614 /* Re-allow link status checks. */ 2615 sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; 2616 hn_link_status(sc); 2617 } 2618 2619 static void 2620 hn_update_link_status(struct hn_softc *sc) 2621 { 2622 2623 if (sc->hn_mgmt_taskq != NULL) 2624 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); 2625 } 2626 2627 static void 2628 hn_change_network(struct hn_softc *sc) 2629 { 2630 2631 if (sc->hn_mgmt_taskq != NULL) 2632 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); 2633 } 2634 2635 static __inline int 2636 hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, 2637 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) 2638 { 2639 struct mbuf *m = *m_head; 2640 int error; 2641 2642 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); 2643 2644 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, 2645 m, segs, nsegs, BUS_DMA_NOWAIT); 2646 if (error == EFBIG) { 2647 struct mbuf *m_new; 2648 2649 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); 2650 if (m_new == NULL) 2651 return ENOBUFS; 2652 else 2653 *m_head = m = m_new; 2654 txr->hn_tx_collapsed++; 2655 2656 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, 2657 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); 2658 } 2659 if (!error) { 2660 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, 2661 BUS_DMASYNC_PREWRITE); 2662 txd->flags |= HN_TXD_FLAG_DMAMAP; 2663 } 2664 return error; 2665 
} 2666 2667 static __inline int 2668 hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) 2669 { 2670 2671 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, 2672 ("put an onlist txd %#x", txd->flags)); 2673 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2674 ("put an onagg txd %#x", txd->flags)); 2675 2676 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2677 if (atomic_fetchadd_int(&txd->refs, -1) != 1) 2678 return 0; 2679 2680 if (!STAILQ_EMPTY(&txd->agg_list)) { 2681 struct hn_txdesc *tmp_txd; 2682 2683 while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { 2684 int freed; 2685 2686 KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), 2687 ("resursive aggregation on aggregated txdesc")); 2688 KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), 2689 ("not aggregated txdesc")); 2690 KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2691 ("aggregated txdesc uses dmamap")); 2692 KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2693 ("aggregated txdesc consumes " 2694 "chimney sending buffer")); 2695 KASSERT(tmp_txd->chim_size == 0, 2696 ("aggregated txdesc has non-zero " 2697 "chimney sending size")); 2698 2699 STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); 2700 tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; 2701 freed = hn_txdesc_put(txr, tmp_txd); 2702 KASSERT(freed, ("failed to free aggregated txdesc")); 2703 } 2704 } 2705 2706 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 2707 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2708 ("chim txd uses dmamap")); 2709 hn_chim_free(txr->hn_sc, txd->chim_index); 2710 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2711 txd->chim_size = 0; 2712 } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { 2713 bus_dmamap_sync(txr->hn_tx_data_dtag, 2714 txd->data_dmap, BUS_DMASYNC_POSTWRITE); 2715 bus_dmamap_unload(txr->hn_tx_data_dtag, 2716 txd->data_dmap); 2717 txd->flags &= ~HN_TXD_FLAG_DMAMAP; 2718 } 2719 2720 if (txd->m != NULL) { 2721 m_freem(txd->m); 2722 txd->m = NULL; 2723 } 2724 2725 txd->flags |= HN_TXD_FLAG_ONLIST; 2726 
#ifndef HN_USE_TXDESC_BUFRING 2727 mtx_lock_spin(&txr->hn_txlist_spin); 2728 KASSERT(txr->hn_txdesc_avail >= 0 && 2729 txr->hn_txdesc_avail < txr->hn_txdesc_cnt, 2730 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); 2731 txr->hn_txdesc_avail++; 2732 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 2733 mtx_unlock_spin(&txr->hn_txlist_spin); 2734 #else /* HN_USE_TXDESC_BUFRING */ 2735 #ifdef HN_DEBUG 2736 atomic_add_int(&txr->hn_txdesc_avail, 1); 2737 #endif 2738 buf_ring_enqueue(txr->hn_txdesc_br, txd); 2739 #endif /* !HN_USE_TXDESC_BUFRING */ 2740 2741 return 1; 2742 } 2743 2744 static __inline struct hn_txdesc * 2745 hn_txdesc_get(struct hn_tx_ring *txr) 2746 { 2747 struct hn_txdesc *txd; 2748 2749 #ifndef HN_USE_TXDESC_BUFRING 2750 mtx_lock_spin(&txr->hn_txlist_spin); 2751 txd = SLIST_FIRST(&txr->hn_txlist); 2752 if (txd != NULL) { 2753 KASSERT(txr->hn_txdesc_avail > 0, 2754 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); 2755 txr->hn_txdesc_avail--; 2756 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 2757 } 2758 mtx_unlock_spin(&txr->hn_txlist_spin); 2759 #else 2760 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); 2761 #endif 2762 2763 if (txd != NULL) { 2764 #ifdef HN_USE_TXDESC_BUFRING 2765 #ifdef HN_DEBUG 2766 atomic_subtract_int(&txr->hn_txdesc_avail, 1); 2767 #endif 2768 #endif /* HN_USE_TXDESC_BUFRING */ 2769 KASSERT(txd->m == NULL && txd->refs == 0 && 2770 STAILQ_EMPTY(&txd->agg_list) && 2771 txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 2772 txd->chim_size == 0 && 2773 (txd->flags & HN_TXD_FLAG_ONLIST) && 2774 (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && 2775 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); 2776 txd->flags &= ~HN_TXD_FLAG_ONLIST; 2777 txd->refs = 1; 2778 } 2779 return txd; 2780 } 2781 2782 static __inline void 2783 hn_txdesc_hold(struct hn_txdesc *txd) 2784 { 2785 2786 /* 0->1 transition will never work */ 2787 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2788 atomic_add_int(&txd->refs, 1); 2789 } 2790 
2791 static __inline void 2792 hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) 2793 { 2794 2795 KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2796 ("recursive aggregation on aggregating txdesc")); 2797 2798 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2799 ("already aggregated")); 2800 KASSERT(STAILQ_EMPTY(&txd->agg_list), 2801 ("recursive aggregation on to-be-aggregated txdesc")); 2802 2803 txd->flags |= HN_TXD_FLAG_ONAGG; 2804 STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); 2805 } 2806 2807 static bool 2808 hn_tx_ring_pending(struct hn_tx_ring *txr) 2809 { 2810 bool pending = false; 2811 2812 #ifndef HN_USE_TXDESC_BUFRING 2813 mtx_lock_spin(&txr->hn_txlist_spin); 2814 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) 2815 pending = true; 2816 mtx_unlock_spin(&txr->hn_txlist_spin); 2817 #else 2818 if (!buf_ring_full(txr->hn_txdesc_br)) 2819 pending = true; 2820 #endif 2821 return (pending); 2822 } 2823 2824 static __inline void 2825 hn_txeof(struct hn_tx_ring *txr) 2826 { 2827 txr->hn_has_txeof = 0; 2828 txr->hn_txeof(txr); 2829 } 2830 2831 static void 2832 hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, 2833 struct vmbus_channel *chan, const void *data __unused, int dlen __unused) 2834 { 2835 struct hn_txdesc *txd = sndc->hn_cbarg; 2836 struct hn_tx_ring *txr; 2837 2838 txr = txd->txr; 2839 KASSERT(txr->hn_chan == chan, 2840 ("channel mismatch, on chan%u, should be chan%u", 2841 vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); 2842 2843 txr->hn_has_txeof = 1; 2844 hn_txdesc_put(txr, txd); 2845 2846 ++txr->hn_txdone_cnt; 2847 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { 2848 txr->hn_txdone_cnt = 0; 2849 if (txr->hn_oactive) 2850 hn_txeof(txr); 2851 } 2852 } 2853 2854 static void 2855 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 2856 { 2857 #if defined(INET) || defined(INET6) 2858 tcp_lro_flush_all(&rxr->hn_lro); 2859 #endif 2860 2861 /* 2862 * NOTE: 2863 * 'txr' could be NULL, if multiple 
channels and 2864 * ifnet.if_start method are enabled. 2865 */ 2866 if (txr == NULL || !txr->hn_has_txeof) 2867 return; 2868 2869 txr->hn_txdone_cnt = 0; 2870 hn_txeof(txr); 2871 } 2872 2873 static __inline uint32_t 2874 hn_rndis_pktmsg_offset(uint32_t ofs) 2875 { 2876 2877 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 2878 ("invalid RNDIS packet msg offset %u", ofs)); 2879 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 2880 } 2881 2882 static __inline void * 2883 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 2884 size_t pi_dlen, uint32_t pi_type) 2885 { 2886 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 2887 struct rndis_pktinfo *pi; 2888 2889 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 2890 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 2891 2892 /* 2893 * Per-packet-info does not move; it only grows. 2894 * 2895 * NOTE: 2896 * rm_pktinfooffset in this phase counts from the beginning 2897 * of rndis_packet_msg. 2898 */ 2899 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 2900 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 2901 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 2902 pkt->rm_pktinfolen); 2903 pkt->rm_pktinfolen += pi_size; 2904 2905 pi->rm_size = pi_size; 2906 pi->rm_type = pi_type; 2907 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 2908 2909 return (pi->rm_data); 2910 } 2911 2912 static __inline int 2913 hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) 2914 { 2915 struct hn_txdesc *txd; 2916 struct mbuf *m; 2917 int error, pkts; 2918 2919 txd = txr->hn_agg_txd; 2920 KASSERT(txd != NULL, ("no aggregate txdesc")); 2921 2922 /* 2923 * Since hn_txpkt() will reset this temporary stat, save 2924 * it now, so that oerrors can be updated properly, if 2925 * hn_txpkt() ever fails. 
2926 */ 2927 pkts = txr->hn_stat_pkts; 2928 2929 /* 2930 * Since txd's mbuf will _not_ be freed upon hn_txpkt() 2931 * failure, save it for later freeing, if hn_txpkt() ever 2932 * fails. 2933 */ 2934 m = txd->m; 2935 error = hn_txpkt(ifp, txr, txd); 2936 if (__predict_false(error)) { 2937 /* txd is freed, but m is not. */ 2938 m_freem(m); 2939 2940 txr->hn_flush_failed++; 2941 if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); 2942 } 2943 2944 /* Reset all aggregation states. */ 2945 txr->hn_agg_txd = NULL; 2946 txr->hn_agg_szleft = 0; 2947 txr->hn_agg_pktleft = 0; 2948 txr->hn_agg_prevpkt = NULL; 2949 2950 return (error); 2951 } 2952 2953 static void * 2954 hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2955 int pktsize) 2956 { 2957 void *chim; 2958 2959 if (txr->hn_agg_txd != NULL) { 2960 if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { 2961 struct hn_txdesc *agg_txd = txr->hn_agg_txd; 2962 struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; 2963 int olen; 2964 2965 /* 2966 * Update the previous RNDIS packet's total length, 2967 * it can be increased due to the mandatory alignment 2968 * padding for this RNDIS packet. And update the 2969 * aggregating txdesc's chimney sending buffer size 2970 * accordingly. 2971 * 2972 * XXX 2973 * Zero-out the padding, as required by the RNDIS spec. 2974 */ 2975 olen = pkt->rm_len; 2976 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 2977 agg_txd->chim_size += pkt->rm_len - olen; 2978 2979 /* Link this txdesc to the parent. */ 2980 hn_txdesc_agg(agg_txd, txd); 2981 2982 chim = (uint8_t *)pkt + pkt->rm_len; 2983 /* Save the current packet for later fixup. */ 2984 txr->hn_agg_prevpkt = chim; 2985 2986 txr->hn_agg_pktleft--; 2987 txr->hn_agg_szleft -= pktsize; 2988 if (txr->hn_agg_szleft <= 2989 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 2990 /* 2991 * Probably can't aggregate more packets, 2992 * flush this aggregating txdesc proactively. 
2993 */ 2994 txr->hn_agg_pktleft = 0; 2995 } 2996 /* Done! */ 2997 return (chim); 2998 } 2999 hn_flush_txagg(ifp, txr); 3000 } 3001 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 3002 3003 txr->hn_tx_chimney_tried++; 3004 txd->chim_index = hn_chim_alloc(txr->hn_sc); 3005 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 3006 return (NULL); 3007 txr->hn_tx_chimney++; 3008 3009 chim = txr->hn_sc->hn_chim + 3010 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 3011 3012 if (txr->hn_agg_pktmax > 1 && 3013 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 3014 txr->hn_agg_txd = txd; 3015 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 3016 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 3017 txr->hn_agg_prevpkt = chim; 3018 } 3019 return (chim); 3020 } 3021 3022 /* 3023 * NOTE: 3024 * If this function fails, then both txd and m_head0 will be freed. 3025 */ 3026 static int 3027 hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 3028 struct mbuf **m_head0) 3029 { 3030 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 3031 int error, nsegs, i; 3032 struct mbuf *m_head = *m_head0; 3033 struct rndis_packet_msg *pkt; 3034 uint32_t *pi_data; 3035 void *chim = NULL; 3036 int pkt_hlen, pkt_size; 3037 3038 pkt = txd->rndis_pkt; 3039 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 3040 if (pkt_size < txr->hn_chim_size) { 3041 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 3042 if (chim != NULL) 3043 pkt = chim; 3044 } else { 3045 if (txr->hn_agg_txd != NULL) 3046 hn_flush_txagg(ifp, txr); 3047 } 3048 3049 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 3050 pkt->rm_len = m_head->m_pkthdr.len; 3051 pkt->rm_dataoffset = 0; 3052 pkt->rm_datalen = m_head->m_pkthdr.len; 3053 pkt->rm_oobdataoffset = 0; 3054 pkt->rm_oobdatalen = 0; 3055 pkt->rm_oobdataelements = 0; 3056 pkt->rm_pktinfooffset = sizeof(*pkt); 3057 pkt->rm_pktinfolen = 0; 3058 pkt->rm_vchandle = 0; 3059 pkt->rm_reserved = 0; 3060 3061 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) 
{ 3062 /* 3063 * Set the hash value for this packet, so that the host could 3064 * dispatch the TX done event for this packet back to this TX 3065 * ring's channel. 3066 */ 3067 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3068 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 3069 *pi_data = txr->hn_tx_idx; 3070 } 3071 3072 if (m_head->m_flags & M_VLANTAG) { 3073 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3074 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 3075 *pi_data = NDIS_VLAN_INFO_MAKE( 3076 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 3077 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 3078 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 3079 } 3080 3081 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3082 #if defined(INET6) || defined(INET) 3083 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3084 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 3085 #ifdef INET 3086 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 3087 *pi_data = NDIS_LSO2_INFO_MAKEIPV4( 3088 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3089 m_head->m_pkthdr.tso_segsz); 3090 } 3091 #endif 3092 #if defined(INET6) && defined(INET) 3093 else 3094 #endif 3095 #ifdef INET6 3096 { 3097 *pi_data = NDIS_LSO2_INFO_MAKEIPV6( 3098 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3099 m_head->m_pkthdr.tso_segsz); 3100 } 3101 #endif 3102 #endif /* INET6 || INET */ 3103 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 3104 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3105 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 3106 if (m_head->m_pkthdr.csum_flags & 3107 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 3108 *pi_data = NDIS_TXCSUM_INFO_IPV6; 3109 } else { 3110 *pi_data = NDIS_TXCSUM_INFO_IPV4; 3111 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3112 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 3113 } 3114 3115 if (m_head->m_pkthdr.csum_flags & 3116 (CSUM_IP_TCP | CSUM_IP6_TCP)) { 3117 *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS( 3118 m_head->m_pkthdr.l2hlen + 
m_head->m_pkthdr.l3hlen); 3119 } else if (m_head->m_pkthdr.csum_flags & 3120 (CSUM_IP_UDP | CSUM_IP6_UDP)) { 3121 *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS( 3122 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3123 } 3124 } 3125 3126 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 3127 /* Fixup RNDIS packet message total length */ 3128 pkt->rm_len += pkt_hlen; 3129 /* Convert RNDIS packet message offsets */ 3130 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); 3131 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 3132 3133 /* 3134 * Fast path: Chimney sending. 3135 */ 3136 if (chim != NULL) { 3137 struct hn_txdesc *tgt_txd = txd; 3138 3139 if (txr->hn_agg_txd != NULL) { 3140 tgt_txd = txr->hn_agg_txd; 3141 #ifdef INVARIANTS 3142 *m_head0 = NULL; 3143 #endif 3144 } 3145 3146 KASSERT(pkt == chim, 3147 ("RNDIS pkt not in chimney sending buffer")); 3148 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 3149 ("chimney sending buffer is not used")); 3150 tgt_txd->chim_size += pkt->rm_len; 3151 3152 m_copydata(m_head, 0, m_head->m_pkthdr.len, 3153 ((uint8_t *)chim) + pkt_hlen); 3154 3155 txr->hn_gpa_cnt = 0; 3156 txr->hn_sendpkt = hn_txpkt_chim; 3157 goto done; 3158 } 3159 3160 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 3161 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 3162 ("chimney buffer is used")); 3163 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 3164 3165 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 3166 if (__predict_false(error)) { 3167 int freed; 3168 3169 /* 3170 * This mbuf is not linked w/ the txd yet, so free it now. 
3171 */ 3172 m_freem(m_head); 3173 *m_head0 = NULL; 3174 3175 freed = hn_txdesc_put(txr, txd); 3176 KASSERT(freed != 0, 3177 ("fail to free txd upon txdma error")); 3178 3179 txr->hn_txdma_failed++; 3180 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 3181 return error; 3182 } 3183 *m_head0 = m_head; 3184 3185 /* +1 RNDIS packet message */ 3186 txr->hn_gpa_cnt = nsegs + 1; 3187 3188 /* send packet with page buffer */ 3189 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 3190 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 3191 txr->hn_gpa[0].gpa_len = pkt_hlen; 3192 3193 /* 3194 * Fill the page buffers with mbuf info after the page 3195 * buffer for RNDIS packet message. 3196 */ 3197 for (i = 0; i < nsegs; ++i) { 3198 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 3199 3200 gpa->gpa_page = atop(segs[i].ds_addr); 3201 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 3202 gpa->gpa_len = segs[i].ds_len; 3203 } 3204 3205 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 3206 txd->chim_size = 0; 3207 txr->hn_sendpkt = hn_txpkt_sglist; 3208 done: 3209 txd->m = m_head; 3210 3211 /* Set the completion routine */ 3212 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 3213 3214 /* Update temporary stats for later use. */ 3215 txr->hn_stat_pkts++; 3216 txr->hn_stat_size += m_head->m_pkthdr.len; 3217 if (m_head->m_flags & M_MCAST) 3218 txr->hn_stat_mcasts++; 3219 3220 return 0; 3221 } 3222 3223 /* 3224 * NOTE: 3225 * If this function fails, then txd will be freed, but the mbuf 3226 * associated w/ the txd will _not_ be freed. 3227 */ 3228 static int 3229 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 3230 { 3231 int error, send_failed = 0, has_bpf; 3232 3233 again: 3234 has_bpf = bpf_peers_present(ifp->if_bpf); 3235 if (has_bpf) { 3236 /* 3237 * Make sure that this txd and any aggregated txds are not 3238 * freed before ETHER_BPF_MTAP. 
3239 */ 3240 hn_txdesc_hold(txd); 3241 } 3242 error = txr->hn_sendpkt(txr, txd); 3243 if (!error) { 3244 if (has_bpf) { 3245 const struct hn_txdesc *tmp_txd; 3246 3247 ETHER_BPF_MTAP(ifp, txd->m); 3248 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 3249 ETHER_BPF_MTAP(ifp, tmp_txd->m); 3250 } 3251 3252 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 3253 #ifdef HN_IFSTART_SUPPORT 3254 if (!hn_use_if_start) 3255 #endif 3256 { 3257 if_inc_counter(ifp, IFCOUNTER_OBYTES, 3258 txr->hn_stat_size); 3259 if (txr->hn_stat_mcasts != 0) { 3260 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 3261 txr->hn_stat_mcasts); 3262 } 3263 } 3264 txr->hn_pkts += txr->hn_stat_pkts; 3265 txr->hn_sends++; 3266 } 3267 if (has_bpf) 3268 hn_txdesc_put(txr, txd); 3269 3270 if (__predict_false(error)) { 3271 int freed; 3272 3273 /* 3274 * This should "really rarely" happen. 3275 * 3276 * XXX Too many RX to be acked or too many sideband 3277 * commands to run? Ask netvsc_channel_rollup() 3278 * to kick start later. 3279 */ 3280 txr->hn_has_txeof = 1; 3281 if (!send_failed) { 3282 txr->hn_send_failed++; 3283 send_failed = 1; 3284 /* 3285 * Try sending again after set hn_has_txeof; 3286 * in case that we missed the last 3287 * netvsc_channel_rollup(). 3288 */ 3289 goto again; 3290 } 3291 if_printf(ifp, "send failed\n"); 3292 3293 /* 3294 * Caller will perform further processing on the 3295 * associated mbuf, so don't free it in hn_txdesc_put(); 3296 * only unload it from the DMA map in hn_txdesc_put(), 3297 * if it was loaded. 3298 */ 3299 txd->m = NULL; 3300 freed = hn_txdesc_put(txr, txd); 3301 KASSERT(freed != 0, 3302 ("fail to free txd upon send error")); 3303 3304 txr->hn_send_failed++; 3305 } 3306 3307 /* Reset temporary stats, after this sending is done. 
*/ 3308 txr->hn_stat_size = 0; 3309 txr->hn_stat_pkts = 0; 3310 txr->hn_stat_mcasts = 0; 3311 3312 return (error); 3313 } 3314 3315 /* 3316 * Append the specified data to the indicated mbuf chain, 3317 * Extend the mbuf chain if the new data does not fit in 3318 * existing space. 3319 * 3320 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 3321 * There should be an equivalent in the kernel mbuf code, 3322 * but there does not appear to be one yet. 3323 * 3324 * Differs from m_append() in that additional mbufs are 3325 * allocated with cluster size MJUMPAGESIZE, and filled 3326 * accordingly. 3327 * 3328 * Return 1 if able to complete the job; otherwise 0. 3329 */ 3330 static int 3331 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 3332 { 3333 struct mbuf *m, *n; 3334 int remainder, space; 3335 3336 for (m = m0; m->m_next != NULL; m = m->m_next) 3337 ; 3338 remainder = len; 3339 space = M_TRAILINGSPACE(m); 3340 if (space > 0) { 3341 /* 3342 * Copy into available space. 3343 */ 3344 if (space > remainder) 3345 space = remainder; 3346 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 3347 m->m_len += space; 3348 cp += space; 3349 remainder -= space; 3350 } 3351 while (remainder > 0) { 3352 /* 3353 * Allocate a new mbuf; could check space 3354 * and allocate a cluster instead. 
3355 */ 3356 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 3357 if (n == NULL) 3358 break; 3359 n->m_len = min(MJUMPAGESIZE, remainder); 3360 bcopy(cp, mtod(n, caddr_t), n->m_len); 3361 cp += n->m_len; 3362 remainder -= n->m_len; 3363 m->m_next = n; 3364 m = n; 3365 } 3366 if (m0->m_flags & M_PKTHDR) 3367 m0->m_pkthdr.len += len - remainder; 3368 3369 return (remainder == 0); 3370 } 3371 3372 #if defined(INET) || defined(INET6) 3373 static __inline int 3374 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 3375 { 3376 #if __FreeBSD_version >= 1100095 3377 if (hn_lro_mbufq_depth) { 3378 tcp_lro_queue_mbuf(lc, m); 3379 return 0; 3380 } 3381 #endif 3382 return tcp_lro_rx(lc, m, 0); 3383 } 3384 #endif 3385 3386 static int 3387 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 3388 const struct hn_rxinfo *info) 3389 { 3390 struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; 3391 struct mbuf *m_new; 3392 int size, do_lro = 0, do_csum = 1, is_vf = 0; 3393 int hash_type = M_HASHTYPE_NONE; 3394 int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE; 3395 3396 ifp = hn_ifp; 3397 if (rxr->hn_rxvf_ifp != NULL) { 3398 /* 3399 * Non-transparent mode VF; pretend this packet is from 3400 * the VF. 3401 */ 3402 ifp = rxr->hn_rxvf_ifp; 3403 is_vf = 1; 3404 } else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) { 3405 /* Transparent mode VF. */ 3406 is_vf = 1; 3407 } 3408 3409 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 3410 /* 3411 * NOTE: 3412 * See the NOTE of hn_rndis_init_fixat(). This 3413 * function can be reached, immediately after the 3414 * RNDIS is initialized but before the ifnet is 3415 * setup on the hn_attach() path; drop the unexpected 3416 * packets. 
3417 */ 3418 return (0); 3419 } 3420 3421 if (__predict_false(dlen < ETHER_HDR_LEN)) { 3422 if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); 3423 return (0); 3424 } 3425 3426 if (dlen <= MHLEN) { 3427 m_new = m_gethdr(M_NOWAIT, MT_DATA); 3428 if (m_new == NULL) { 3429 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3430 return (0); 3431 } 3432 memcpy(mtod(m_new, void *), data, dlen); 3433 m_new->m_pkthdr.len = m_new->m_len = dlen; 3434 rxr->hn_small_pkts++; 3435 } else { 3436 /* 3437 * Get an mbuf with a cluster. For packets 2K or less, 3438 * get a standard 2K cluster. For anything larger, get a 3439 * 4K cluster. Any buffers larger than 4K can cause problems 3440 * if looped around to the Hyper-V TX channel, so avoid them. 3441 */ 3442 size = MCLBYTES; 3443 if (dlen > MCLBYTES) { 3444 /* 4096 */ 3445 size = MJUMPAGESIZE; 3446 } 3447 3448 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 3449 if (m_new == NULL) { 3450 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3451 return (0); 3452 } 3453 3454 hv_m_append(m_new, dlen, data); 3455 } 3456 m_new->m_pkthdr.rcvif = ifp; 3457 3458 if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) 3459 do_csum = 0; 3460 3461 /* receive side checksum offload */ 3462 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 3463 /* IP csum offload */ 3464 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 3465 m_new->m_pkthdr.csum_flags |= 3466 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3467 rxr->hn_csum_ip++; 3468 } 3469 3470 /* TCP/UDP csum offload */ 3471 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 3472 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 3473 m_new->m_pkthdr.csum_flags |= 3474 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3475 m_new->m_pkthdr.csum_data = 0xffff; 3476 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 3477 rxr->hn_csum_tcp++; 3478 else 3479 rxr->hn_csum_udp++; 3480 } 3481 3482 /* 3483 * XXX 3484 * As of this write (Oct 28th, 2016), host side will turn 3485 * on only TCPCS_OK and IPCS_OK even for UDP 
datagrams, so 3486 * the do_lro setting here is actually _not_ accurate. We 3487 * depend on the RSS hash type check to reset do_lro. 3488 */ 3489 if ((info->csum_info & 3490 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 3491 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 3492 do_lro = 1; 3493 } else { 3494 hn_rxpkt_proto(m_new, &l3proto, &l4proto); 3495 if (l3proto == ETHERTYPE_IP) { 3496 if (l4proto == IPPROTO_TCP) { 3497 if (do_csum && 3498 (rxr->hn_trust_hcsum & 3499 HN_TRUST_HCSUM_TCP)) { 3500 rxr->hn_csum_trusted++; 3501 m_new->m_pkthdr.csum_flags |= 3502 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3503 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3504 m_new->m_pkthdr.csum_data = 0xffff; 3505 } 3506 do_lro = 1; 3507 } else if (l4proto == IPPROTO_UDP) { 3508 if (do_csum && 3509 (rxr->hn_trust_hcsum & 3510 HN_TRUST_HCSUM_UDP)) { 3511 rxr->hn_csum_trusted++; 3512 m_new->m_pkthdr.csum_flags |= 3513 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3514 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3515 m_new->m_pkthdr.csum_data = 0xffff; 3516 } 3517 } else if (l4proto != IPPROTO_DONE && do_csum && 3518 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 3519 rxr->hn_csum_trusted++; 3520 m_new->m_pkthdr.csum_flags |= 3521 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3522 } 3523 } 3524 } 3525 3526 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 3527 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 3528 NDIS_VLAN_INFO_ID(info->vlan_info), 3529 NDIS_VLAN_INFO_PRI(info->vlan_info), 3530 NDIS_VLAN_INFO_CFI(info->vlan_info)); 3531 m_new->m_flags |= M_VLANTAG; 3532 } 3533 3534 /* 3535 * If VF is activated (tranparent/non-transparent mode does not 3536 * matter here). 3537 * 3538 * - Disable LRO 3539 * 3540 * hn(4) will only receive broadcast packets, multicast packets, 3541 * TCP SYN and SYN|ACK (in Azure), LRO is useless for these 3542 * packet types. 3543 * 3544 * For non-transparent, we definitely _cannot_ enable LRO at 3545 * all, since the LRO flush will use hn(4) as the receiving 3546 * interface; i.e. 
hn_ifp->if_input(hn_ifp, m). 3547 */ 3548 if (is_vf) 3549 do_lro = 0; 3550 3551 /* 3552 * If VF is activated (tranparent/non-transparent mode does not 3553 * matter here), do _not_ mess with unsupported hash types or 3554 * functions. 3555 */ 3556 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 3557 rxr->hn_rss_pkts++; 3558 m_new->m_pkthdr.flowid = info->hash_value; 3559 if (!is_vf) 3560 hash_type = M_HASHTYPE_OPAQUE_HASH; 3561 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 3562 NDIS_HASH_FUNCTION_TOEPLITZ) { 3563 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK & 3564 rxr->hn_mbuf_hash); 3565 3566 /* 3567 * NOTE: 3568 * do_lro is resetted, if the hash types are not TCP 3569 * related. See the comment in the above csum_flags 3570 * setup section. 3571 */ 3572 switch (type) { 3573 case NDIS_HASH_IPV4: 3574 hash_type = M_HASHTYPE_RSS_IPV4; 3575 do_lro = 0; 3576 break; 3577 3578 case NDIS_HASH_TCP_IPV4: 3579 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 3580 if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) { 3581 int def_htype = M_HASHTYPE_OPAQUE_HASH; 3582 3583 if (is_vf) 3584 def_htype = M_HASHTYPE_NONE; 3585 3586 /* 3587 * UDP 4-tuple hash is delivered as 3588 * TCP 4-tuple hash. 
3589 */ 3590 if (l3proto == ETHERTYPE_MAX) { 3591 hn_rxpkt_proto(m_new, 3592 &l3proto, &l4proto); 3593 } 3594 if (l3proto == ETHERTYPE_IP) { 3595 if (l4proto == IPPROTO_UDP && 3596 (rxr->hn_mbuf_hash & 3597 NDIS_HASH_UDP_IPV4_X)) { 3598 hash_type = 3599 M_HASHTYPE_RSS_UDP_IPV4; 3600 do_lro = 0; 3601 } else if (l4proto != 3602 IPPROTO_TCP) { 3603 hash_type = def_htype; 3604 do_lro = 0; 3605 } 3606 } else { 3607 hash_type = def_htype; 3608 do_lro = 0; 3609 } 3610 } 3611 break; 3612 3613 case NDIS_HASH_IPV6: 3614 hash_type = M_HASHTYPE_RSS_IPV6; 3615 do_lro = 0; 3616 break; 3617 3618 case NDIS_HASH_IPV6_EX: 3619 hash_type = M_HASHTYPE_RSS_IPV6_EX; 3620 do_lro = 0; 3621 break; 3622 3623 case NDIS_HASH_TCP_IPV6: 3624 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 3625 break; 3626 3627 case NDIS_HASH_TCP_IPV6_EX: 3628 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 3629 break; 3630 } 3631 } 3632 } else if (!is_vf) { 3633 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 3634 hash_type = M_HASHTYPE_OPAQUE; 3635 } 3636 M_HASHTYPE_SET(m_new, hash_type); 3637 3638 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 3639 if (hn_ifp != ifp) { 3640 const struct ether_header *eh; 3641 3642 /* 3643 * Non-transparent mode VF is activated. 3644 */ 3645 3646 /* 3647 * Allow tapping on hn(4). 3648 */ 3649 ETHER_BPF_MTAP(hn_ifp, m_new); 3650 3651 /* 3652 * Update hn(4)'s stats. 3653 */ 3654 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 3655 if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len); 3656 /* Checked at the beginning of this function. 
*/ 3657 KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame")); 3658 eh = mtod(m_new, struct ether_header *); 3659 if (ETHER_IS_MULTICAST(eh->ether_dhost)) 3660 if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1); 3661 } 3662 rxr->hn_pkts++; 3663 3664 if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) { 3665 #if defined(INET) || defined(INET6) 3666 struct lro_ctrl *lro = &rxr->hn_lro; 3667 3668 if (lro->lro_cnt) { 3669 rxr->hn_lro_tried++; 3670 if (hn_lro_rx(lro, m_new) == 0) { 3671 /* DONE! */ 3672 return 0; 3673 } 3674 } 3675 #endif 3676 } 3677 ifp->if_input(ifp, m_new); 3678 3679 return (0); 3680 } 3681 3682 static int 3683 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 3684 { 3685 struct hn_softc *sc = ifp->if_softc; 3686 struct ifreq *ifr = (struct ifreq *)data, ifr_vf; 3687 struct ifnet *vf_ifp; 3688 int mask, error = 0; 3689 struct ifrsskey *ifrk; 3690 struct ifrsshash *ifrh; 3691 uint32_t mtu; 3692 3693 switch (cmd) { 3694 case SIOCSIFMTU: 3695 if (ifr->ifr_mtu > HN_MTU_MAX) { 3696 error = EINVAL; 3697 break; 3698 } 3699 3700 HN_LOCK(sc); 3701 3702 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3703 HN_UNLOCK(sc); 3704 break; 3705 } 3706 3707 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 3708 /* Can't change MTU */ 3709 HN_UNLOCK(sc); 3710 error = EOPNOTSUPP; 3711 break; 3712 } 3713 3714 if (ifp->if_mtu == ifr->ifr_mtu) { 3715 HN_UNLOCK(sc); 3716 break; 3717 } 3718 3719 if (hn_xpnt_vf_isready(sc)) { 3720 vf_ifp = sc->hn_vf_ifp; 3721 ifr_vf = *ifr; 3722 strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname, 3723 sizeof(ifr_vf.ifr_name)); 3724 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, 3725 (caddr_t)&ifr_vf); 3726 if (error) { 3727 HN_UNLOCK(sc); 3728 if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n", 3729 vf_ifp->if_xname, ifr->ifr_mtu, error); 3730 break; 3731 } 3732 } 3733 3734 /* 3735 * Suspend this interface before the synthetic parts 3736 * are ripped. 3737 */ 3738 hn_suspend(sc); 3739 3740 /* 3741 * Detach the synthetics parts, i.e. NVS and RNDIS. 
3742 */ 3743 hn_synth_detach(sc); 3744 3745 /* 3746 * Reattach the synthetic parts, i.e. NVS and RNDIS, 3747 * with the new MTU setting. 3748 */ 3749 error = hn_synth_attach(sc, ifr->ifr_mtu); 3750 if (error) { 3751 HN_UNLOCK(sc); 3752 break; 3753 } 3754 3755 error = hn_rndis_get_mtu(sc, &mtu); 3756 if (error) 3757 mtu = ifr->ifr_mtu; 3758 else if (bootverbose) 3759 if_printf(ifp, "RNDIS mtu %u\n", mtu); 3760 3761 /* 3762 * Commit the requested MTU, after the synthetic parts 3763 * have been successfully attached. 3764 */ 3765 if (mtu >= ifr->ifr_mtu) { 3766 mtu = ifr->ifr_mtu; 3767 } else { 3768 if_printf(ifp, "fixup mtu %d -> %u\n", 3769 ifr->ifr_mtu, mtu); 3770 } 3771 ifp->if_mtu = mtu; 3772 3773 /* 3774 * Synthetic parts' reattach may change the chimney 3775 * sending size; update it. 3776 */ 3777 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 3778 hn_set_chim_size(sc, sc->hn_chim_szmax); 3779 3780 /* 3781 * Make sure that various parameters based on MTU are 3782 * still valid, after the MTU change. 3783 */ 3784 hn_mtu_change_fixup(sc); 3785 3786 /* 3787 * All done! Resume the interface now. 3788 */ 3789 hn_resume(sc); 3790 3791 if ((sc->hn_flags & HN_FLAG_RXVF) || 3792 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 3793 /* 3794 * Since we have reattached the NVS part, 3795 * change the datapath to VF again; in case 3796 * that it is lost, after the NVS was detached. 3797 */ 3798 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 3799 } 3800 3801 HN_UNLOCK(sc); 3802 break; 3803 3804 case SIOCSIFFLAGS: 3805 HN_LOCK(sc); 3806 3807 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3808 HN_UNLOCK(sc); 3809 break; 3810 } 3811 3812 if (hn_xpnt_vf_isready(sc)) 3813 hn_xpnt_vf_saveifflags(sc); 3814 3815 if (ifp->if_flags & IFF_UP) { 3816 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3817 /* 3818 * Caller meight hold mutex, e.g. 3819 * bpf; use busy-wait for the RNDIS 3820 * reply. 
3821 */ 3822 HN_NO_SLEEPING(sc); 3823 hn_rxfilter_config(sc); 3824 HN_SLEEPING_OK(sc); 3825 3826 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 3827 error = hn_xpnt_vf_iocsetflags(sc); 3828 } else { 3829 hn_init_locked(sc); 3830 } 3831 } else { 3832 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3833 hn_stop(sc, false); 3834 } 3835 sc->hn_if_flags = ifp->if_flags; 3836 3837 HN_UNLOCK(sc); 3838 break; 3839 3840 case SIOCSIFCAP: 3841 HN_LOCK(sc); 3842 3843 if (hn_xpnt_vf_isready(sc)) { 3844 ifr_vf = *ifr; 3845 strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname, 3846 sizeof(ifr_vf.ifr_name)); 3847 error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf); 3848 HN_UNLOCK(sc); 3849 break; 3850 } 3851 3852 /* 3853 * Fix up requested capabilities w/ supported capabilities, 3854 * since the supported capabilities could have been changed. 3855 */ 3856 mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ 3857 ifp->if_capenable; 3858 3859 if (mask & IFCAP_TXCSUM) { 3860 ifp->if_capenable ^= IFCAP_TXCSUM; 3861 if (ifp->if_capenable & IFCAP_TXCSUM) 3862 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 3863 else 3864 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 3865 } 3866 if (mask & IFCAP_TXCSUM_IPV6) { 3867 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 3868 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 3869 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 3870 else 3871 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 3872 } 3873 3874 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 3875 if (mask & IFCAP_RXCSUM) 3876 ifp->if_capenable ^= IFCAP_RXCSUM; 3877 #ifdef foo 3878 /* We can't diff IPv6 packets from IPv4 packets on RX path. 
*/ 3879 if (mask & IFCAP_RXCSUM_IPV6) 3880 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 3881 #endif 3882 3883 if (mask & IFCAP_LRO) 3884 ifp->if_capenable ^= IFCAP_LRO; 3885 3886 if (mask & IFCAP_TSO4) { 3887 ifp->if_capenable ^= IFCAP_TSO4; 3888 if (ifp->if_capenable & IFCAP_TSO4) 3889 ifp->if_hwassist |= CSUM_IP_TSO; 3890 else 3891 ifp->if_hwassist &= ~CSUM_IP_TSO; 3892 } 3893 if (mask & IFCAP_TSO6) { 3894 ifp->if_capenable ^= IFCAP_TSO6; 3895 if (ifp->if_capenable & IFCAP_TSO6) 3896 ifp->if_hwassist |= CSUM_IP6_TSO; 3897 else 3898 ifp->if_hwassist &= ~CSUM_IP6_TSO; 3899 } 3900 3901 HN_UNLOCK(sc); 3902 break; 3903 3904 case SIOCADDMULTI: 3905 case SIOCDELMULTI: 3906 HN_LOCK(sc); 3907 3908 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3909 HN_UNLOCK(sc); 3910 break; 3911 } 3912 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3913 /* 3914 * Multicast uses mutex; use busy-wait for 3915 * the RNDIS reply. 3916 */ 3917 HN_NO_SLEEPING(sc); 3918 hn_rxfilter_config(sc); 3919 HN_SLEEPING_OK(sc); 3920 } 3921 3922 /* XXX vlan(4) style mcast addr maintenance */ 3923 if (hn_xpnt_vf_isready(sc)) { 3924 int old_if_flags; 3925 3926 old_if_flags = sc->hn_vf_ifp->if_flags; 3927 hn_xpnt_vf_saveifflags(sc); 3928 3929 if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && 3930 ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & 3931 IFF_ALLMULTI)) 3932 error = hn_xpnt_vf_iocsetflags(sc); 3933 } 3934 3935 HN_UNLOCK(sc); 3936 break; 3937 3938 case SIOCSIFMEDIA: 3939 case SIOCGIFMEDIA: 3940 HN_LOCK(sc); 3941 if (hn_xpnt_vf_isready(sc)) { 3942 /* 3943 * SIOCGIFMEDIA expects ifmediareq, so don't 3944 * create and pass ifr_vf to the VF here; just 3945 * replace the ifr_name. 3946 */ 3947 vf_ifp = sc->hn_vf_ifp; 3948 strlcpy(ifr->ifr_name, vf_ifp->if_xname, 3949 sizeof(ifr->ifr_name)); 3950 error = vf_ifp->if_ioctl(vf_ifp, cmd, data); 3951 /* Restore the ifr_name. 
*/ 3952 strlcpy(ifr->ifr_name, ifp->if_xname, 3953 sizeof(ifr->ifr_name)); 3954 HN_UNLOCK(sc); 3955 break; 3956 } 3957 HN_UNLOCK(sc); 3958 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 3959 break; 3960 3961 case SIOCGIFRSSHASH: 3962 ifrh = (struct ifrsshash *)data; 3963 HN_LOCK(sc); 3964 if (sc->hn_rx_ring_inuse == 1) { 3965 HN_UNLOCK(sc); 3966 ifrh->ifrh_func = RSS_FUNC_NONE; 3967 ifrh->ifrh_types = 0; 3968 break; 3969 } 3970 3971 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 3972 ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; 3973 else 3974 ifrh->ifrh_func = RSS_FUNC_PRIVATE; 3975 ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash); 3976 HN_UNLOCK(sc); 3977 break; 3978 3979 case SIOCGIFRSSKEY: 3980 ifrk = (struct ifrsskey *)data; 3981 HN_LOCK(sc); 3982 if (sc->hn_rx_ring_inuse == 1) { 3983 HN_UNLOCK(sc); 3984 ifrk->ifrk_func = RSS_FUNC_NONE; 3985 ifrk->ifrk_keylen = 0; 3986 break; 3987 } 3988 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 3989 ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; 3990 else 3991 ifrk->ifrk_func = RSS_FUNC_PRIVATE; 3992 ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ; 3993 memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key, 3994 NDIS_HASH_KEYSIZE_TOEPLITZ); 3995 HN_UNLOCK(sc); 3996 break; 3997 3998 default: 3999 error = ether_ioctl(ifp, cmd, data); 4000 break; 4001 } 4002 return (error); 4003 } 4004 4005 static void 4006 hn_stop(struct hn_softc *sc, bool detaching) 4007 { 4008 struct ifnet *ifp = sc->hn_ifp; 4009 int i; 4010 4011 HN_LOCK_ASSERT(sc); 4012 4013 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 4014 ("synthetic parts were not attached")); 4015 4016 /* Clear RUNNING bit ASAP. */ 4017 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4018 4019 /* Disable polling. */ 4020 hn_polling(sc, 0); 4021 4022 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 4023 KASSERT(sc->hn_vf_ifp != NULL, 4024 ("%s: VF is not attached", ifp->if_xname)); 4025 4026 /* Mark transparent mode VF as disabled. 
*/ 4027 hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */); 4028 4029 /* 4030 * NOTE: 4031 * Datapath setting must happen _before_ bringing 4032 * the VF down. 4033 */ 4034 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 4035 4036 /* 4037 * Bring the VF down. 4038 */ 4039 hn_xpnt_vf_saveifflags(sc); 4040 sc->hn_vf_ifp->if_flags &= ~IFF_UP; 4041 hn_xpnt_vf_iocsetflags(sc); 4042 } 4043 4044 /* Suspend data transfers. */ 4045 hn_suspend_data(sc); 4046 4047 /* Clear OACTIVE bit. */ 4048 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4049 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4050 sc->hn_tx_ring[i].hn_oactive = 0; 4051 4052 /* 4053 * If the non-transparent mode VF is active, make sure 4054 * that the RX filter still allows packet reception. 4055 */ 4056 if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) 4057 hn_rxfilter_config(sc); 4058 } 4059 4060 static void 4061 hn_init_locked(struct hn_softc *sc) 4062 { 4063 struct ifnet *ifp = sc->hn_ifp; 4064 int i; 4065 4066 HN_LOCK_ASSERT(sc); 4067 4068 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 4069 return; 4070 4071 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 4072 return; 4073 4074 /* Configure RX filter */ 4075 hn_rxfilter_config(sc); 4076 4077 /* Clear OACTIVE bit. */ 4078 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4079 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4080 sc->hn_tx_ring[i].hn_oactive = 0; 4081 4082 /* Clear TX 'suspended' bit. */ 4083 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 4084 4085 if (hn_xpnt_vf_isready(sc)) { 4086 /* Initialize transparent VF. */ 4087 hn_xpnt_vf_init(sc); 4088 } 4089 4090 /* Everything is ready; unleash! */ 4091 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4092 4093 /* Re-enable polling if requested. 
*/ 4094 if (sc->hn_pollhz > 0) 4095 hn_polling(sc, sc->hn_pollhz); 4096 } 4097 4098 static void 4099 hn_init(void *xsc) 4100 { 4101 struct hn_softc *sc = xsc; 4102 4103 HN_LOCK(sc); 4104 hn_init_locked(sc); 4105 HN_UNLOCK(sc); 4106 } 4107 4108 #if __FreeBSD_version >= 1100099 4109 4110 static int 4111 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 4112 { 4113 struct hn_softc *sc = arg1; 4114 unsigned int lenlim; 4115 int error; 4116 4117 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 4118 error = sysctl_handle_int(oidp, &lenlim, 0, req); 4119 if (error || req->newptr == NULL) 4120 return error; 4121 4122 HN_LOCK(sc); 4123 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 4124 lenlim > TCP_LRO_LENGTH_MAX) { 4125 HN_UNLOCK(sc); 4126 return EINVAL; 4127 } 4128 hn_set_lro_lenlim(sc, lenlim); 4129 HN_UNLOCK(sc); 4130 4131 return 0; 4132 } 4133 4134 static int 4135 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 4136 { 4137 struct hn_softc *sc = arg1; 4138 int ackcnt, error, i; 4139 4140 /* 4141 * lro_ackcnt_lim is append count limit, 4142 * +1 to turn it into aggregation limit. 4143 */ 4144 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 4145 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 4146 if (error || req->newptr == NULL) 4147 return error; 4148 4149 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 4150 return EINVAL; 4151 4152 /* 4153 * Convert aggregation limit back to append 4154 * count limit. 
4155 */ 4156 --ackcnt; 4157 HN_LOCK(sc); 4158 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 4159 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 4160 HN_UNLOCK(sc); 4161 return 0; 4162 } 4163 4164 #endif 4165 4166 static int 4167 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 4168 { 4169 struct hn_softc *sc = arg1; 4170 int hcsum = arg2; 4171 int on, error, i; 4172 4173 on = 0; 4174 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 4175 on = 1; 4176 4177 error = sysctl_handle_int(oidp, &on, 0, req); 4178 if (error || req->newptr == NULL) 4179 return error; 4180 4181 HN_LOCK(sc); 4182 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4183 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4184 4185 if (on) 4186 rxr->hn_trust_hcsum |= hcsum; 4187 else 4188 rxr->hn_trust_hcsum &= ~hcsum; 4189 } 4190 HN_UNLOCK(sc); 4191 return 0; 4192 } 4193 4194 static int 4195 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 4196 { 4197 struct hn_softc *sc = arg1; 4198 int chim_size, error; 4199 4200 chim_size = sc->hn_tx_ring[0].hn_chim_size; 4201 error = sysctl_handle_int(oidp, &chim_size, 0, req); 4202 if (error || req->newptr == NULL) 4203 return error; 4204 4205 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 4206 return EINVAL; 4207 4208 HN_LOCK(sc); 4209 hn_set_chim_size(sc, chim_size); 4210 HN_UNLOCK(sc); 4211 return 0; 4212 } 4213 4214 #if __FreeBSD_version < 1100095 4215 static int 4216 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 4217 { 4218 struct hn_softc *sc = arg1; 4219 int ofs = arg2, i, error; 4220 struct hn_rx_ring *rxr; 4221 uint64_t stat; 4222 4223 stat = 0; 4224 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4225 rxr = &sc->hn_rx_ring[i]; 4226 stat += *((int *)((uint8_t *)rxr + ofs)); 4227 } 4228 4229 error = sysctl_handle_64(oidp, &stat, 0, req); 4230 if (error || req->newptr == NULL) 4231 return error; 4232 4233 /* Zero out this stat. 
*/ 4234 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4235 rxr = &sc->hn_rx_ring[i]; 4236 *((int *)((uint8_t *)rxr + ofs)) = 0; 4237 } 4238 return 0; 4239 } 4240 #else 4241 static int 4242 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 4243 { 4244 struct hn_softc *sc = arg1; 4245 int ofs = arg2, i, error; 4246 struct hn_rx_ring *rxr; 4247 uint64_t stat; 4248 4249 stat = 0; 4250 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4251 rxr = &sc->hn_rx_ring[i]; 4252 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 4253 } 4254 4255 error = sysctl_handle_64(oidp, &stat, 0, req); 4256 if (error || req->newptr == NULL) 4257 return error; 4258 4259 /* Zero out this stat. */ 4260 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4261 rxr = &sc->hn_rx_ring[i]; 4262 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 4263 } 4264 return 0; 4265 } 4266 4267 #endif 4268 4269 static int 4270 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4271 { 4272 struct hn_softc *sc = arg1; 4273 int ofs = arg2, i, error; 4274 struct hn_rx_ring *rxr; 4275 u_long stat; 4276 4277 stat = 0; 4278 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4279 rxr = &sc->hn_rx_ring[i]; 4280 stat += *((u_long *)((uint8_t *)rxr + ofs)); 4281 } 4282 4283 error = sysctl_handle_long(oidp, &stat, 0, req); 4284 if (error || req->newptr == NULL) 4285 return error; 4286 4287 /* Zero out this stat. */ 4288 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4289 rxr = &sc->hn_rx_ring[i]; 4290 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 4291 } 4292 return 0; 4293 } 4294 4295 static int 4296 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4297 { 4298 struct hn_softc *sc = arg1; 4299 int ofs = arg2, i, error; 4300 struct hn_tx_ring *txr; 4301 u_long stat; 4302 4303 stat = 0; 4304 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4305 txr = &sc->hn_tx_ring[i]; 4306 stat += *((u_long *)((uint8_t *)txr + ofs)); 4307 } 4308 4309 error = sysctl_handle_long(oidp, &stat, 0, req); 4310 if (error || req->newptr == NULL) 4311 return error; 4312 4313 /* Zero out this stat. 
*/ 4314 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4315 txr = &sc->hn_tx_ring[i]; 4316 *((u_long *)((uint8_t *)txr + ofs)) = 0; 4317 } 4318 return 0; 4319 } 4320 4321 static int 4322 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 4323 { 4324 struct hn_softc *sc = arg1; 4325 int ofs = arg2, i, error, conf; 4326 struct hn_tx_ring *txr; 4327 4328 txr = &sc->hn_tx_ring[0]; 4329 conf = *((int *)((uint8_t *)txr + ofs)); 4330 4331 error = sysctl_handle_int(oidp, &conf, 0, req); 4332 if (error || req->newptr == NULL) 4333 return error; 4334 4335 HN_LOCK(sc); 4336 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4337 txr = &sc->hn_tx_ring[i]; 4338 *((int *)((uint8_t *)txr + ofs)) = conf; 4339 } 4340 HN_UNLOCK(sc); 4341 4342 return 0; 4343 } 4344 4345 static int 4346 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 4347 { 4348 struct hn_softc *sc = arg1; 4349 int error, size; 4350 4351 size = sc->hn_agg_size; 4352 error = sysctl_handle_int(oidp, &size, 0, req); 4353 if (error || req->newptr == NULL) 4354 return (error); 4355 4356 HN_LOCK(sc); 4357 sc->hn_agg_size = size; 4358 hn_set_txagg(sc); 4359 HN_UNLOCK(sc); 4360 4361 return (0); 4362 } 4363 4364 static int 4365 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 4366 { 4367 struct hn_softc *sc = arg1; 4368 int error, pkts; 4369 4370 pkts = sc->hn_agg_pkts; 4371 error = sysctl_handle_int(oidp, &pkts, 0, req); 4372 if (error || req->newptr == NULL) 4373 return (error); 4374 4375 HN_LOCK(sc); 4376 sc->hn_agg_pkts = pkts; 4377 hn_set_txagg(sc); 4378 HN_UNLOCK(sc); 4379 4380 return (0); 4381 } 4382 4383 static int 4384 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 4385 { 4386 struct hn_softc *sc = arg1; 4387 int pkts; 4388 4389 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 4390 return (sysctl_handle_int(oidp, &pkts, 0, req)); 4391 } 4392 4393 static int 4394 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 4395 { 4396 struct hn_softc *sc = arg1; 4397 int align; 4398 4399 align = sc->hn_tx_ring[0].hn_agg_align; 4400 return (sysctl_handle_int(oidp, &align, 0, req)); 
4401 } 4402 4403 static void 4404 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) 4405 { 4406 if (pollhz == 0) 4407 vmbus_chan_poll_disable(chan); 4408 else 4409 vmbus_chan_poll_enable(chan, pollhz); 4410 } 4411 4412 static void 4413 hn_polling(struct hn_softc *sc, u_int pollhz) 4414 { 4415 int nsubch = sc->hn_rx_ring_inuse - 1; 4416 4417 HN_LOCK_ASSERT(sc); 4418 4419 if (nsubch > 0) { 4420 struct vmbus_channel **subch; 4421 int i; 4422 4423 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 4424 for (i = 0; i < nsubch; ++i) 4425 hn_chan_polling(subch[i], pollhz); 4426 vmbus_subchan_rel(subch, nsubch); 4427 } 4428 hn_chan_polling(sc->hn_prichan, pollhz); 4429 } 4430 4431 static int 4432 hn_polling_sysctl(SYSCTL_HANDLER_ARGS) 4433 { 4434 struct hn_softc *sc = arg1; 4435 int pollhz, error; 4436 4437 pollhz = sc->hn_pollhz; 4438 error = sysctl_handle_int(oidp, &pollhz, 0, req); 4439 if (error || req->newptr == NULL) 4440 return (error); 4441 4442 if (pollhz != 0 && 4443 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) 4444 return (EINVAL); 4445 4446 HN_LOCK(sc); 4447 if (sc->hn_pollhz != pollhz) { 4448 sc->hn_pollhz = pollhz; 4449 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && 4450 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 4451 hn_polling(sc, sc->hn_pollhz); 4452 } 4453 HN_UNLOCK(sc); 4454 4455 return (0); 4456 } 4457 4458 static int 4459 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 4460 { 4461 struct hn_softc *sc = arg1; 4462 char verstr[16]; 4463 4464 snprintf(verstr, sizeof(verstr), "%u.%u", 4465 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 4466 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 4467 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 4468 } 4469 4470 static int 4471 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 4472 { 4473 struct hn_softc *sc = arg1; 4474 char caps_str[128]; 4475 uint32_t caps; 4476 4477 HN_LOCK(sc); 4478 caps = sc->hn_caps; 4479 HN_UNLOCK(sc); 4480 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 
4481 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 4482 } 4483 4484 static int 4485 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 4486 { 4487 struct hn_softc *sc = arg1; 4488 char assist_str[128]; 4489 uint32_t hwassist; 4490 4491 HN_LOCK(sc); 4492 hwassist = sc->hn_ifp->if_hwassist; 4493 HN_UNLOCK(sc); 4494 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 4495 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 4496 } 4497 4498 static int 4499 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 4500 { 4501 struct hn_softc *sc = arg1; 4502 char filter_str[128]; 4503 uint32_t filter; 4504 4505 HN_LOCK(sc); 4506 filter = sc->hn_rx_filter; 4507 HN_UNLOCK(sc); 4508 snprintf(filter_str, sizeof(filter_str), "%b", filter, 4509 NDIS_PACKET_TYPES); 4510 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 4511 } 4512 4513 #ifndef RSS 4514 4515 static int 4516 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 4517 { 4518 struct hn_softc *sc = arg1; 4519 int error; 4520 4521 HN_LOCK(sc); 4522 4523 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4524 if (error || req->newptr == NULL) 4525 goto back; 4526 4527 if ((sc->hn_flags & HN_FLAG_RXVF) || 4528 (hn_xpnt_vf && sc->hn_vf_ifp != NULL)) { 4529 /* 4530 * RSS key is synchronized w/ VF's, don't allow users 4531 * to change it. 4532 */ 4533 error = EBUSY; 4534 goto back; 4535 } 4536 4537 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4538 if (error) 4539 goto back; 4540 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 4541 4542 if (sc->hn_rx_ring_inuse > 1) { 4543 error = hn_rss_reconfig(sc); 4544 } else { 4545 /* Not RSS capable, at least for now; just save the RSS key. 
*/ 4546 error = 0; 4547 } 4548 back: 4549 HN_UNLOCK(sc); 4550 return (error); 4551 } 4552 4553 static int 4554 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 4555 { 4556 struct hn_softc *sc = arg1; 4557 int error; 4558 4559 HN_LOCK(sc); 4560 4561 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4562 if (error || req->newptr == NULL) 4563 goto back; 4564 4565 /* 4566 * Don't allow RSS indirect table change, if this interface is not 4567 * RSS capable currently. 4568 */ 4569 if (sc->hn_rx_ring_inuse == 1) { 4570 error = EOPNOTSUPP; 4571 goto back; 4572 } 4573 4574 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4575 if (error) 4576 goto back; 4577 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 4578 4579 hn_rss_ind_fixup(sc); 4580 error = hn_rss_reconfig(sc); 4581 back: 4582 HN_UNLOCK(sc); 4583 return (error); 4584 } 4585 4586 #endif /* !RSS */ 4587 4588 static int 4589 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 4590 { 4591 struct hn_softc *sc = arg1; 4592 char hash_str[128]; 4593 uint32_t hash; 4594 4595 HN_LOCK(sc); 4596 hash = sc->hn_rss_hash; 4597 HN_UNLOCK(sc); 4598 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4599 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4600 } 4601 4602 static int 4603 hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS) 4604 { 4605 struct hn_softc *sc = arg1; 4606 char hash_str[128]; 4607 uint32_t hash; 4608 4609 HN_LOCK(sc); 4610 hash = sc->hn_rss_hcap; 4611 HN_UNLOCK(sc); 4612 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4613 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4614 } 4615 4616 static int 4617 hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS) 4618 { 4619 struct hn_softc *sc = arg1; 4620 char hash_str[128]; 4621 uint32_t hash; 4622 4623 HN_LOCK(sc); 4624 hash = sc->hn_rx_ring[0].hn_mbuf_hash; 4625 HN_UNLOCK(sc); 4626 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4627 return sysctl_handle_string(oidp, hash_str, 
sizeof(hash_str), req); 4628 } 4629 4630 static int 4631 hn_vf_sysctl(SYSCTL_HANDLER_ARGS) 4632 { 4633 struct hn_softc *sc = arg1; 4634 char vf_name[IFNAMSIZ + 1]; 4635 struct ifnet *vf_ifp; 4636 4637 HN_LOCK(sc); 4638 vf_name[0] = '\0'; 4639 vf_ifp = sc->hn_vf_ifp; 4640 if (vf_ifp != NULL) 4641 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4642 HN_UNLOCK(sc); 4643 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4644 } 4645 4646 static int 4647 hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) 4648 { 4649 struct hn_softc *sc = arg1; 4650 char vf_name[IFNAMSIZ + 1]; 4651 struct ifnet *vf_ifp; 4652 4653 HN_LOCK(sc); 4654 vf_name[0] = '\0'; 4655 vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; 4656 if (vf_ifp != NULL) 4657 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4658 HN_UNLOCK(sc); 4659 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4660 } 4661 4662 static int 4663 hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) 4664 { 4665 struct rm_priotracker pt; 4666 struct sbuf *sb; 4667 int error, i; 4668 bool first; 4669 4670 error = sysctl_wire_old_buffer(req, 0); 4671 if (error != 0) 4672 return (error); 4673 4674 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4675 if (sb == NULL) 4676 return (ENOMEM); 4677 4678 rm_rlock(&hn_vfmap_lock, &pt); 4679 4680 first = true; 4681 for (i = 0; i < hn_vfmap_size; ++i) { 4682 struct ifnet *ifp; 4683 4684 if (hn_vfmap[i] == NULL) 4685 continue; 4686 4687 ifp = ifnet_byindex(i); 4688 if (ifp != NULL) { 4689 if (first) 4690 sbuf_printf(sb, "%s", ifp->if_xname); 4691 else 4692 sbuf_printf(sb, " %s", ifp->if_xname); 4693 first = false; 4694 } 4695 } 4696 4697 rm_runlock(&hn_vfmap_lock, &pt); 4698 4699 error = sbuf_finish(sb); 4700 sbuf_delete(sb); 4701 return (error); 4702 } 4703 4704 static int 4705 hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) 4706 { 4707 struct rm_priotracker pt; 4708 struct sbuf *sb; 4709 int error, i; 4710 bool first; 4711 4712 error = sysctl_wire_old_buffer(req, 0); 4713 if (error != 0) 4714 
return (error); 4715 4716 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4717 if (sb == NULL) 4718 return (ENOMEM); 4719 4720 rm_rlock(&hn_vfmap_lock, &pt); 4721 4722 first = true; 4723 for (i = 0; i < hn_vfmap_size; ++i) { 4724 struct ifnet *ifp, *hn_ifp; 4725 4726 hn_ifp = hn_vfmap[i]; 4727 if (hn_ifp == NULL) 4728 continue; 4729 4730 ifp = ifnet_byindex(i); 4731 if (ifp != NULL) { 4732 if (first) { 4733 sbuf_printf(sb, "%s:%s", ifp->if_xname, 4734 hn_ifp->if_xname); 4735 } else { 4736 sbuf_printf(sb, " %s:%s", ifp->if_xname, 4737 hn_ifp->if_xname); 4738 } 4739 first = false; 4740 } 4741 } 4742 4743 rm_runlock(&hn_vfmap_lock, &pt); 4744 4745 error = sbuf_finish(sb); 4746 sbuf_delete(sb); 4747 return (error); 4748 } 4749 4750 static int 4751 hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS) 4752 { 4753 struct hn_softc *sc = arg1; 4754 int error, onoff = 0; 4755 4756 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) 4757 onoff = 1; 4758 error = sysctl_handle_int(oidp, &onoff, 0, req); 4759 if (error || req->newptr == NULL) 4760 return (error); 4761 4762 HN_LOCK(sc); 4763 /* NOTE: hn_vf_lock for hn_transmit() */ 4764 rm_wlock(&sc->hn_vf_lock); 4765 if (onoff) 4766 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 4767 else 4768 sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF; 4769 rm_wunlock(&sc->hn_vf_lock); 4770 HN_UNLOCK(sc); 4771 4772 return (0); 4773 } 4774 4775 static int 4776 hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS) 4777 { 4778 struct hn_softc *sc = arg1; 4779 int enabled = 0; 4780 4781 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 4782 enabled = 1; 4783 return (sysctl_handle_int(oidp, &enabled, 0, req)); 4784 } 4785 4786 static int 4787 hn_check_iplen(const struct mbuf *m, int hoff) 4788 { 4789 const struct ip *ip; 4790 int len, iphlen, iplen; 4791 const struct tcphdr *th; 4792 int thoff; /* TCP data offset */ 4793 4794 len = hoff + sizeof(struct ip); 4795 4796 /* The packet must be at least the size of an IP header. 
*/ 4797 if (m->m_pkthdr.len < len) 4798 return IPPROTO_DONE; 4799 4800 /* The fixed IP header must reside completely in the first mbuf. */ 4801 if (m->m_len < len) 4802 return IPPROTO_DONE; 4803 4804 ip = mtodo(m, hoff); 4805 4806 /* Bound check the packet's stated IP header length. */ 4807 iphlen = ip->ip_hl << 2; 4808 if (iphlen < sizeof(struct ip)) /* minimum header length */ 4809 return IPPROTO_DONE; 4810 4811 /* The full IP header must reside completely in the one mbuf. */ 4812 if (m->m_len < hoff + iphlen) 4813 return IPPROTO_DONE; 4814 4815 iplen = ntohs(ip->ip_len); 4816 4817 /* 4818 * Check that the amount of data in the buffers is as 4819 * at least much as the IP header would have us expect. 4820 */ 4821 if (m->m_pkthdr.len < hoff + iplen) 4822 return IPPROTO_DONE; 4823 4824 /* 4825 * Ignore IP fragments. 4826 */ 4827 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 4828 return IPPROTO_DONE; 4829 4830 /* 4831 * The TCP/IP or UDP/IP header must be entirely contained within 4832 * the first fragment of a packet. 
4833 */ 4834 switch (ip->ip_p) { 4835 case IPPROTO_TCP: 4836 if (iplen < iphlen + sizeof(struct tcphdr)) 4837 return IPPROTO_DONE; 4838 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 4839 return IPPROTO_DONE; 4840 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 4841 thoff = th->th_off << 2; 4842 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 4843 return IPPROTO_DONE; 4844 if (m->m_len < hoff + iphlen + thoff) 4845 return IPPROTO_DONE; 4846 break; 4847 case IPPROTO_UDP: 4848 if (iplen < iphlen + sizeof(struct udphdr)) 4849 return IPPROTO_DONE; 4850 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 4851 return IPPROTO_DONE; 4852 break; 4853 default: 4854 if (iplen < iphlen) 4855 return IPPROTO_DONE; 4856 break; 4857 } 4858 return ip->ip_p; 4859 } 4860 4861 static void 4862 hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto) 4863 { 4864 const struct ether_header *eh; 4865 uint16_t etype; 4866 int hoff; 4867 4868 hoff = sizeof(*eh); 4869 /* Checked at the beginning of this function. */ 4870 KASSERT(m_new->m_len >= hoff, ("not ethernet frame")); 4871 4872 eh = mtod(m_new, const struct ether_header *); 4873 etype = ntohs(eh->ether_type); 4874 if (etype == ETHERTYPE_VLAN) { 4875 const struct ether_vlan_header *evl; 4876 4877 hoff = sizeof(*evl); 4878 if (m_new->m_len < hoff) 4879 return; 4880 evl = mtod(m_new, const struct ether_vlan_header *); 4881 etype = ntohs(evl->evl_proto); 4882 } 4883 *l3proto = etype; 4884 4885 if (etype == ETHERTYPE_IP) 4886 *l4proto = hn_check_iplen(m_new, hoff); 4887 else 4888 *l4proto = IPPROTO_DONE; 4889 } 4890 4891 static int 4892 hn_create_rx_data(struct hn_softc *sc, int ring_cnt) 4893 { 4894 struct sysctl_oid_list *child; 4895 struct sysctl_ctx_list *ctx; 4896 device_t dev = sc->hn_dev; 4897 #if defined(INET) || defined(INET6) 4898 #if __FreeBSD_version >= 1100095 4899 int lroent_cnt; 4900 #endif 4901 #endif 4902 int i; 4903 4904 /* 4905 * Create RXBUF for reception. 
4906 * 4907 * NOTE: 4908 * - It is shared by all channels. 4909 * - A large enough buffer is allocated, certain version of NVSes 4910 * may further limit the usable space. 4911 */ 4912 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4913 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 4914 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4915 if (sc->hn_rxbuf == NULL) { 4916 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 4917 return (ENOMEM); 4918 } 4919 4920 sc->hn_rx_ring_cnt = ring_cnt; 4921 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 4922 4923 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 4924 M_DEVBUF, M_WAITOK | M_ZERO); 4925 4926 #if defined(INET) || defined(INET6) 4927 #if __FreeBSD_version >= 1100095 4928 lroent_cnt = hn_lro_entry_count; 4929 if (lroent_cnt < TCP_LRO_ENTRIES) 4930 lroent_cnt = TCP_LRO_ENTRIES; 4931 if (bootverbose) 4932 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 4933 #endif 4934 #endif /* INET || INET6 */ 4935 4936 ctx = device_get_sysctl_ctx(dev); 4937 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 4938 4939 /* Create dev.hn.UNIT.rx sysctl tree */ 4940 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 4941 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4942 4943 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4944 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4945 4946 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4947 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 4948 &rxr->hn_br_dma, BUS_DMA_WAITOK); 4949 if (rxr->hn_br == NULL) { 4950 device_printf(dev, "allocate bufring failed\n"); 4951 return (ENOMEM); 4952 } 4953 4954 if (hn_trust_hosttcp) 4955 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 4956 if (hn_trust_hostudp) 4957 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 4958 if (hn_trust_hostip) 4959 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 4960 rxr->hn_mbuf_hash = NDIS_HASH_ALL; 4961 rxr->hn_ifp = sc->hn_ifp; 4962 if (i < sc->hn_tx_ring_cnt) 4963 rxr->hn_txr = &sc->hn_tx_ring[i]; 4964 
rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 4965 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 4966 rxr->hn_rx_idx = i; 4967 rxr->hn_rxbuf = sc->hn_rxbuf; 4968 4969 /* 4970 * Initialize LRO. 4971 */ 4972 #if defined(INET) || defined(INET6) 4973 #if __FreeBSD_version >= 1100095 4974 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 4975 hn_lro_mbufq_depth); 4976 #else 4977 tcp_lro_init(&rxr->hn_lro); 4978 rxr->hn_lro.ifp = sc->hn_ifp; 4979 #endif 4980 #if __FreeBSD_version >= 1100099 4981 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 4982 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 4983 #endif 4984 #endif /* INET || INET6 */ 4985 4986 if (sc->hn_rx_sysctl_tree != NULL) { 4987 char name[16]; 4988 4989 /* 4990 * Create per RX ring sysctl tree: 4991 * dev.hn.UNIT.rx.RINGID 4992 */ 4993 snprintf(name, sizeof(name), "%d", i); 4994 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 4995 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 4996 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4997 4998 if (rxr->hn_rx_sysctl_tree != NULL) { 4999 SYSCTL_ADD_ULONG(ctx, 5000 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5001 OID_AUTO, "packets", CTLFLAG_RW, 5002 &rxr->hn_pkts, "# of packets received"); 5003 SYSCTL_ADD_ULONG(ctx, 5004 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5005 OID_AUTO, "rss_pkts", CTLFLAG_RW, 5006 &rxr->hn_rss_pkts, 5007 "# of packets w/ RSS info received"); 5008 SYSCTL_ADD_INT(ctx, 5009 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5010 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 5011 &rxr->hn_pktbuf_len, 0, 5012 "Temporary channel packet buffer length"); 5013 } 5014 } 5015 } 5016 5017 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 5018 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5019 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 5020 #if __FreeBSD_version < 1100095 5021 hn_rx_stat_int_sysctl, 5022 #else 5023 hn_rx_stat_u64_sysctl, 5024 #endif 5025 "LU", "LRO queued"); 5026 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 5027 CTLTYPE_U64 | 
CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5028 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 5029 #if __FreeBSD_version < 1100095 5030 hn_rx_stat_int_sysctl, 5031 #else 5032 hn_rx_stat_u64_sysctl, 5033 #endif 5034 "LU", "LRO flushed"); 5035 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 5036 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5037 __offsetof(struct hn_rx_ring, hn_lro_tried), 5038 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 5039 #if __FreeBSD_version >= 1100099 5040 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 5041 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5042 hn_lro_lenlim_sysctl, "IU", 5043 "Max # of data bytes to be aggregated by LRO"); 5044 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 5045 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5046 hn_lro_ackcnt_sysctl, "I", 5047 "Max # of ACKs to be aggregated by LRO"); 5048 #endif 5049 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 5050 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 5051 hn_trust_hcsum_sysctl, "I", 5052 "Trust tcp segement verification on host side, " 5053 "when csum info is missing"); 5054 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 5055 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 5056 hn_trust_hcsum_sysctl, "I", 5057 "Trust udp datagram verification on host side, " 5058 "when csum info is missing"); 5059 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 5060 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 5061 hn_trust_hcsum_sysctl, "I", 5062 "Trust ip packet verification on host side, " 5063 "when csum info is missing"); 5064 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 5065 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5066 __offsetof(struct hn_rx_ring, hn_csum_ip), 5067 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 5068 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 5069 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5070 __offsetof(struct 
hn_rx_ring, hn_csum_tcp), 5071 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 5072 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 5073 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5074 __offsetof(struct hn_rx_ring, hn_csum_udp), 5075 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 5076 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 5077 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5078 __offsetof(struct hn_rx_ring, hn_csum_trusted), 5079 hn_rx_stat_ulong_sysctl, "LU", 5080 "# of packets that we trust host's csum verification"); 5081 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 5082 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5083 __offsetof(struct hn_rx_ring, hn_small_pkts), 5084 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 5085 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 5086 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5087 __offsetof(struct hn_rx_ring, hn_ack_failed), 5088 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 5089 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 5090 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 5091 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 5092 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 5093 5094 return (0); 5095 } 5096 5097 static void 5098 hn_destroy_rx_data(struct hn_softc *sc) 5099 { 5100 int i; 5101 5102 if (sc->hn_rxbuf != NULL) { 5103 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) 5104 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 5105 else 5106 device_printf(sc->hn_dev, "RXBUF is referenced\n"); 5107 sc->hn_rxbuf = NULL; 5108 } 5109 5110 if (sc->hn_rx_ring_cnt == 0) 5111 return; 5112 5113 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 5114 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 5115 5116 if (rxr->hn_br == NULL) 5117 continue; 5118 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { 5119 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 5120 } else { 5121 device_printf(sc->hn_dev, 5122 "%dth channel bufring 
is referenced", i); 5123 } 5124 rxr->hn_br = NULL; 5125 5126 #if defined(INET) || defined(INET6) 5127 tcp_lro_free(&rxr->hn_lro); 5128 #endif 5129 free(rxr->hn_pktbuf, M_DEVBUF); 5130 } 5131 free(sc->hn_rx_ring, M_DEVBUF); 5132 sc->hn_rx_ring = NULL; 5133 5134 sc->hn_rx_ring_cnt = 0; 5135 sc->hn_rx_ring_inuse = 0; 5136 } 5137 5138 static int 5139 hn_tx_ring_create(struct hn_softc *sc, int id) 5140 { 5141 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 5142 device_t dev = sc->hn_dev; 5143 bus_dma_tag_t parent_dtag; 5144 int error, i; 5145 5146 txr->hn_sc = sc; 5147 txr->hn_tx_idx = id; 5148 5149 #ifndef HN_USE_TXDESC_BUFRING 5150 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 5151 #endif 5152 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 5153 5154 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 5155 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 5156 M_DEVBUF, M_WAITOK | M_ZERO); 5157 #ifndef HN_USE_TXDESC_BUFRING 5158 SLIST_INIT(&txr->hn_txlist); 5159 #else 5160 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 5161 M_WAITOK, &txr->hn_tx_lock); 5162 #endif 5163 5164 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { 5165 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( 5166 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); 5167 } else { 5168 txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; 5169 } 5170 5171 #ifdef HN_IFSTART_SUPPORT 5172 if (hn_use_if_start) { 5173 txr->hn_txeof = hn_start_txeof; 5174 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 5175 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 5176 } else 5177 #endif 5178 { 5179 int br_depth; 5180 5181 txr->hn_txeof = hn_xmit_txeof; 5182 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 5183 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 5184 5185 br_depth = hn_get_txswq_depth(txr); 5186 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 5187 M_WAITOK, &txr->hn_tx_lock); 5188 } 5189 5190 txr->hn_direct_tx_size = 
hn_direct_tx_size; 5191 5192 /* 5193 * Always schedule transmission instead of trying to do direct 5194 * transmission. This one gives the best performance so far. 5195 */ 5196 txr->hn_sched_tx = 1; 5197 5198 parent_dtag = bus_get_dma_tag(dev); 5199 5200 /* DMA tag for RNDIS packet messages. */ 5201 error = bus_dma_tag_create(parent_dtag, /* parent */ 5202 HN_RNDIS_PKT_ALIGN, /* alignment */ 5203 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 5204 BUS_SPACE_MAXADDR, /* lowaddr */ 5205 BUS_SPACE_MAXADDR, /* highaddr */ 5206 NULL, NULL, /* filter, filterarg */ 5207 HN_RNDIS_PKT_LEN, /* maxsize */ 5208 1, /* nsegments */ 5209 HN_RNDIS_PKT_LEN, /* maxsegsize */ 5210 0, /* flags */ 5211 NULL, /* lockfunc */ 5212 NULL, /* lockfuncarg */ 5213 &txr->hn_tx_rndis_dtag); 5214 if (error) { 5215 device_printf(dev, "failed to create rndis dmatag\n"); 5216 return error; 5217 } 5218 5219 /* DMA tag for data. */ 5220 error = bus_dma_tag_create(parent_dtag, /* parent */ 5221 1, /* alignment */ 5222 HN_TX_DATA_BOUNDARY, /* boundary */ 5223 BUS_SPACE_MAXADDR, /* lowaddr */ 5224 BUS_SPACE_MAXADDR, /* highaddr */ 5225 NULL, NULL, /* filter, filterarg */ 5226 HN_TX_DATA_MAXSIZE, /* maxsize */ 5227 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 5228 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 5229 0, /* flags */ 5230 NULL, /* lockfunc */ 5231 NULL, /* lockfuncarg */ 5232 &txr->hn_tx_data_dtag); 5233 if (error) { 5234 device_printf(dev, "failed to create data dmatag\n"); 5235 return error; 5236 } 5237 5238 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 5239 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 5240 5241 txd->txr = txr; 5242 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 5243 STAILQ_INIT(&txd->agg_list); 5244 5245 /* 5246 * Allocate and load RNDIS packet message. 
5247 */ 5248 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 5249 (void **)&txd->rndis_pkt, 5250 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 5251 &txd->rndis_pkt_dmap); 5252 if (error) { 5253 device_printf(dev, 5254 "failed to allocate rndis_packet_msg, %d\n", i); 5255 return error; 5256 } 5257 5258 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 5259 txd->rndis_pkt_dmap, 5260 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 5261 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 5262 BUS_DMA_NOWAIT); 5263 if (error) { 5264 device_printf(dev, 5265 "failed to load rndis_packet_msg, %d\n", i); 5266 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5267 txd->rndis_pkt, txd->rndis_pkt_dmap); 5268 return error; 5269 } 5270 5271 /* DMA map for TX data. */ 5272 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 5273 &txd->data_dmap); 5274 if (error) { 5275 device_printf(dev, 5276 "failed to allocate tx data dmamap\n"); 5277 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 5278 txd->rndis_pkt_dmap); 5279 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5280 txd->rndis_pkt, txd->rndis_pkt_dmap); 5281 return error; 5282 } 5283 5284 /* All set, put it to list */ 5285 txd->flags |= HN_TXD_FLAG_ONLIST; 5286 #ifndef HN_USE_TXDESC_BUFRING 5287 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 5288 #else 5289 buf_ring_enqueue(txr->hn_txdesc_br, txd); 5290 #endif 5291 } 5292 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 5293 5294 if (sc->hn_tx_sysctl_tree != NULL) { 5295 struct sysctl_oid_list *child; 5296 struct sysctl_ctx_list *ctx; 5297 char name[16]; 5298 5299 /* 5300 * Create per TX ring sysctl tree: 5301 * dev.hn.UNIT.tx.RINGID 5302 */ 5303 ctx = device_get_sysctl_ctx(dev); 5304 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 5305 5306 snprintf(name, sizeof(name), "%d", id); 5307 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 5308 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5309 5310 if (txr->hn_tx_sysctl_tree != NULL) { 5311 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 5312 5313 #ifdef HN_DEBUG 5314 
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 5315 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 5316 "# of available TX descs"); 5317 #endif 5318 #ifdef HN_IFSTART_SUPPORT 5319 if (!hn_use_if_start) 5320 #endif 5321 { 5322 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 5323 CTLFLAG_RD, &txr->hn_oactive, 0, 5324 "over active"); 5325 } 5326 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 5327 CTLFLAG_RW, &txr->hn_pkts, 5328 "# of packets transmitted"); 5329 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 5330 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 5331 } 5332 } 5333 5334 return 0; 5335 } 5336 5337 static void 5338 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 5339 { 5340 struct hn_tx_ring *txr = txd->txr; 5341 5342 KASSERT(txd->m == NULL, ("still has mbuf installed")); 5343 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 5344 5345 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 5346 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 5347 txd->rndis_pkt_dmap); 5348 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 5349 } 5350 5351 static void 5352 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) 5353 { 5354 5355 KASSERT(txd->refs == 0 || txd->refs == 1, 5356 ("invalid txd refs %d", txd->refs)); 5357 5358 /* Aggregated txds will be freed by their aggregating txd. */ 5359 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { 5360 int freed; 5361 5362 freed = hn_txdesc_put(txr, txd); 5363 KASSERT(freed, ("can't free txdesc")); 5364 } 5365 } 5366 5367 static void 5368 hn_tx_ring_destroy(struct hn_tx_ring *txr) 5369 { 5370 int i; 5371 5372 if (txr->hn_txdesc == NULL) 5373 return; 5374 5375 /* 5376 * NOTE: 5377 * Because the freeing of aggregated txds will be deferred 5378 * to the aggregating txd, two passes are used here: 5379 * - The first pass GCes any pending txds. 
This GC is necessary, 5380 * since if the channels are revoked, hypervisor will not 5381 * deliver send-done for all pending txds. 5382 * - The second pass frees the busdma stuffs, i.e. after all txds 5383 * were freed. 5384 */ 5385 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5386 hn_txdesc_gc(txr, &txr->hn_txdesc[i]); 5387 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5388 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); 5389 5390 if (txr->hn_tx_data_dtag != NULL) 5391 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 5392 if (txr->hn_tx_rndis_dtag != NULL) 5393 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 5394 5395 #ifdef HN_USE_TXDESC_BUFRING 5396 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 5397 #endif 5398 5399 free(txr->hn_txdesc, M_DEVBUF); 5400 txr->hn_txdesc = NULL; 5401 5402 if (txr->hn_mbuf_br != NULL) 5403 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 5404 5405 #ifndef HN_USE_TXDESC_BUFRING 5406 mtx_destroy(&txr->hn_txlist_spin); 5407 #endif 5408 mtx_destroy(&txr->hn_tx_lock); 5409 } 5410 5411 static int 5412 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 5413 { 5414 struct sysctl_oid_list *child; 5415 struct sysctl_ctx_list *ctx; 5416 int i; 5417 5418 /* 5419 * Create TXBUF for chimney sending. 5420 * 5421 * NOTE: It is shared by all channels. 
5422 */ 5423 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 5424 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 5425 BUS_DMA_WAITOK | BUS_DMA_ZERO); 5426 if (sc->hn_chim == NULL) { 5427 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 5428 return (ENOMEM); 5429 } 5430 5431 sc->hn_tx_ring_cnt = ring_cnt; 5432 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 5433 5434 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 5435 M_DEVBUF, M_WAITOK | M_ZERO); 5436 5437 ctx = device_get_sysctl_ctx(sc->hn_dev); 5438 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 5439 5440 /* Create dev.hn.UNIT.tx sysctl tree */ 5441 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 5442 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5443 5444 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 5445 int error; 5446 5447 error = hn_tx_ring_create(sc, i); 5448 if (error) 5449 return error; 5450 } 5451 5452 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 5453 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5454 __offsetof(struct hn_tx_ring, hn_no_txdescs), 5455 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 5456 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 5457 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5458 __offsetof(struct hn_tx_ring, hn_send_failed), 5459 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 5460 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 5461 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5462 __offsetof(struct hn_tx_ring, hn_txdma_failed), 5463 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 5464 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 5465 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5466 __offsetof(struct hn_tx_ring, hn_flush_failed), 5467 hn_tx_stat_ulong_sysctl, "LU", 5468 "# of packet transmission aggregation flush failure"); 5469 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 5470 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, 
sc, 5471 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 5472 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 5473 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 5474 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5475 __offsetof(struct hn_tx_ring, hn_tx_chimney), 5476 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 5477 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 5478 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5479 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 5480 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 5481 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 5482 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 5483 "# of total TX descs"); 5484 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 5485 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 5486 "Chimney send packet size upper boundary"); 5487 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 5488 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5489 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 5490 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 5491 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5492 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 5493 hn_tx_conf_int_sysctl, "I", 5494 "Size of the packet for direct transmission"); 5495 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 5496 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5497 __offsetof(struct hn_tx_ring, hn_sched_tx), 5498 hn_tx_conf_int_sysctl, "I", 5499 "Always schedule transmission " 5500 "instead of doing direct transmission"); 5501 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 5502 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 5503 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 5504 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 5505 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 5506 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 5507 "Applied packet transmission aggregation size"); 5508 
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 5509 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5510 hn_txagg_pktmax_sysctl, "I", 5511 "Applied packet transmission aggregation packets"); 5512 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 5513 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5514 hn_txagg_align_sysctl, "I", 5515 "Applied packet transmission aggregation alignment"); 5516 5517 return 0; 5518 } 5519 5520 static void 5521 hn_set_chim_size(struct hn_softc *sc, int chim_size) 5522 { 5523 int i; 5524 5525 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5526 sc->hn_tx_ring[i].hn_chim_size = chim_size; 5527 } 5528 5529 static void 5530 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 5531 { 5532 struct ifnet *ifp = sc->hn_ifp; 5533 u_int hw_tsomax; 5534 int tso_minlen; 5535 5536 HN_LOCK_ASSERT(sc); 5537 5538 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 5539 return; 5540 5541 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 5542 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 5543 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 5544 5545 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 5546 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 5547 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 5548 5549 if (tso_maxlen < tso_minlen) 5550 tso_maxlen = tso_minlen; 5551 else if (tso_maxlen > IP_MAXPACKET) 5552 tso_maxlen = IP_MAXPACKET; 5553 if (tso_maxlen > sc->hn_ndis_tso_szmax) 5554 tso_maxlen = sc->hn_ndis_tso_szmax; 5555 hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 5556 5557 if (hn_xpnt_vf_isready(sc)) { 5558 if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) 5559 hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; 5560 } 5561 ifp->if_hw_tsomax = hw_tsomax; 5562 if (bootverbose) 5563 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 5564 } 5565 5566 static void 5567 hn_fixup_tx_data(struct hn_softc *sc) 5568 { 5569 uint64_t csum_assist; 5570 int i; 5571 5572 hn_set_chim_size(sc, sc->hn_chim_szmax); 5573 if (hn_tx_chimney_size > 
0 && 5574 hn_tx_chimney_size < sc->hn_chim_szmax) 5575 hn_set_chim_size(sc, hn_tx_chimney_size); 5576 5577 csum_assist = 0; 5578 if (sc->hn_caps & HN_CAP_IPCS) 5579 csum_assist |= CSUM_IP; 5580 if (sc->hn_caps & HN_CAP_TCP4CS) 5581 csum_assist |= CSUM_IP_TCP; 5582 if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs) 5583 csum_assist |= CSUM_IP_UDP; 5584 if (sc->hn_caps & HN_CAP_TCP6CS) 5585 csum_assist |= CSUM_IP6_TCP; 5586 if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs) 5587 csum_assist |= CSUM_IP6_UDP; 5588 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5589 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 5590 5591 if (sc->hn_caps & HN_CAP_HASHVAL) { 5592 /* 5593 * Support HASHVAL pktinfo on TX path. 5594 */ 5595 if (bootverbose) 5596 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 5597 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5598 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 5599 } 5600 } 5601 5602 static void 5603 hn_fixup_rx_data(struct hn_softc *sc) 5604 { 5605 5606 if (sc->hn_caps & HN_CAP_UDPHASH) { 5607 int i; 5608 5609 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 5610 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH; 5611 } 5612 } 5613 5614 static void 5615 hn_destroy_tx_data(struct hn_softc *sc) 5616 { 5617 int i; 5618 5619 if (sc->hn_chim != NULL) { 5620 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { 5621 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 5622 } else { 5623 device_printf(sc->hn_dev, 5624 "chimney sending buffer is referenced"); 5625 } 5626 sc->hn_chim = NULL; 5627 } 5628 5629 if (sc->hn_tx_ring_cnt == 0) 5630 return; 5631 5632 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5633 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 5634 5635 free(sc->hn_tx_ring, M_DEVBUF); 5636 sc->hn_tx_ring = NULL; 5637 5638 sc->hn_tx_ring_cnt = 0; 5639 sc->hn_tx_ring_inuse = 0; 5640 } 5641 5642 #ifdef HN_IFSTART_SUPPORT 5643 5644 static void 5645 hn_start_taskfunc(void *xtxr, int pending __unused) 5646 { 5647 struct hn_tx_ring *txr = xtxr; 5648 5649 
mtx_lock(&txr->hn_tx_lock); 5650 hn_start_locked(txr, 0); 5651 mtx_unlock(&txr->hn_tx_lock); 5652 } 5653 5654 static int 5655 hn_start_locked(struct hn_tx_ring *txr, int len) 5656 { 5657 struct hn_softc *sc = txr->hn_sc; 5658 struct ifnet *ifp = sc->hn_ifp; 5659 int sched = 0; 5660 5661 KASSERT(hn_use_if_start, 5662 ("hn_start_locked is called, when if_start is disabled")); 5663 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5664 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5665 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5666 5667 if (__predict_false(txr->hn_suspended)) 5668 return (0); 5669 5670 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 5671 IFF_DRV_RUNNING) 5672 return (0); 5673 5674 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 5675 struct hn_txdesc *txd; 5676 struct mbuf *m_head; 5677 int error; 5678 5679 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 5680 if (m_head == NULL) 5681 break; 5682 5683 if (len > 0 && m_head->m_pkthdr.len > len) { 5684 /* 5685 * This sending could be time consuming; let callers 5686 * dispatch this packet sending (and sending of any 5687 * following up packets) to tx taskqueue. 
5688 */ 5689 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5690 sched = 1; 5691 break; 5692 } 5693 5694 #if defined(INET6) || defined(INET) 5695 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 5696 m_head = hn_tso_fixup(m_head); 5697 if (__predict_false(m_head == NULL)) { 5698 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5699 continue; 5700 } 5701 } else if (m_head->m_pkthdr.csum_flags & 5702 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 5703 m_head = hn_set_hlen(m_head); 5704 if (__predict_false(m_head == NULL)) { 5705 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5706 continue; 5707 } 5708 } 5709 #endif 5710 5711 txd = hn_txdesc_get(txr); 5712 if (txd == NULL) { 5713 txr->hn_no_txdescs++; 5714 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5715 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5716 break; 5717 } 5718 5719 error = hn_encap(ifp, txr, txd, &m_head); 5720 if (error) { 5721 /* Both txd and m_head are freed */ 5722 KASSERT(txr->hn_agg_txd == NULL, 5723 ("encap failed w/ pending aggregating txdesc")); 5724 continue; 5725 } 5726 5727 if (txr->hn_agg_pktleft == 0) { 5728 if (txr->hn_agg_txd != NULL) { 5729 KASSERT(m_head == NULL, 5730 ("pending mbuf for aggregating txdesc")); 5731 error = hn_flush_txagg(ifp, txr); 5732 if (__predict_false(error)) { 5733 atomic_set_int(&ifp->if_drv_flags, 5734 IFF_DRV_OACTIVE); 5735 break; 5736 } 5737 } else { 5738 KASSERT(m_head != NULL, ("mbuf was freed")); 5739 error = hn_txpkt(ifp, txr, txd); 5740 if (__predict_false(error)) { 5741 /* txd is freed, but m_head is not */ 5742 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5743 atomic_set_int(&ifp->if_drv_flags, 5744 IFF_DRV_OACTIVE); 5745 break; 5746 } 5747 } 5748 } 5749 #ifdef INVARIANTS 5750 else { 5751 KASSERT(txr->hn_agg_txd != NULL, 5752 ("no aggregating txdesc")); 5753 KASSERT(m_head == NULL, 5754 ("pending mbuf for aggregating txdesc")); 5755 } 5756 #endif 5757 } 5758 5759 /* Flush pending aggerated transmission. 
*/ 5760 if (txr->hn_agg_txd != NULL) 5761 hn_flush_txagg(ifp, txr); 5762 return (sched); 5763 } 5764 5765 static void 5766 hn_start(struct ifnet *ifp) 5767 { 5768 struct hn_softc *sc = ifp->if_softc; 5769 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 5770 5771 if (txr->hn_sched_tx) 5772 goto do_sched; 5773 5774 if (mtx_trylock(&txr->hn_tx_lock)) { 5775 int sched; 5776 5777 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5778 mtx_unlock(&txr->hn_tx_lock); 5779 if (!sched) 5780 return; 5781 } 5782 do_sched: 5783 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 5784 } 5785 5786 static void 5787 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 5788 { 5789 struct hn_tx_ring *txr = xtxr; 5790 5791 mtx_lock(&txr->hn_tx_lock); 5792 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 5793 hn_start_locked(txr, 0); 5794 mtx_unlock(&txr->hn_tx_lock); 5795 } 5796 5797 static void 5798 hn_start_txeof(struct hn_tx_ring *txr) 5799 { 5800 struct hn_softc *sc = txr->hn_sc; 5801 struct ifnet *ifp = sc->hn_ifp; 5802 5803 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5804 5805 if (txr->hn_sched_tx) 5806 goto do_sched; 5807 5808 if (mtx_trylock(&txr->hn_tx_lock)) { 5809 int sched; 5810 5811 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5812 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5813 mtx_unlock(&txr->hn_tx_lock); 5814 if (sched) { 5815 taskqueue_enqueue(txr->hn_tx_taskq, 5816 &txr->hn_tx_task); 5817 } 5818 } else { 5819 do_sched: 5820 /* 5821 * Release the OACTIVE earlier, with the hope, that 5822 * others could catch up. The task will clear the 5823 * flag again with the hn_tx_lock to avoid possible 5824 * races. 
5825 */ 5826 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5827 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 5828 } 5829 } 5830 5831 #endif /* HN_IFSTART_SUPPORT */ 5832 5833 static int 5834 hn_xmit(struct hn_tx_ring *txr, int len) 5835 { 5836 struct hn_softc *sc = txr->hn_sc; 5837 struct ifnet *ifp = sc->hn_ifp; 5838 struct mbuf *m_head; 5839 int sched = 0; 5840 5841 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5842 #ifdef HN_IFSTART_SUPPORT 5843 KASSERT(hn_use_if_start == 0, 5844 ("hn_xmit is called, when if_start is enabled")); 5845 #endif 5846 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5847 5848 if (__predict_false(txr->hn_suspended)) 5849 return (0); 5850 5851 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 5852 return (0); 5853 5854 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 5855 struct hn_txdesc *txd; 5856 int error; 5857 5858 if (len > 0 && m_head->m_pkthdr.len > len) { 5859 /* 5860 * This sending could be time consuming; let callers 5861 * dispatch this packet sending (and sending of any 5862 * following up packets) to tx taskqueue. 
5863 */ 5864 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5865 sched = 1; 5866 break; 5867 } 5868 5869 txd = hn_txdesc_get(txr); 5870 if (txd == NULL) { 5871 txr->hn_no_txdescs++; 5872 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5873 txr->hn_oactive = 1; 5874 break; 5875 } 5876 5877 error = hn_encap(ifp, txr, txd, &m_head); 5878 if (error) { 5879 /* Both txd and m_head are freed; discard */ 5880 KASSERT(txr->hn_agg_txd == NULL, 5881 ("encap failed w/ pending aggregating txdesc")); 5882 drbr_advance(ifp, txr->hn_mbuf_br); 5883 continue; 5884 } 5885 5886 if (txr->hn_agg_pktleft == 0) { 5887 if (txr->hn_agg_txd != NULL) { 5888 KASSERT(m_head == NULL, 5889 ("pending mbuf for aggregating txdesc")); 5890 error = hn_flush_txagg(ifp, txr); 5891 if (__predict_false(error)) { 5892 txr->hn_oactive = 1; 5893 break; 5894 } 5895 } else { 5896 KASSERT(m_head != NULL, ("mbuf was freed")); 5897 error = hn_txpkt(ifp, txr, txd); 5898 if (__predict_false(error)) { 5899 /* txd is freed, but m_head is not */ 5900 drbr_putback(ifp, txr->hn_mbuf_br, 5901 m_head); 5902 txr->hn_oactive = 1; 5903 break; 5904 } 5905 } 5906 } 5907 #ifdef INVARIANTS 5908 else { 5909 KASSERT(txr->hn_agg_txd != NULL, 5910 ("no aggregating txdesc")); 5911 KASSERT(m_head == NULL, 5912 ("pending mbuf for aggregating txdesc")); 5913 } 5914 #endif 5915 5916 /* Sent */ 5917 drbr_advance(ifp, txr->hn_mbuf_br); 5918 } 5919 5920 /* Flush pending aggerated transmission. 
*/ 5921 if (txr->hn_agg_txd != NULL) 5922 hn_flush_txagg(ifp, txr); 5923 return (sched); 5924 } 5925 5926 static int 5927 hn_transmit(struct ifnet *ifp, struct mbuf *m) 5928 { 5929 struct hn_softc *sc = ifp->if_softc; 5930 struct hn_tx_ring *txr; 5931 int error, idx = 0; 5932 5933 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 5934 struct rm_priotracker pt; 5935 5936 rm_rlock(&sc->hn_vf_lock, &pt); 5937 if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 5938 struct mbuf *m_bpf = NULL; 5939 int obytes, omcast; 5940 5941 obytes = m->m_pkthdr.len; 5942 if (m->m_flags & M_MCAST) 5943 omcast = 1; 5944 5945 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) { 5946 if (bpf_peers_present(ifp->if_bpf)) { 5947 m_bpf = m_copypacket(m, M_NOWAIT); 5948 if (m_bpf == NULL) { 5949 /* 5950 * Failed to grab a shallow 5951 * copy; tap now. 5952 */ 5953 ETHER_BPF_MTAP(ifp, m); 5954 } 5955 } 5956 } else { 5957 ETHER_BPF_MTAP(ifp, m); 5958 } 5959 5960 error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m); 5961 rm_runlock(&sc->hn_vf_lock, &pt); 5962 5963 if (m_bpf != NULL) { 5964 if (!error) 5965 ETHER_BPF_MTAP(ifp, m_bpf); 5966 m_freem(m_bpf); 5967 } 5968 5969 if (error == ENOBUFS) { 5970 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 5971 } else if (error) { 5972 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5973 } else { 5974 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 5975 if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes); 5976 if (omcast) { 5977 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 5978 omcast); 5979 } 5980 } 5981 return (error); 5982 } 5983 rm_runlock(&sc->hn_vf_lock, &pt); 5984 } 5985 5986 #if defined(INET6) || defined(INET) 5987 /* 5988 * Perform TSO packet header fixup or get l2/l3 header length now, 5989 * since packet headers should be cache-hot. 
5990 */ 5991 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 5992 m = hn_tso_fixup(m); 5993 if (__predict_false(m == NULL)) { 5994 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5995 return EIO; 5996 } 5997 } else if (m->m_pkthdr.csum_flags & 5998 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 5999 m = hn_set_hlen(m); 6000 if (__predict_false(m == NULL)) { 6001 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6002 return EIO; 6003 } 6004 } 6005 #endif 6006 6007 /* 6008 * Select the TX ring based on flowid 6009 */ 6010 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 6011 #ifdef RSS 6012 uint32_t bid; 6013 6014 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 6015 &bid) == 0) 6016 idx = bid % sc->hn_tx_ring_inuse; 6017 else 6018 #endif 6019 { 6020 #if defined(INET6) || defined(INET) 6021 int tcpsyn = 0; 6022 6023 if (m->m_pkthdr.len < 128 && 6024 (m->m_pkthdr.csum_flags & 6025 (CSUM_IP_TCP | CSUM_IP6_TCP)) && 6026 (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 6027 m = hn_check_tcpsyn(m, &tcpsyn); 6028 if (__predict_false(m == NULL)) { 6029 if_inc_counter(ifp, 6030 IFCOUNTER_OERRORS, 1); 6031 return (EIO); 6032 } 6033 } 6034 #else 6035 const int tcpsyn = 0; 6036 #endif 6037 if (tcpsyn) 6038 idx = 0; 6039 else 6040 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 6041 } 6042 } 6043 txr = &sc->hn_tx_ring[idx]; 6044 6045 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 6046 if (error) { 6047 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 6048 return error; 6049 } 6050 6051 if (txr->hn_oactive) 6052 return 0; 6053 6054 if (txr->hn_sched_tx) 6055 goto do_sched; 6056 6057 if (mtx_trylock(&txr->hn_tx_lock)) { 6058 int sched; 6059 6060 sched = hn_xmit(txr, txr->hn_direct_tx_size); 6061 mtx_unlock(&txr->hn_tx_lock); 6062 if (!sched) 6063 return 0; 6064 } 6065 do_sched: 6066 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 6067 return 0; 6068 } 6069 6070 static void 6071 hn_tx_ring_qflush(struct hn_tx_ring *txr) 6072 { 6073 struct mbuf *m; 6074 6075 mtx_lock(&txr->hn_tx_lock); 
6076 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 6077 m_freem(m); 6078 mtx_unlock(&txr->hn_tx_lock); 6079 } 6080 6081 static void 6082 hn_xmit_qflush(struct ifnet *ifp) 6083 { 6084 struct hn_softc *sc = ifp->if_softc; 6085 struct rm_priotracker pt; 6086 int i; 6087 6088 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 6089 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 6090 if_qflush(ifp); 6091 6092 rm_rlock(&sc->hn_vf_lock, &pt); 6093 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 6094 sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp); 6095 rm_runlock(&sc->hn_vf_lock, &pt); 6096 } 6097 6098 static void 6099 hn_xmit_txeof(struct hn_tx_ring *txr) 6100 { 6101 6102 if (txr->hn_sched_tx) 6103 goto do_sched; 6104 6105 if (mtx_trylock(&txr->hn_tx_lock)) { 6106 int sched; 6107 6108 txr->hn_oactive = 0; 6109 sched = hn_xmit(txr, txr->hn_direct_tx_size); 6110 mtx_unlock(&txr->hn_tx_lock); 6111 if (sched) { 6112 taskqueue_enqueue(txr->hn_tx_taskq, 6113 &txr->hn_tx_task); 6114 } 6115 } else { 6116 do_sched: 6117 /* 6118 * Release the oactive earlier, with the hope, that 6119 * others could catch up. The task will clear the 6120 * oactive again with the hn_tx_lock to avoid possible 6121 * races. 
6122 */ 6123 txr->hn_oactive = 0; 6124 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 6125 } 6126 } 6127 6128 static void 6129 hn_xmit_taskfunc(void *xtxr, int pending __unused) 6130 { 6131 struct hn_tx_ring *txr = xtxr; 6132 6133 mtx_lock(&txr->hn_tx_lock); 6134 hn_xmit(txr, 0); 6135 mtx_unlock(&txr->hn_tx_lock); 6136 } 6137 6138 static void 6139 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) 6140 { 6141 struct hn_tx_ring *txr = xtxr; 6142 6143 mtx_lock(&txr->hn_tx_lock); 6144 txr->hn_oactive = 0; 6145 hn_xmit(txr, 0); 6146 mtx_unlock(&txr->hn_tx_lock); 6147 } 6148 6149 static int 6150 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) 6151 { 6152 struct vmbus_chan_br cbr; 6153 struct hn_rx_ring *rxr; 6154 struct hn_tx_ring *txr = NULL; 6155 int idx, error; 6156 6157 idx = vmbus_chan_subidx(chan); 6158 6159 /* 6160 * Link this channel to RX/TX ring. 6161 */ 6162 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 6163 ("invalid channel index %d, should > 0 && < %d", 6164 idx, sc->hn_rx_ring_inuse)); 6165 rxr = &sc->hn_rx_ring[idx]; 6166 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, 6167 ("RX ring %d already attached", idx)); 6168 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; 6169 rxr->hn_chan = chan; 6170 6171 if (bootverbose) { 6172 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", 6173 idx, vmbus_chan_id(chan)); 6174 } 6175 6176 if (idx < sc->hn_tx_ring_inuse) { 6177 txr = &sc->hn_tx_ring[idx]; 6178 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, 6179 ("TX ring %d already attached", idx)); 6180 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; 6181 6182 txr->hn_chan = chan; 6183 if (bootverbose) { 6184 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", 6185 idx, vmbus_chan_id(chan)); 6186 } 6187 } 6188 6189 /* Bind this channel to a proper CPU. 
*/ 6190 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx)); 6191 6192 /* 6193 * Open this channel 6194 */ 6195 cbr.cbr = rxr->hn_br; 6196 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; 6197 cbr.cbr_txsz = HN_TXBR_SIZE; 6198 cbr.cbr_rxsz = HN_RXBR_SIZE; 6199 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); 6200 if (error) { 6201 if (error == EISCONN) { 6202 if_printf(sc->hn_ifp, "bufring is connected after " 6203 "chan%u open failure\n", vmbus_chan_id(chan)); 6204 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 6205 } else { 6206 if_printf(sc->hn_ifp, "open chan%u failed: %d\n", 6207 vmbus_chan_id(chan), error); 6208 } 6209 } 6210 return (error); 6211 } 6212 6213 static void 6214 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) 6215 { 6216 struct hn_rx_ring *rxr; 6217 int idx, error; 6218 6219 idx = vmbus_chan_subidx(chan); 6220 6221 /* 6222 * Link this channel to RX/TX ring. 6223 */ 6224 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 6225 ("invalid channel index %d, should > 0 && < %d", 6226 idx, sc->hn_rx_ring_inuse)); 6227 rxr = &sc->hn_rx_ring[idx]; 6228 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), 6229 ("RX ring %d is not attached", idx)); 6230 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; 6231 6232 if (idx < sc->hn_tx_ring_inuse) { 6233 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; 6234 6235 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), 6236 ("TX ring %d is not attached attached", idx)); 6237 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; 6238 } 6239 6240 /* 6241 * Close this channel. 6242 * 6243 * NOTE: 6244 * Channel closing does _not_ destroy the target channel. 
6245 */ 6246 error = vmbus_chan_close_direct(chan); 6247 if (error == EISCONN) { 6248 if_printf(sc->hn_ifp, "chan%u bufring is connected " 6249 "after being closed\n", vmbus_chan_id(chan)); 6250 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 6251 } else if (error) { 6252 if_printf(sc->hn_ifp, "chan%u close failed: %d\n", 6253 vmbus_chan_id(chan), error); 6254 } 6255 } 6256 6257 static int 6258 hn_attach_subchans(struct hn_softc *sc) 6259 { 6260 struct vmbus_channel **subchans; 6261 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 6262 int i, error = 0; 6263 6264 KASSERT(subchan_cnt > 0, ("no sub-channels")); 6265 6266 /* Attach the sub-channels. */ 6267 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 6268 for (i = 0; i < subchan_cnt; ++i) { 6269 int error1; 6270 6271 error1 = hn_chan_attach(sc, subchans[i]); 6272 if (error1) { 6273 error = error1; 6274 /* Move on; all channels will be detached later. */ 6275 } 6276 } 6277 vmbus_subchan_rel(subchans, subchan_cnt); 6278 6279 if (error) { 6280 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); 6281 } else { 6282 if (bootverbose) { 6283 if_printf(sc->hn_ifp, "%d sub-channels attached\n", 6284 subchan_cnt); 6285 } 6286 } 6287 return (error); 6288 } 6289 6290 static void 6291 hn_detach_allchans(struct hn_softc *sc) 6292 { 6293 struct vmbus_channel **subchans; 6294 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 6295 int i; 6296 6297 if (subchan_cnt == 0) 6298 goto back; 6299 6300 /* Detach the sub-channels. */ 6301 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 6302 for (i = 0; i < subchan_cnt; ++i) 6303 hn_chan_detach(sc, subchans[i]); 6304 vmbus_subchan_rel(subchans, subchan_cnt); 6305 6306 back: 6307 /* 6308 * Detach the primary channel, _after_ all sub-channels 6309 * are detached. 6310 */ 6311 hn_chan_detach(sc, sc->hn_prichan); 6312 6313 /* Wait for sub-channels to be destroyed, if any. 
*/ 6314 vmbus_subchan_drain(sc->hn_prichan); 6315 6316 #ifdef INVARIANTS 6317 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 6318 KASSERT((sc->hn_rx_ring[i].hn_rx_flags & 6319 HN_RX_FLAG_ATTACHED) == 0, 6320 ("%dth RX ring is still attached", i)); 6321 } 6322 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 6323 KASSERT((sc->hn_tx_ring[i].hn_tx_flags & 6324 HN_TX_FLAG_ATTACHED) == 0, 6325 ("%dth TX ring is still attached", i)); 6326 } 6327 #endif 6328 } 6329 6330 static int 6331 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) 6332 { 6333 struct vmbus_channel **subchans; 6334 int nchan, rxr_cnt, error; 6335 6336 nchan = *nsubch + 1; 6337 if (nchan == 1) { 6338 /* 6339 * Multiple RX/TX rings are not requested. 6340 */ 6341 *nsubch = 0; 6342 return (0); 6343 } 6344 6345 /* 6346 * Query RSS capabilities, e.g. # of RX rings, and # of indirect 6347 * table entries. 6348 */ 6349 error = hn_rndis_query_rsscaps(sc, &rxr_cnt); 6350 if (error) { 6351 /* No RSS; this is benign. */ 6352 *nsubch = 0; 6353 return (0); 6354 } 6355 if (bootverbose) { 6356 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", 6357 rxr_cnt, nchan); 6358 } 6359 6360 if (nchan > rxr_cnt) 6361 nchan = rxr_cnt; 6362 if (nchan == 1) { 6363 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); 6364 *nsubch = 0; 6365 return (0); 6366 } 6367 6368 /* 6369 * Allocate sub-channels from NVS. 6370 */ 6371 *nsubch = nchan - 1; 6372 error = hn_nvs_alloc_subchans(sc, nsubch); 6373 if (error || *nsubch == 0) { 6374 /* Failed to allocate sub-channels. */ 6375 *nsubch = 0; 6376 return (0); 6377 } 6378 6379 /* 6380 * Wait for all sub-channels to become ready before moving on. 
6381 */ 6382 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); 6383 vmbus_subchan_rel(subchans, *nsubch); 6384 return (0); 6385 } 6386 6387 static bool 6388 hn_synth_attachable(const struct hn_softc *sc) 6389 { 6390 int i; 6391 6392 if (sc->hn_flags & HN_FLAG_ERRORS) 6393 return (false); 6394 6395 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 6396 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 6397 6398 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) 6399 return (false); 6400 } 6401 return (true); 6402 } 6403 6404 /* 6405 * Make sure that the RX filter is zero after the successful 6406 * RNDIS initialization. 6407 * 6408 * NOTE: 6409 * Under certain conditions on certain versions of Hyper-V, 6410 * the RNDIS rxfilter is _not_ zero on the hypervisor side 6411 * after the successful RNDIS initialization, which breaks 6412 * the assumption of any following code (well, it breaks the 6413 * RNDIS API contract actually). Clear the RNDIS rxfilter 6414 * explicitly, drain packets sneaking through, and drain the 6415 * interrupt taskqueues scheduled due to the stealth packets. 6416 */ 6417 static void 6418 hn_rndis_init_fixat(struct hn_softc *sc, int nchan) 6419 { 6420 6421 hn_disable_rx(sc); 6422 hn_drain_rxtx(sc, nchan); 6423 } 6424 6425 static int 6426 hn_synth_attach(struct hn_softc *sc, int mtu) 6427 { 6428 #define ATTACHED_NVS 0x0002 6429 #define ATTACHED_RNDIS 0x0004 6430 6431 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 6432 int error, nsubch, nchan = 1, i, rndis_inited; 6433 uint32_t old_caps, attached = 0; 6434 6435 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, 6436 ("synthetic parts were attached")); 6437 6438 if (!hn_synth_attachable(sc)) 6439 return (ENXIO); 6440 6441 /* Save capabilities for later verification. */ 6442 old_caps = sc->hn_caps; 6443 sc->hn_caps = 0; 6444 6445 /* Clear RSS stuffs. 
*/ 6446 sc->hn_rss_ind_size = 0; 6447 sc->hn_rss_hash = 0; 6448 sc->hn_rss_hcap = 0; 6449 6450 /* 6451 * Attach the primary channel _before_ attaching NVS and RNDIS. 6452 */ 6453 error = hn_chan_attach(sc, sc->hn_prichan); 6454 if (error) 6455 goto failed; 6456 6457 /* 6458 * Attach NVS. 6459 */ 6460 error = hn_nvs_attach(sc, mtu); 6461 if (error) 6462 goto failed; 6463 attached |= ATTACHED_NVS; 6464 6465 /* 6466 * Attach RNDIS _after_ NVS is attached. 6467 */ 6468 error = hn_rndis_attach(sc, mtu, &rndis_inited); 6469 if (rndis_inited) 6470 attached |= ATTACHED_RNDIS; 6471 if (error) 6472 goto failed; 6473 6474 /* 6475 * Make sure capabilities are not changed. 6476 */ 6477 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { 6478 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", 6479 old_caps, sc->hn_caps); 6480 error = ENXIO; 6481 goto failed; 6482 } 6483 6484 /* 6485 * Allocate sub-channels for multi-TX/RX rings. 6486 * 6487 * NOTE: 6488 * The # of RX rings that can be used is equivalent to the # of 6489 * channels to be requested. 6490 */ 6491 nsubch = sc->hn_rx_ring_cnt - 1; 6492 error = hn_synth_alloc_subchans(sc, &nsubch); 6493 if (error) 6494 goto failed; 6495 /* NOTE: _Full_ synthetic parts detach is required now. */ 6496 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; 6497 6498 /* 6499 * Set the # of TX/RX rings that could be used according to 6500 * the # of channels that NVS offered. 6501 */ 6502 nchan = nsubch + 1; 6503 hn_set_ring_inuse(sc, nchan); 6504 if (nchan == 1) { 6505 /* Only the primary channel can be used; done */ 6506 goto back; 6507 } 6508 6509 /* 6510 * Attach the sub-channels. 6511 * 6512 * NOTE: hn_set_ring_inuse() _must_ have been called. 6513 */ 6514 error = hn_attach_subchans(sc); 6515 if (error) 6516 goto failed; 6517 6518 /* 6519 * Configure RSS key and indirect table _after_ all sub-channels 6520 * are attached. 
6521 */ 6522 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { 6523 /* 6524 * RSS key is not set yet; set it to the default RSS key. 6525 */ 6526 if (bootverbose) 6527 if_printf(sc->hn_ifp, "setup default RSS key\n"); 6528 #ifdef RSS 6529 rss_getkey(rss->rss_key); 6530 #else 6531 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); 6532 #endif 6533 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 6534 } 6535 6536 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { 6537 /* 6538 * RSS indirect table is not set yet; set it up in round- 6539 * robin fashion. 6540 */ 6541 if (bootverbose) { 6542 if_printf(sc->hn_ifp, "setup default RSS indirect " 6543 "table\n"); 6544 } 6545 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 6546 uint32_t subidx; 6547 6548 #ifdef RSS 6549 subidx = rss_get_indirection_to_bucket(i); 6550 #else 6551 subidx = i; 6552 #endif 6553 rss->rss_ind[i] = subidx % nchan; 6554 } 6555 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 6556 } else { 6557 /* 6558 * # of usable channels may be changed, so we have to 6559 * make sure that all entries in RSS indirect table 6560 * are valid. 6561 * 6562 * NOTE: hn_set_ring_inuse() _must_ have been called. 6563 */ 6564 hn_rss_ind_fixup(sc); 6565 } 6566 6567 sc->hn_rss_hash = sc->hn_rss_hcap; 6568 if ((sc->hn_flags & HN_FLAG_RXVF) || 6569 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 6570 /* NOTE: Don't reconfigure RSS; will do immediately. */ 6571 hn_vf_rss_fixup(sc, false); 6572 } 6573 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 6574 if (error) 6575 goto failed; 6576 back: 6577 /* 6578 * Fixup transmission aggregation setup. 
6579 */ 6580 hn_set_txagg(sc); 6581 hn_rndis_init_fixat(sc, nchan); 6582 return (0); 6583 6584 failed: 6585 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 6586 hn_rndis_init_fixat(sc, nchan); 6587 hn_synth_detach(sc); 6588 } else { 6589 if (attached & ATTACHED_RNDIS) { 6590 hn_rndis_init_fixat(sc, nchan); 6591 hn_rndis_detach(sc); 6592 } 6593 if (attached & ATTACHED_NVS) 6594 hn_nvs_detach(sc); 6595 hn_chan_detach(sc, sc->hn_prichan); 6596 /* Restore old capabilities. */ 6597 sc->hn_caps = old_caps; 6598 } 6599 return (error); 6600 6601 #undef ATTACHED_RNDIS 6602 #undef ATTACHED_NVS 6603 } 6604 6605 /* 6606 * NOTE: 6607 * The interface must have been suspended though hn_suspend(), before 6608 * this function get called. 6609 */ 6610 static void 6611 hn_synth_detach(struct hn_softc *sc) 6612 { 6613 6614 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 6615 ("synthetic parts were not attached")); 6616 6617 /* Detach the RNDIS first. */ 6618 hn_rndis_detach(sc); 6619 6620 /* Detach NVS. */ 6621 hn_nvs_detach(sc); 6622 6623 /* Detach all of the channels. 
*/ 6624 hn_detach_allchans(sc); 6625 6626 sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED; 6627 } 6628 6629 static void 6630 hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt) 6631 { 6632 KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt, 6633 ("invalid ring count %d", ring_cnt)); 6634 6635 if (sc->hn_tx_ring_cnt > ring_cnt) 6636 sc->hn_tx_ring_inuse = ring_cnt; 6637 else 6638 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 6639 sc->hn_rx_ring_inuse = ring_cnt; 6640 6641 #ifdef RSS 6642 if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) { 6643 if_printf(sc->hn_ifp, "# of RX rings (%d) does not match " 6644 "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse, 6645 rss_getnumbuckets()); 6646 } 6647 #endif 6648 6649 if (bootverbose) { 6650 if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n", 6651 sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); 6652 } 6653 } 6654 6655 static void 6656 hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan) 6657 { 6658 6659 /* 6660 * NOTE: 6661 * The TX bufring will not be drained by the hypervisor, 6662 * if the primary channel is revoked. 6663 */ 6664 while (!vmbus_chan_rx_empty(chan) || 6665 (!vmbus_chan_is_revoked(sc->hn_prichan) && 6666 !vmbus_chan_tx_empty(chan))) 6667 pause("waitch", 1); 6668 vmbus_chan_intr_drain(chan); 6669 } 6670 6671 static void 6672 hn_disable_rx(struct hn_softc *sc) 6673 { 6674 6675 /* 6676 * Disable RX by clearing RX filter forcefully. 6677 */ 6678 sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE; 6679 hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */ 6680 6681 /* 6682 * Give RNDIS enough time to flush all pending data packets. 6683 */ 6684 pause("waitrx", (200 * hz) / 1000); 6685 } 6686 6687 /* 6688 * NOTE: 6689 * RX/TX _must_ have been suspended/disabled, before this function 6690 * is called. 6691 */ 6692 static void 6693 hn_drain_rxtx(struct hn_softc *sc, int nchan) 6694 { 6695 struct vmbus_channel **subch = NULL; 6696 int nsubch; 6697 6698 /* 6699 * Drain RX/TX bufrings and interrupts. 
6700 */ 6701 nsubch = nchan - 1; 6702 if (nsubch > 0) 6703 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 6704 6705 if (subch != NULL) { 6706 int i; 6707 6708 for (i = 0; i < nsubch; ++i) 6709 hn_chan_drain(sc, subch[i]); 6710 } 6711 hn_chan_drain(sc, sc->hn_prichan); 6712 6713 if (subch != NULL) 6714 vmbus_subchan_rel(subch, nsubch); 6715 } 6716 6717 static void 6718 hn_suspend_data(struct hn_softc *sc) 6719 { 6720 struct hn_tx_ring *txr; 6721 int i; 6722 6723 HN_LOCK_ASSERT(sc); 6724 6725 /* 6726 * Suspend TX. 6727 */ 6728 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 6729 txr = &sc->hn_tx_ring[i]; 6730 6731 mtx_lock(&txr->hn_tx_lock); 6732 txr->hn_suspended = 1; 6733 mtx_unlock(&txr->hn_tx_lock); 6734 /* No one is able send more packets now. */ 6735 6736 /* 6737 * Wait for all pending sends to finish. 6738 * 6739 * NOTE: 6740 * We will _not_ receive all pending send-done, if the 6741 * primary channel is revoked. 6742 */ 6743 while (hn_tx_ring_pending(txr) && 6744 !vmbus_chan_is_revoked(sc->hn_prichan)) 6745 pause("hnwtx", 1 /* 1 tick */); 6746 } 6747 6748 /* 6749 * Disable RX. 6750 */ 6751 hn_disable_rx(sc); 6752 6753 /* 6754 * Drain RX/TX. 6755 */ 6756 hn_drain_rxtx(sc, sc->hn_rx_ring_inuse); 6757 6758 /* 6759 * Drain any pending TX tasks. 6760 * 6761 * NOTE: 6762 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX 6763 * tasks will have to be drained _after_ the above hn_drain_rxtx(). 
6764 */ 6765 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 6766 txr = &sc->hn_tx_ring[i]; 6767 6768 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); 6769 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); 6770 } 6771 } 6772 6773 static void 6774 hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused) 6775 { 6776 6777 ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL; 6778 } 6779 6780 static void 6781 hn_suspend_mgmt(struct hn_softc *sc) 6782 { 6783 struct task task; 6784 6785 HN_LOCK_ASSERT(sc); 6786 6787 /* 6788 * Make sure that hn_mgmt_taskq0 can nolonger be accessed 6789 * through hn_mgmt_taskq. 6790 */ 6791 TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc); 6792 vmbus_chan_run_task(sc->hn_prichan, &task); 6793 6794 /* 6795 * Make sure that all pending management tasks are completed. 6796 */ 6797 taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init); 6798 taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status); 6799 taskqueue_drain_all(sc->hn_mgmt_taskq0); 6800 } 6801 6802 static void 6803 hn_suspend(struct hn_softc *sc) 6804 { 6805 6806 /* Disable polling. */ 6807 hn_polling(sc, 0); 6808 6809 /* 6810 * If the non-transparent mode VF is activated, the synthetic 6811 * device is receiving packets, so the data path of the 6812 * synthetic device must be suspended. 
6813 */ 6814 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || 6815 (sc->hn_flags & HN_FLAG_RXVF)) 6816 hn_suspend_data(sc); 6817 hn_suspend_mgmt(sc); 6818 } 6819 6820 static void 6821 hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt) 6822 { 6823 int i; 6824 6825 KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt, 6826 ("invalid TX ring count %d", tx_ring_cnt)); 6827 6828 for (i = 0; i < tx_ring_cnt; ++i) { 6829 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 6830 6831 mtx_lock(&txr->hn_tx_lock); 6832 txr->hn_suspended = 0; 6833 mtx_unlock(&txr->hn_tx_lock); 6834 } 6835 } 6836 6837 static void 6838 hn_resume_data(struct hn_softc *sc) 6839 { 6840 int i; 6841 6842 HN_LOCK_ASSERT(sc); 6843 6844 /* 6845 * Re-enable RX. 6846 */ 6847 hn_rxfilter_config(sc); 6848 6849 /* 6850 * Make sure to clear suspend status on "all" TX rings, 6851 * since hn_tx_ring_inuse can be changed after 6852 * hn_suspend_data(). 6853 */ 6854 hn_resume_tx(sc, sc->hn_tx_ring_cnt); 6855 6856 #ifdef HN_IFSTART_SUPPORT 6857 if (!hn_use_if_start) 6858 #endif 6859 { 6860 /* 6861 * Flush unused drbrs, since hn_tx_ring_inuse may be 6862 * reduced. 6863 */ 6864 for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i) 6865 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 6866 } 6867 6868 /* 6869 * Kick start TX. 6870 */ 6871 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 6872 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 6873 6874 /* 6875 * Use txeof task, so that any pending oactive can be 6876 * cleared properly. 6877 */ 6878 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 6879 } 6880 } 6881 6882 static void 6883 hn_resume_mgmt(struct hn_softc *sc) 6884 { 6885 6886 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 6887 6888 /* 6889 * Kick off network change detection, if it was pending. 6890 * If no network change was pending, start link status 6891 * checks, which is more lightweight than network change 6892 * detection. 
6893 */ 6894 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 6895 hn_change_network(sc); 6896 else 6897 hn_update_link_status(sc); 6898 } 6899 6900 static void 6901 hn_resume(struct hn_softc *sc) 6902 { 6903 6904 /* 6905 * If the non-transparent mode VF is activated, the synthetic 6906 * device have to receive packets, so the data path of the 6907 * synthetic device must be resumed. 6908 */ 6909 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || 6910 (sc->hn_flags & HN_FLAG_RXVF)) 6911 hn_resume_data(sc); 6912 6913 /* 6914 * Don't resume link status change if VF is attached/activated. 6915 * - In the non-transparent VF mode, the synthetic device marks 6916 * link down until the VF is deactivated; i.e. VF is down. 6917 * - In transparent VF mode, VF's media status is used until 6918 * the VF is detached. 6919 */ 6920 if ((sc->hn_flags & HN_FLAG_RXVF) == 0 && 6921 !(hn_xpnt_vf && sc->hn_vf_ifp != NULL)) 6922 hn_resume_mgmt(sc); 6923 6924 /* 6925 * Re-enable polling if this interface is running and 6926 * the polling is requested. 6927 */ 6928 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0) 6929 hn_polling(sc, sc->hn_pollhz); 6930 } 6931 6932 static void 6933 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) 6934 { 6935 const struct rndis_status_msg *msg; 6936 int ofs; 6937 6938 if (dlen < sizeof(*msg)) { 6939 if_printf(sc->hn_ifp, "invalid RNDIS status\n"); 6940 return; 6941 } 6942 msg = data; 6943 6944 switch (msg->rm_status) { 6945 case RNDIS_STATUS_MEDIA_CONNECT: 6946 case RNDIS_STATUS_MEDIA_DISCONNECT: 6947 hn_update_link_status(sc); 6948 break; 6949 6950 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: 6951 case RNDIS_STATUS_LINK_SPEED_CHANGE: 6952 /* Not really useful; ignore. 
*/ 6953 break; 6954 6955 case RNDIS_STATUS_NETWORK_CHANGE: 6956 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); 6957 if (dlen < ofs + msg->rm_stbuflen || 6958 msg->rm_stbuflen < sizeof(uint32_t)) { 6959 if_printf(sc->hn_ifp, "network changed\n"); 6960 } else { 6961 uint32_t change; 6962 6963 memcpy(&change, ((const uint8_t *)msg) + ofs, 6964 sizeof(change)); 6965 if_printf(sc->hn_ifp, "network changed, change %u\n", 6966 change); 6967 } 6968 hn_change_network(sc); 6969 break; 6970 6971 default: 6972 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", 6973 msg->rm_status); 6974 break; 6975 } 6976 } 6977 6978 static int 6979 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) 6980 { 6981 const struct rndis_pktinfo *pi = info_data; 6982 uint32_t mask = 0; 6983 6984 while (info_dlen != 0) { 6985 const void *data; 6986 uint32_t dlen; 6987 6988 if (__predict_false(info_dlen < sizeof(*pi))) 6989 return (EINVAL); 6990 if (__predict_false(info_dlen < pi->rm_size)) 6991 return (EINVAL); 6992 info_dlen -= pi->rm_size; 6993 6994 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) 6995 return (EINVAL); 6996 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) 6997 return (EINVAL); 6998 dlen = pi->rm_size - pi->rm_pktinfooffset; 6999 data = pi->rm_data; 7000 7001 switch (pi->rm_type) { 7002 case NDIS_PKTINFO_TYPE_VLAN: 7003 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) 7004 return (EINVAL); 7005 info->vlan_info = *((const uint32_t *)data); 7006 mask |= HN_RXINFO_VLAN; 7007 break; 7008 7009 case NDIS_PKTINFO_TYPE_CSUM: 7010 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) 7011 return (EINVAL); 7012 info->csum_info = *((const uint32_t *)data); 7013 mask |= HN_RXINFO_CSUM; 7014 break; 7015 7016 case HN_NDIS_PKTINFO_TYPE_HASHVAL: 7017 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) 7018 return (EINVAL); 7019 info->hash_value = *((const uint32_t *)data); 7020 mask |= HN_RXINFO_HASHVAL; 7021 break; 7022 7023 case 
HN_NDIS_PKTINFO_TYPE_HASHINF: 7024 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) 7025 return (EINVAL); 7026 info->hash_info = *((const uint32_t *)data); 7027 mask |= HN_RXINFO_HASHINF; 7028 break; 7029 7030 default: 7031 goto next; 7032 } 7033 7034 if (mask == HN_RXINFO_ALL) { 7035 /* All found; done */ 7036 break; 7037 } 7038 next: 7039 pi = (const struct rndis_pktinfo *) 7040 ((const uint8_t *)pi + pi->rm_size); 7041 } 7042 7043 /* 7044 * Final fixup. 7045 * - If there is no hash value, invalidate the hash info. 7046 */ 7047 if ((mask & HN_RXINFO_HASHVAL) == 0) 7048 info->hash_info = HN_NDIS_HASH_INFO_INVALID; 7049 return (0); 7050 } 7051 7052 static __inline bool 7053 hn_rndis_check_overlap(int off, int len, int check_off, int check_len) 7054 { 7055 7056 if (off < check_off) { 7057 if (__predict_true(off + len <= check_off)) 7058 return (false); 7059 } else if (off > check_off) { 7060 if (__predict_true(check_off + check_len <= off)) 7061 return (false); 7062 } 7063 return (true); 7064 } 7065 7066 static void 7067 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) 7068 { 7069 const struct rndis_packet_msg *pkt; 7070 struct hn_rxinfo info; 7071 int data_off, pktinfo_off, data_len, pktinfo_len; 7072 7073 /* 7074 * Check length. 
7075 */ 7076 if (__predict_false(dlen < sizeof(*pkt))) { 7077 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n"); 7078 return; 7079 } 7080 pkt = data; 7081 7082 if (__predict_false(dlen < pkt->rm_len)) { 7083 if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, " 7084 "dlen %d, msglen %u\n", dlen, pkt->rm_len); 7085 return; 7086 } 7087 if (__predict_false(pkt->rm_len < 7088 pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) { 7089 if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, " 7090 "msglen %u, data %u, oob %u, pktinfo %u\n", 7091 pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen, 7092 pkt->rm_pktinfolen); 7093 return; 7094 } 7095 if (__predict_false(pkt->rm_datalen == 0)) { 7096 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n"); 7097 return; 7098 } 7099 7100 /* 7101 * Check offests. 7102 */ 7103 #define IS_OFFSET_INVALID(ofs) \ 7104 ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \ 7105 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)) 7106 7107 /* XXX Hyper-V does not meet data offset alignment requirement */ 7108 if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) { 7109 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7110 "data offset %u\n", pkt->rm_dataoffset); 7111 return; 7112 } 7113 if (__predict_false(pkt->rm_oobdataoffset > 0 && 7114 IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) { 7115 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7116 "oob offset %u\n", pkt->rm_oobdataoffset); 7117 return; 7118 } 7119 if (__predict_true(pkt->rm_pktinfooffset > 0) && 7120 __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) { 7121 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7122 "pktinfo offset %u\n", pkt->rm_pktinfooffset); 7123 return; 7124 } 7125 7126 #undef IS_OFFSET_INVALID 7127 7128 data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset); 7129 data_len = pkt->rm_datalen; 7130 pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset); 7131 pktinfo_len = pkt->rm_pktinfolen; 7132 7133 /* 7134 * Check OOB 
coverage. 7135 */ 7136 if (__predict_false(pkt->rm_oobdatalen != 0)) { 7137 int oob_off, oob_len; 7138 7139 if_printf(rxr->hn_ifp, "got oobdata\n"); 7140 oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); 7141 oob_len = pkt->rm_oobdatalen; 7142 7143 if (__predict_false(oob_off + oob_len > pkt->rm_len)) { 7144 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7145 "oob overflow, msglen %u, oob abs %d len %d\n", 7146 pkt->rm_len, oob_off, oob_len); 7147 return; 7148 } 7149 7150 /* 7151 * Check against data. 7152 */ 7153 if (hn_rndis_check_overlap(oob_off, oob_len, 7154 data_off, data_len)) { 7155 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7156 "oob overlaps data, oob abs %d len %d, " 7157 "data abs %d len %d\n", 7158 oob_off, oob_len, data_off, data_len); 7159 return; 7160 } 7161 7162 /* 7163 * Check against pktinfo. 7164 */ 7165 if (pktinfo_len != 0 && 7166 hn_rndis_check_overlap(oob_off, oob_len, 7167 pktinfo_off, pktinfo_len)) { 7168 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7169 "oob overlaps pktinfo, oob abs %d len %d, " 7170 "pktinfo abs %d len %d\n", 7171 oob_off, oob_len, pktinfo_off, pktinfo_len); 7172 return; 7173 } 7174 } 7175 7176 /* 7177 * Check per-packet-info coverage and find useful per-packet-info. 7178 */ 7179 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; 7180 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; 7181 info.hash_info = HN_NDIS_HASH_INFO_INVALID; 7182 if (__predict_true(pktinfo_len != 0)) { 7183 bool overlap; 7184 int error; 7185 7186 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { 7187 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7188 "pktinfo overflow, msglen %u, " 7189 "pktinfo abs %d len %d\n", 7190 pkt->rm_len, pktinfo_off, pktinfo_len); 7191 return; 7192 } 7193 7194 /* 7195 * Check packet info coverage. 
7196 */ 7197 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, 7198 data_off, data_len); 7199 if (__predict_false(overlap)) { 7200 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7201 "pktinfo overlap data, pktinfo abs %d len %d, " 7202 "data abs %d len %d\n", 7203 pktinfo_off, pktinfo_len, data_off, data_len); 7204 return; 7205 } 7206 7207 /* 7208 * Find useful per-packet-info. 7209 */ 7210 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, 7211 pktinfo_len, &info); 7212 if (__predict_false(error)) { 7213 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " 7214 "pktinfo\n"); 7215 return; 7216 } 7217 } 7218 7219 if (__predict_false(data_off + data_len > pkt->rm_len)) { 7220 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7221 "data overflow, msglen %u, data abs %d len %d\n", 7222 pkt->rm_len, data_off, data_len); 7223 return; 7224 } 7225 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); 7226 } 7227 7228 static __inline void 7229 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) 7230 { 7231 const struct rndis_msghdr *hdr; 7232 7233 if (__predict_false(dlen < sizeof(*hdr))) { 7234 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); 7235 return; 7236 } 7237 hdr = data; 7238 7239 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { 7240 /* Hot data path. */ 7241 hn_rndis_rx_data(rxr, data, dlen); 7242 /* Done! 
*/ 7243 return; 7244 } 7245 7246 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) 7247 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); 7248 else 7249 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); 7250 } 7251 7252 static void 7253 hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) 7254 { 7255 const struct hn_nvs_hdr *hdr; 7256 7257 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { 7258 if_printf(sc->hn_ifp, "invalid nvs notify\n"); 7259 return; 7260 } 7261 hdr = VMBUS_CHANPKT_CONST_DATA(pkt); 7262 7263 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { 7264 /* Useless; ignore */ 7265 return; 7266 } 7267 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); 7268 } 7269 7270 static void 7271 hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, 7272 const struct vmbus_chanpkt_hdr *pkt) 7273 { 7274 struct hn_nvs_sendctx *sndc; 7275 7276 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; 7277 sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), 7278 VMBUS_CHANPKT_DATALEN(pkt)); 7279 /* 7280 * NOTE: 7281 * 'sndc' CAN NOT be accessed anymore, since it can be freed by 7282 * its callback. 7283 */ 7284 } 7285 7286 static void 7287 hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 7288 const struct vmbus_chanpkt_hdr *pkthdr) 7289 { 7290 const struct vmbus_chanpkt_rxbuf *pkt; 7291 const struct hn_nvs_hdr *nvs_hdr; 7292 int count, i, hlen; 7293 7294 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { 7295 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); 7296 return; 7297 } 7298 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); 7299 7300 /* Make sure that this is a RNDIS message. 
*/ 7301 if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { 7302 if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", 7303 nvs_hdr->nvs_type); 7304 return; 7305 } 7306 7307 hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); 7308 if (__predict_false(hlen < sizeof(*pkt))) { 7309 if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); 7310 return; 7311 } 7312 pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; 7313 7314 if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { 7315 if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", 7316 pkt->cp_rxbuf_id); 7317 return; 7318 } 7319 7320 count = pkt->cp_rxbuf_cnt; 7321 if (__predict_false(hlen < 7322 __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { 7323 if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); 7324 return; 7325 } 7326 7327 /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ 7328 for (i = 0; i < count; ++i) { 7329 int ofs, len; 7330 7331 ofs = pkt->cp_rxbuf[i].rb_ofs; 7332 len = pkt->cp_rxbuf[i].rb_len; 7333 if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { 7334 if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " 7335 "ofs %d, len %d\n", i, ofs, len); 7336 continue; 7337 } 7338 hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); 7339 } 7340 7341 /* 7342 * Ack the consumed RXBUF associated w/ this channel packet, 7343 * so that this RXBUF can be recycled by the hypervisor. 
7344 */ 7345 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); 7346 } 7347 7348 static void 7349 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 7350 uint64_t tid) 7351 { 7352 struct hn_nvs_rndis_ack ack; 7353 int retries, error; 7354 7355 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; 7356 ack.nvs_status = HN_NVS_STATUS_OK; 7357 7358 retries = 0; 7359 again: 7360 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, 7361 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); 7362 if (__predict_false(error == EAGAIN)) { 7363 /* 7364 * NOTE: 7365 * This should _not_ happen in real world, since the 7366 * consumption of the TX bufring from the TX path is 7367 * controlled. 7368 */ 7369 if (rxr->hn_ack_failed == 0) 7370 if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); 7371 rxr->hn_ack_failed++; 7372 retries++; 7373 if (retries < 10) { 7374 DELAY(100); 7375 goto again; 7376 } 7377 /* RXBUF leaks! */ 7378 if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); 7379 } 7380 } 7381 7382 static void 7383 hn_chan_callback(struct vmbus_channel *chan, void *xrxr) 7384 { 7385 struct hn_rx_ring *rxr = xrxr; 7386 struct hn_softc *sc = rxr->hn_ifp->if_softc; 7387 7388 for (;;) { 7389 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; 7390 int error, pktlen; 7391 7392 pktlen = rxr->hn_pktbuf_len; 7393 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); 7394 if (__predict_false(error == ENOBUFS)) { 7395 void *nbuf; 7396 int nlen; 7397 7398 /* 7399 * Expand channel packet buffer. 7400 * 7401 * XXX 7402 * Use M_WAITOK here, since allocation failure 7403 * is fatal. 7404 */ 7405 nlen = rxr->hn_pktbuf_len * 2; 7406 while (nlen < pktlen) 7407 nlen *= 2; 7408 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); 7409 7410 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", 7411 rxr->hn_pktbuf_len, nlen); 7412 7413 free(rxr->hn_pktbuf, M_DEVBUF); 7414 rxr->hn_pktbuf = nbuf; 7415 rxr->hn_pktbuf_len = nlen; 7416 /* Retry! 
*/ 7417 continue; 7418 } else if (__predict_false(error == EAGAIN)) { 7419 /* No more channel packets; done! */ 7420 break; 7421 } 7422 KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error)); 7423 7424 switch (pkt->cph_type) { 7425 case VMBUS_CHANPKT_TYPE_COMP: 7426 hn_nvs_handle_comp(sc, chan, pkt); 7427 break; 7428 7429 case VMBUS_CHANPKT_TYPE_RXBUF: 7430 hn_nvs_handle_rxbuf(rxr, chan, pkt); 7431 break; 7432 7433 case VMBUS_CHANPKT_TYPE_INBAND: 7434 hn_nvs_handle_notify(sc, pkt); 7435 break; 7436 7437 default: 7438 if_printf(rxr->hn_ifp, "unknown chan pkt %u\n", 7439 pkt->cph_type); 7440 break; 7441 } 7442 } 7443 hn_chan_rollup(rxr, rxr->hn_txr); 7444 } 7445 7446 static void 7447 hn_sysinit(void *arg __unused) 7448 { 7449 int i; 7450 7451 hn_udpcs_fixup = counter_u64_alloc(M_WAITOK); 7452 7453 #ifdef HN_IFSTART_SUPPORT 7454 /* 7455 * Don't use ifnet.if_start if transparent VF mode is requested; 7456 * mainly due to the IFF_DRV_OACTIVE flag. 7457 */ 7458 if (hn_xpnt_vf && hn_use_if_start) { 7459 hn_use_if_start = 0; 7460 printf("hn: tranparent VF mode, if_transmit will be used, " 7461 "instead of if_start\n"); 7462 } 7463 #endif 7464 if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) { 7465 printf("hn: invalid transparent VF attach routing " 7466 "wait timeout %d, reset to %d\n", 7467 hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN); 7468 hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; 7469 } 7470 7471 /* 7472 * Initialize VF map. 7473 */ 7474 rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE); 7475 hn_vfmap_size = HN_VFMAP_SIZE_DEF; 7476 hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF, 7477 M_WAITOK | M_ZERO); 7478 7479 /* 7480 * Fix the # of TX taskqueues. 7481 */ 7482 if (hn_tx_taskq_cnt <= 0) 7483 hn_tx_taskq_cnt = 1; 7484 else if (hn_tx_taskq_cnt > mp_ncpus) 7485 hn_tx_taskq_cnt = mp_ncpus; 7486 7487 /* 7488 * Fix the TX taskqueue mode. 
7489 */ 7490 switch (hn_tx_taskq_mode) { 7491 case HN_TX_TASKQ_M_INDEP: 7492 case HN_TX_TASKQ_M_GLOBAL: 7493 case HN_TX_TASKQ_M_EVTTQ: 7494 break; 7495 default: 7496 hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; 7497 break; 7498 } 7499 7500 if (vm_guest != VM_GUEST_HV) 7501 return; 7502 7503 if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL) 7504 return; 7505 7506 hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 7507 M_DEVBUF, M_WAITOK); 7508 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 7509 hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK, 7510 taskqueue_thread_enqueue, &hn_tx_taskque[i]); 7511 taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET, 7512 "hn tx%d", i); 7513 } 7514 } 7515 SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL); 7516 7517 static void 7518 hn_sysuninit(void *arg __unused) 7519 { 7520 7521 if (hn_tx_taskque != NULL) { 7522 int i; 7523 7524 for (i = 0; i < hn_tx_taskq_cnt; ++i) 7525 taskqueue_free(hn_tx_taskque[i]); 7526 free(hn_tx_taskque, M_DEVBUF); 7527 } 7528 7529 if (hn_vfmap != NULL) 7530 free(hn_vfmap, M_DEVBUF); 7531 rm_destroy(&hn_vfmap_lock); 7532 7533 counter_u64_free(hn_udpcs_fixup); 7534 } 7535 SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL); 7536