1 /*- 2 * Copyright (c) 2010-2012 Citrix Inc. 3 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. 4 * Copyright (c) 2012 NetApp Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 2004-2006 Kip Macy 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 
53 */ 54 55 #include <sys/cdefs.h> 56 __FBSDID("$FreeBSD$"); 57 58 #include "opt_hn.h" 59 #include "opt_inet6.h" 60 #include "opt_inet.h" 61 #include "opt_rss.h" 62 63 #include <sys/param.h> 64 #include <sys/bus.h> 65 #include <sys/kernel.h> 66 #include <sys/limits.h> 67 #include <sys/malloc.h> 68 #include <sys/mbuf.h> 69 #include <sys/module.h> 70 #include <sys/queue.h> 71 #include <sys/lock.h> 72 #include <sys/rmlock.h> 73 #include <sys/sbuf.h> 74 #include <sys/smp.h> 75 #include <sys/socket.h> 76 #include <sys/sockio.h> 77 #include <sys/sx.h> 78 #include <sys/sysctl.h> 79 #include <sys/systm.h> 80 #include <sys/taskqueue.h> 81 #include <sys/buf_ring.h> 82 #include <sys/eventhandler.h> 83 84 #include <machine/atomic.h> 85 #include <machine/in_cksum.h> 86 87 #include <net/bpf.h> 88 #include <net/ethernet.h> 89 #include <net/if.h> 90 #include <net/if_dl.h> 91 #include <net/if_media.h> 92 #include <net/if_types.h> 93 #include <net/if_var.h> 94 #include <net/rndis.h> 95 #ifdef RSS 96 #include <net/rss_config.h> 97 #endif 98 99 #include <netinet/in_systm.h> 100 #include <netinet/in.h> 101 #include <netinet/ip.h> 102 #include <netinet/ip6.h> 103 #include <netinet/tcp.h> 104 #include <netinet/tcp_lro.h> 105 #include <netinet/udp.h> 106 107 #include <dev/hyperv/include/hyperv.h> 108 #include <dev/hyperv/include/hyperv_busdma.h> 109 #include <dev/hyperv/include/vmbus.h> 110 #include <dev/hyperv/include/vmbus_xact.h> 111 112 #include <dev/hyperv/netvsc/ndis.h> 113 #include <dev/hyperv/netvsc/if_hnreg.h> 114 #include <dev/hyperv/netvsc/if_hnvar.h> 115 #include <dev/hyperv/netvsc/hn_nvs.h> 116 #include <dev/hyperv/netvsc/hn_rndis.h> 117 118 #include "vmbus_if.h" 119 120 #define HN_IFSTART_SUPPORT 121 122 #define HN_RING_CNT_DEF_MAX 8 123 124 #define HN_VFMAP_SIZE_DEF 8 125 126 #define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */ 127 128 /* YYY should get it from the underlying channel */ 129 #define HN_TX_DESC_CNT 512 130 131 #define HN_RNDIS_PKT_LEN \ 132 (sizeof(struct rndis_packet_msg) + \ 133 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ 134 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ 135 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ 136 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) 137 #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE 138 #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE 139 140 #define HN_TX_DATA_BOUNDARY PAGE_SIZE 141 #define HN_TX_DATA_MAXSIZE IP_MAXPACKET 142 #define HN_TX_DATA_SEGSIZE PAGE_SIZE 143 /* -1 for RNDIS packet message */ 144 #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) 145 146 #define HN_DIRECT_TX_SIZE_DEF 128 147 148 #define HN_EARLY_TXEOF_THRESH 8 149 150 #define HN_PKTBUF_LEN_DEF (16 * 1024) 151 152 #define HN_LROENT_CNT_DEF 128 153 154 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) 155 #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) 156 /* YYY 2*MTU is a bit rough, but should be good enough. 
*/ 157 #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) 158 159 #define HN_LRO_ACKCNT_DEF 1 160 161 #define HN_LOCK_INIT(sc) \ 162 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) 163 #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) 164 #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) 165 #define HN_LOCK(sc) \ 166 do { \ 167 while (sx_try_xlock(&(sc)->hn_lock) == 0) \ 168 DELAY(1000); \ 169 } while (0) 170 #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) 171 172 #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) 173 #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) 174 #define HN_CSUM_IP_HWASSIST(sc) \ 175 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) 176 #define HN_CSUM_IP6_HWASSIST(sc) \ 177 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) 178 179 #define HN_PKTSIZE_MIN(align) \ 180 roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ 181 HN_RNDIS_PKT_LEN, (align)) 182 #define HN_PKTSIZE(m, align) \ 183 roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) 184 185 #ifdef RSS 186 #define HN_RING_IDX2CPU(sc, idx) rss_getcpu((idx) % rss_getnumbuckets()) 187 #else 188 #define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus) 189 #endif 190 191 struct hn_txdesc { 192 #ifndef HN_USE_TXDESC_BUFRING 193 SLIST_ENTRY(hn_txdesc) link; 194 #endif 195 STAILQ_ENTRY(hn_txdesc) agg_link; 196 197 /* Aggregated txdescs, in sending order. */ 198 STAILQ_HEAD(, hn_txdesc) agg_list; 199 200 /* The oldest packet, if transmission aggregation happens. */ 201 struct mbuf *m; 202 struct hn_tx_ring *txr; 203 int refs; 204 uint32_t flags; /* HN_TXD_FLAG_ */ 205 struct hn_nvs_sendctx send_ctx; 206 uint32_t chim_index; 207 int chim_size; 208 209 bus_dmamap_t data_dmap; 210 211 bus_addr_t rndis_pkt_paddr; 212 struct rndis_packet_msg *rndis_pkt; 213 bus_dmamap_t rndis_pkt_dmap; 214 }; 215 216 #define HN_TXD_FLAG_ONLIST 0x0001 217 #define HN_TXD_FLAG_DMAMAP 0x0002 218 #define HN_TXD_FLAG_ONAGG 0x0004 219 220 struct hn_rxinfo { 221 uint32_t vlan_info; 222 uint32_t csum_info; 223 uint32_t hash_info; 224 uint32_t hash_value; 225 }; 226 227 struct hn_rxvf_setarg { 228 struct hn_rx_ring *rxr; 229 struct ifnet *vf_ifp; 230 }; 231 232 #define HN_RXINFO_VLAN 0x0001 233 #define HN_RXINFO_CSUM 0x0002 234 #define HN_RXINFO_HASHINF 0x0004 235 #define HN_RXINFO_HASHVAL 0x0008 236 #define HN_RXINFO_ALL \ 237 (HN_RXINFO_VLAN | \ 238 HN_RXINFO_CSUM | \ 239 HN_RXINFO_HASHINF | \ 240 HN_RXINFO_HASHVAL) 241 242 #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff 243 #define HN_NDIS_RXCSUM_INFO_INVALID 0 244 #define HN_NDIS_HASH_INFO_INVALID 0 245 246 static int hn_probe(device_t); 247 static int hn_attach(device_t); 248 static int hn_detach(device_t); 249 static int hn_shutdown(device_t); 250 static void hn_chan_callback(struct vmbus_channel *, 251 void *); 252 253 static void hn_init(void *); 254 static int hn_ioctl(struct ifnet *, u_long, caddr_t); 255 #ifdef HN_IFSTART_SUPPORT 256 static void hn_start(struct ifnet *); 257 #endif 258 static int hn_transmit(struct ifnet *, struct mbuf *); 259 static void hn_xmit_qflush(struct ifnet *); 260 static int hn_ifmedia_upd(struct ifnet *); 261 static void hn_ifmedia_sts(struct ifnet *, 262 struct ifmediareq *); 263 264 static void hn_ifnet_event(void *, struct ifnet *, int); 265 static void hn_ifaddr_event(void *, struct ifnet *); 266 static void hn_ifnet_attevent(void *, struct ifnet *); 267 static void hn_ifnet_detevent(void *, struct ifnet *); 268 static void hn_ifnet_lnkevent(void *, struct ifnet *, int); 269 
270 static bool hn_ismyvf(const struct hn_softc *, 271 const struct ifnet *); 272 static void hn_rxvf_change(struct hn_softc *, 273 struct ifnet *, bool); 274 static void hn_rxvf_set(struct hn_softc *, struct ifnet *); 275 static void hn_rxvf_set_task(void *, int); 276 static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *); 277 static int hn_xpnt_vf_iocsetflags(struct hn_softc *); 278 static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, 279 struct ifreq *); 280 static void hn_xpnt_vf_saveifflags(struct hn_softc *); 281 static bool hn_xpnt_vf_isready(struct hn_softc *); 282 static void hn_xpnt_vf_setready(struct hn_softc *); 283 static void hn_xpnt_vf_init_taskfunc(void *, int); 284 static void hn_xpnt_vf_init(struct hn_softc *); 285 static void hn_xpnt_vf_setenable(struct hn_softc *); 286 static void hn_xpnt_vf_setdisable(struct hn_softc *, bool); 287 288 static int hn_rndis_rxinfo(const void *, int, 289 struct hn_rxinfo *); 290 static void hn_rndis_rx_data(struct hn_rx_ring *, 291 const void *, int); 292 static void hn_rndis_rx_status(struct hn_softc *, 293 const void *, int); 294 static void hn_rndis_init_fixat(struct hn_softc *, int); 295 296 static void hn_nvs_handle_notify(struct hn_softc *, 297 const struct vmbus_chanpkt_hdr *); 298 static void hn_nvs_handle_comp(struct hn_softc *, 299 struct vmbus_channel *, 300 const struct vmbus_chanpkt_hdr *); 301 static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, 302 struct vmbus_channel *, 303 const struct vmbus_chanpkt_hdr *); 304 static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, 305 struct vmbus_channel *, uint64_t); 306 307 #if __FreeBSD_version >= 1100099 308 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); 309 static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); 310 #endif 311 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); 312 static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); 313 #if __FreeBSD_version < 1100095 314 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); 315 #else 316 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); 317 #endif 318 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 319 static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 320 static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); 321 static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); 322 static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); 323 static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); 324 static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); 325 #ifndef RSS 326 static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); 327 static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); 328 #endif 329 static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); 330 static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); 331 static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); 332 static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); 333 static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); 334 static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS); 335 static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS); 336 static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS); 337 static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS); 338 static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS); 339 static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS); 340 static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS); 341 342 static void hn_stop(struct hn_softc *, bool); 343 static void hn_init_locked(struct hn_softc *); 344 static int hn_chan_attach(struct hn_softc *, 345 struct vmbus_channel *); 346 static void hn_chan_detach(struct hn_softc *, 347 struct vmbus_channel *); 348 static int 
hn_attach_subchans(struct hn_softc *); 349 static void hn_detach_allchans(struct hn_softc *); 350 static void hn_chan_rollup(struct hn_rx_ring *, 351 struct hn_tx_ring *); 352 static void hn_set_ring_inuse(struct hn_softc *, int); 353 static int hn_synth_attach(struct hn_softc *, int); 354 static void hn_synth_detach(struct hn_softc *); 355 static int hn_synth_alloc_subchans(struct hn_softc *, 356 int *); 357 static bool hn_synth_attachable(const struct hn_softc *); 358 static void hn_suspend(struct hn_softc *); 359 static void hn_suspend_data(struct hn_softc *); 360 static void hn_suspend_mgmt(struct hn_softc *); 361 static void hn_resume(struct hn_softc *); 362 static void hn_resume_data(struct hn_softc *); 363 static void hn_resume_mgmt(struct hn_softc *); 364 static void hn_suspend_mgmt_taskfunc(void *, int); 365 static void hn_chan_drain(struct hn_softc *, 366 struct vmbus_channel *); 367 static void hn_disable_rx(struct hn_softc *); 368 static void hn_drain_rxtx(struct hn_softc *, int); 369 static void hn_polling(struct hn_softc *, u_int); 370 static void hn_chan_polling(struct vmbus_channel *, u_int); 371 static void hn_mtu_change_fixup(struct hn_softc *); 372 373 static void hn_update_link_status(struct hn_softc *); 374 static void hn_change_network(struct hn_softc *); 375 static void hn_link_taskfunc(void *, int); 376 static void hn_netchg_init_taskfunc(void *, int); 377 static void hn_netchg_status_taskfunc(void *, int); 378 static void hn_link_status(struct hn_softc *); 379 380 static int hn_create_rx_data(struct hn_softc *, int); 381 static void hn_destroy_rx_data(struct hn_softc *); 382 static int hn_check_iplen(const struct mbuf *, int); 383 static int hn_set_rxfilter(struct hn_softc *, uint32_t); 384 static int hn_rxfilter_config(struct hn_softc *); 385 #ifndef RSS 386 static int hn_rss_reconfig(struct hn_softc *); 387 #endif 388 static void hn_rss_ind_fixup(struct hn_softc *); 389 static int hn_rxpkt(struct hn_rx_ring *, const void *, 390 int, const struct hn_rxinfo *); 391 392 static int hn_tx_ring_create(struct hn_softc *, int); 393 static void hn_tx_ring_destroy(struct hn_tx_ring *); 394 static int hn_create_tx_data(struct hn_softc *, int); 395 static void hn_fixup_tx_data(struct hn_softc *); 396 static void hn_destroy_tx_data(struct hn_softc *); 397 static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); 398 static void hn_txdesc_gc(struct hn_tx_ring *, 399 struct hn_txdesc *); 400 static int hn_encap(struct ifnet *, struct hn_tx_ring *, 401 struct hn_txdesc *, struct mbuf **); 402 static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, 403 struct hn_txdesc *); 404 static void hn_set_chim_size(struct hn_softc *, int); 405 static void hn_set_tso_maxsize(struct hn_softc *, int, int); 406 static bool hn_tx_ring_pending(struct hn_tx_ring *); 407 static void hn_tx_ring_qflush(struct hn_tx_ring *); 408 static void hn_resume_tx(struct hn_softc *, int); 409 static void hn_set_txagg(struct hn_softc *); 410 static void *hn_try_txagg(struct ifnet *, 411 struct hn_tx_ring *, struct hn_txdesc *, 412 int); 413 static int hn_get_txswq_depth(const struct hn_tx_ring *); 414 static void hn_txpkt_done(struct hn_nvs_sendctx *, 415 struct hn_softc *, struct vmbus_channel *, 416 const void *, int); 417 static int hn_txpkt_sglist(struct hn_tx_ring *, 418 struct hn_txdesc *); 419 static int hn_txpkt_chim(struct hn_tx_ring *, 420 struct hn_txdesc *); 421 static int hn_xmit(struct hn_tx_ring *, int); 422 static void hn_xmit_taskfunc(void *, int); 423 static void hn_xmit_txeof(struct 
			hn_tx_ring *);
static void			hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int			hn_start_locked(struct hn_tx_ring *, int);
static void			hn_start_taskfunc(void *, int);
static void			hn_start_txeof(struct hn_tx_ring *);
static void			hn_start_txeof_taskfunc(void *, int);
#endif

SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust tcp segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust tcp segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust udp datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust udp datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust ip packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust ip packet verification on host side, "
    "when csum info is missing (global setting)");

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");

#define HN_TX_TASKQ_M_INDEP	0
#define HN_TX_TASKQ_M_GLOBAL	1
#define HN_TX_TASKQ_M_EVTTQ	2

static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");

/* VF list */
SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, CTLFLAG_RD | CTLTYPE_STRING,
    0, 0, hn_vflist_sysctl, "A", "VF list");

/* VF mapping */
SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING,
    0, 0, hn_vfmap_sysctl, "A", "VF mapping");

/* Transparent VF */
static int hn_xpnt_vf = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN,
    &hn_xpnt_vf, 0, "Transparent VF mode");

/* Accurate BPF support for Transparent VF */
static int hn_xpnt_vf_accbpf = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN,
    &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF");

/* Extra wait for transparent VF attach routing; unit seconds.
*/ 560 static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; 561 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN, 562 &hn_xpnt_vf_attwait, 0, 563 "Extra wait for transparent VF attach routing; unit: seconds"); 564 565 static u_int hn_cpu_index; /* next CPU for channel */ 566 static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */ 567 568 static struct rmlock hn_vfmap_lock; 569 static int hn_vfmap_size; 570 static struct ifnet **hn_vfmap; 571 572 #ifndef RSS 573 static const uint8_t 574 hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 575 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 576 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 577 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 578 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 579 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 580 }; 581 #endif /* !RSS */ 582 583 static const struct hyperv_guid hn_guid = { 584 .hv_guid = { 585 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, 586 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e } 587 }; 588 589 static device_method_t hn_methods[] = { 590 /* Device interface */ 591 DEVMETHOD(device_probe, hn_probe), 592 DEVMETHOD(device_attach, hn_attach), 593 DEVMETHOD(device_detach, hn_detach), 594 DEVMETHOD(device_shutdown, hn_shutdown), 595 DEVMETHOD_END 596 }; 597 598 static driver_t hn_driver = { 599 "hn", 600 hn_methods, 601 sizeof(struct hn_softc) 602 }; 603 604 static devclass_t hn_devclass; 605 606 DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); 607 MODULE_VERSION(hn, 1); 608 MODULE_DEPEND(hn, vmbus, 1, 1, 1); 609 610 #if __FreeBSD_version >= 1100099 611 static void 612 hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) 613 { 614 int i; 615 616 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 617 sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; 618 } 619 #endif 620 621 static int 622 hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) 623 { 624 625 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 626 txd->chim_size == 0, ("invalid rndis sglist txd")); 627 return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, 628 &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); 629 } 630 631 static int 632 hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) 633 { 634 struct hn_nvs_rndis rndis; 635 636 KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && 637 txd->chim_size > 0, ("invalid rndis chim txd")); 638 639 rndis.nvs_type = HN_NVS_TYPE_RNDIS; 640 rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; 641 rndis.nvs_chim_idx = txd->chim_index; 642 rndis.nvs_chim_sz = txd->chim_size; 643 644 return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, 645 &rndis, sizeof(rndis), &txd->send_ctx)); 646 } 647 648 static __inline uint32_t 649 hn_chim_alloc(struct hn_softc *sc) 650 { 651 int i, bmap_cnt = sc->hn_chim_bmap_cnt; 652 u_long *bmap = sc->hn_chim_bmap; 653 uint32_t ret = HN_NVS_CHIM_IDX_INVALID; 654 655 for (i = 0; i < bmap_cnt; ++i) { 656 int idx; 657 658 idx = ffsl(~bmap[i]); 659 if (idx == 0) 660 continue; 661 662 --idx; /* ffsl is 1-based */ 663 KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, 664 ("invalid i %d and idx %d", i, idx)); 665 666 if (atomic_testandset_long(&bmap[i], idx)) 667 continue; 668 669 ret = i * LONG_BIT + idx; 670 break; 671 } 672 return (ret); 673 } 674 675 static __inline void 676 hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) 677 { 678 u_long mask; 679 uint32_t idx; 680 681 idx = chim_idx / LONG_BIT; 682 KASSERT(idx < sc->hn_chim_bmap_cnt, 683 ("invalid chimney index 0x%x", chim_idx)); 684 685 mask = 1UL << 
(chim_idx % LONG_BIT); 686 KASSERT(sc->hn_chim_bmap[idx] & mask, 687 ("index bitmap 0x%lx, chimney index %u, " 688 "bitmap idx %d, bitmask 0x%lx", 689 sc->hn_chim_bmap[idx], chim_idx, idx, mask)); 690 691 atomic_clear_long(&sc->hn_chim_bmap[idx], mask); 692 } 693 694 #if defined(INET6) || defined(INET) 695 696 #define PULLUP_HDR(m, len) \ 697 do { \ 698 if (__predict_false((m)->m_len < (len))) { \ 699 (m) = m_pullup((m), (len)); \ 700 if ((m) == NULL) \ 701 return (NULL); \ 702 } \ 703 } while (0) 704 705 /* 706 * NOTE: If this function failed, the m_head would be freed. 707 */ 708 static __inline struct mbuf * 709 hn_tso_fixup(struct mbuf *m_head) 710 { 711 struct ether_vlan_header *evl; 712 struct tcphdr *th; 713 int ehlen; 714 715 KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); 716 717 PULLUP_HDR(m_head, sizeof(*evl)); 718 evl = mtod(m_head, struct ether_vlan_header *); 719 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 720 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 721 else 722 ehlen = ETHER_HDR_LEN; 723 724 #ifdef INET 725 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 726 struct ip *ip; 727 int iphlen; 728 729 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 730 ip = mtodo(m_head, ehlen); 731 iphlen = ip->ip_hl << 2; 732 733 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 734 th = mtodo(m_head, ehlen + iphlen); 735 736 ip->ip_len = 0; 737 ip->ip_sum = 0; 738 th->th_sum = in_pseudo(ip->ip_src.s_addr, 739 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 740 } 741 #endif 742 #if defined(INET6) && defined(INET) 743 else 744 #endif 745 #ifdef INET6 746 { 747 struct ip6_hdr *ip6; 748 749 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 750 ip6 = mtodo(m_head, ehlen); 751 if (ip6->ip6_nxt != IPPROTO_TCP) { 752 m_freem(m_head); 753 return (NULL); 754 } 755 756 PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); 757 th = mtodo(m_head, ehlen + sizeof(*ip6)); 758 759 ip6->ip6_plen = 0; 760 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 761 } 762 #endif 763 return (m_head); 764 765 } 766 767 /* 768 * NOTE: If this function failed, the m_head would be freed. 
 */
static __inline struct mbuf *
hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn)
{
	const struct ether_vlan_header *evl;
	const struct tcphdr *th;
	int ehlen;

	*tcpsyn = 0;

	PULLUP_HDR(m_head, sizeof(*evl));
	evl = mtod(m_head, const struct ether_vlan_header *);
	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehlen = ETHER_HDR_LEN;

#ifdef INET
	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TCP) {
		const struct ip *ip;
		int iphlen;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
		ip = mtodo(m_head, ehlen);
		iphlen = ip->ip_hl << 2;

		PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
		th = mtodo(m_head, ehlen + iphlen);
		if (th->th_flags & TH_SYN)
			*tcpsyn = 1;
	}
#endif
#if defined(INET6) && defined(INET)
	else
#endif
#ifdef INET6
	{
		const struct ip6_hdr *ip6;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
		ip6 = mtodo(m_head, ehlen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (m_head);

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
		th = mtodo(m_head, ehlen + sizeof(*ip6));
		if (th->th_flags & TH_SYN)
			*tcpsyn = 1;
	}
#endif
	return (m_head);
}

#undef PULLUP_HDR

#endif	/* INET6 || INET */

static int
hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
{
	int error = 0;

	HN_LOCK_ASSERT(sc);

	if (sc->hn_rx_filter != filter) {
		error = hn_rndis_set_rxfilter(sc, filter);
		if (!error)
			sc->hn_rx_filter = filter;
	}
	return (error);
}

static int
hn_rxfilter_config(struct hn_softc *sc)
{
	struct ifnet *ifp = sc->hn_ifp;
	uint32_t filter;

	HN_LOCK_ASSERT(sc);

	/*
	 * If the non-transparent mode VF is activated, we don't know how
	 * its RX filter is configured, so stick the synthetic device in
	 * the promiscuous mode.
	 */
	if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) {
		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
	} else {
		filter = NDIS_PACKET_TYPE_DIRECTED;
		if (ifp->if_flags & IFF_BROADCAST)
			filter |= NDIS_PACKET_TYPE_BROADCAST;
		/* TODO: support multicast list */
		if ((ifp->if_flags & IFF_ALLMULTI) ||
		    !TAILQ_EMPTY(&ifp->if_multiaddrs))
			filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
	}
	return (hn_set_rxfilter(sc, filter));
}

static void
hn_set_txagg(struct hn_softc *sc)
{
	uint32_t size, pkts;
	int i;

	/*
	 * Setup aggregation size.
	 */
	if (sc->hn_agg_size < 0)
		size = UINT32_MAX;
	else
		size = sc->hn_agg_size;

	if (sc->hn_rndis_agg_size < size)
		size = sc->hn_rndis_agg_size;

	/* NOTE: We only aggregate packets using chimney sending buffers. */
	if (size > (uint32_t)sc->hn_chim_szmax)
		size = sc->hn_chim_szmax;

	if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
		/* Disable */
		size = 0;
		pkts = 0;
		goto done;
	}

	/* NOTE: Type of the per TX ring setting is 'int'. */
	if (size > INT_MAX)
		size = INT_MAX;

	/*
	 * Setup aggregation packet count.
	 */
	if (sc->hn_agg_pkts < 0)
		pkts = UINT32_MAX;
	else
		pkts = sc->hn_agg_pkts;

	if (sc->hn_rndis_agg_pkts < pkts)
		pkts = sc->hn_rndis_agg_pkts;

	if (pkts <= 1) {
		/* Disable */
		size = 0;
		pkts = 0;
		goto done;
	}

	/* NOTE: Type of the per TX ring setting is 'short'.
*/ 919 if (pkts > SHRT_MAX) 920 pkts = SHRT_MAX; 921 922 done: 923 /* NOTE: Type of the per TX ring setting is 'short'. */ 924 if (sc->hn_rndis_agg_align > SHRT_MAX) { 925 /* Disable */ 926 size = 0; 927 pkts = 0; 928 } 929 930 if (bootverbose) { 931 if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", 932 size, pkts, sc->hn_rndis_agg_align); 933 } 934 935 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 936 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 937 938 mtx_lock(&txr->hn_tx_lock); 939 txr->hn_agg_szmax = size; 940 txr->hn_agg_pktmax = pkts; 941 txr->hn_agg_align = sc->hn_rndis_agg_align; 942 mtx_unlock(&txr->hn_tx_lock); 943 } 944 } 945 946 static int 947 hn_get_txswq_depth(const struct hn_tx_ring *txr) 948 { 949 950 KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); 951 if (hn_tx_swq_depth < txr->hn_txdesc_cnt) 952 return txr->hn_txdesc_cnt; 953 return hn_tx_swq_depth; 954 } 955 956 #ifndef RSS 957 static int 958 hn_rss_reconfig(struct hn_softc *sc) 959 { 960 int error; 961 962 HN_LOCK_ASSERT(sc); 963 964 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 965 return (ENXIO); 966 967 /* 968 * Disable RSS first. 969 * 970 * NOTE: 971 * Direct reconfiguration by setting the UNCHG flags does 972 * _not_ work properly. 973 */ 974 if (bootverbose) 975 if_printf(sc->hn_ifp, "disable RSS\n"); 976 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); 977 if (error) { 978 if_printf(sc->hn_ifp, "RSS disable failed\n"); 979 return (error); 980 } 981 982 /* 983 * Reenable the RSS w/ the updated RSS key or indirect 984 * table. 985 */ 986 if (bootverbose) 987 if_printf(sc->hn_ifp, "reconfig RSS\n"); 988 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 989 if (error) { 990 if_printf(sc->hn_ifp, "RSS reconfig failed\n"); 991 return (error); 992 } 993 return (0); 994 } 995 #endif /* !RSS */ 996 997 static void 998 hn_rss_ind_fixup(struct hn_softc *sc) 999 { 1000 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 1001 int i, nchan; 1002 1003 nchan = sc->hn_rx_ring_inuse; 1004 KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); 1005 1006 /* 1007 * Check indirect table to make sure that all channels in it 1008 * can be used. 
1009 */ 1010 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 1011 if (rss->rss_ind[i] >= nchan) { 1012 if_printf(sc->hn_ifp, 1013 "RSS indirect table %d fixup: %u -> %d\n", 1014 i, rss->rss_ind[i], nchan - 1); 1015 rss->rss_ind[i] = nchan - 1; 1016 } 1017 } 1018 } 1019 1020 static int 1021 hn_ifmedia_upd(struct ifnet *ifp __unused) 1022 { 1023 1024 return EOPNOTSUPP; 1025 } 1026 1027 static void 1028 hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 1029 { 1030 struct hn_softc *sc = ifp->if_softc; 1031 1032 ifmr->ifm_status = IFM_AVALID; 1033 ifmr->ifm_active = IFM_ETHER; 1034 1035 if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { 1036 ifmr->ifm_active |= IFM_NONE; 1037 return; 1038 } 1039 ifmr->ifm_status |= IFM_ACTIVE; 1040 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 1041 } 1042 1043 static void 1044 hn_rxvf_set_task(void *xarg, int pending __unused) 1045 { 1046 struct hn_rxvf_setarg *arg = xarg; 1047 1048 arg->rxr->hn_rxvf_ifp = arg->vf_ifp; 1049 } 1050 1051 static void 1052 hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp) 1053 { 1054 struct hn_rx_ring *rxr; 1055 struct hn_rxvf_setarg arg; 1056 struct task task; 1057 int i; 1058 1059 HN_LOCK_ASSERT(sc); 1060 1061 TASK_INIT(&task, 0, hn_rxvf_set_task, &arg); 1062 1063 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 1064 rxr = &sc->hn_rx_ring[i]; 1065 1066 if (i < sc->hn_rx_ring_inuse) { 1067 arg.rxr = rxr; 1068 arg.vf_ifp = vf_ifp; 1069 vmbus_chan_run_task(rxr->hn_chan, &task); 1070 } else { 1071 rxr->hn_rxvf_ifp = vf_ifp; 1072 } 1073 } 1074 } 1075 1076 static bool 1077 hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp) 1078 { 1079 const struct ifnet *hn_ifp; 1080 1081 hn_ifp = sc->hn_ifp; 1082 1083 if (ifp == hn_ifp) 1084 return (false); 1085 1086 if (ifp->if_alloctype != IFT_ETHER) 1087 return (false); 1088 1089 /* Ignore lagg/vlan interfaces */ 1090 if (strcmp(ifp->if_dname, "lagg") == 0 || 1091 strcmp(ifp->if_dname, "vlan") == 0) 1092 return (false); 1093 1094 if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0) 1095 return (false); 1096 1097 return (true); 1098 } 1099 1100 static void 1101 hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf) 1102 { 1103 struct ifnet *hn_ifp; 1104 1105 HN_LOCK(sc); 1106 1107 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1108 goto out; 1109 1110 if (!hn_ismyvf(sc, ifp)) 1111 goto out; 1112 hn_ifp = sc->hn_ifp; 1113 1114 if (rxvf) { 1115 if (sc->hn_flags & HN_FLAG_RXVF) 1116 goto out; 1117 1118 sc->hn_flags |= HN_FLAG_RXVF; 1119 hn_rxfilter_config(sc); 1120 } else { 1121 if (!(sc->hn_flags & HN_FLAG_RXVF)) 1122 goto out; 1123 1124 sc->hn_flags &= ~HN_FLAG_RXVF; 1125 if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 1126 hn_rxfilter_config(sc); 1127 else 1128 hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE); 1129 } 1130 1131 hn_nvs_set_datapath(sc, 1132 rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH); 1133 1134 hn_rxvf_set(sc, rxvf ? ifp : NULL); 1135 1136 if (rxvf) { 1137 hn_suspend_mgmt(sc); 1138 sc->hn_link_flags &= 1139 ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG); 1140 if_link_state_change(hn_ifp, LINK_STATE_DOWN); 1141 } else { 1142 hn_resume_mgmt(sc); 1143 } 1144 1145 devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname, 1146 rxvf ? "VF_UP" : "VF_DOWN", NULL); 1147 1148 if (bootverbose) { 1149 if_printf(hn_ifp, "datapath is switched %s %s\n", 1150 rxvf ? 
"to" : "from", ifp->if_xname); 1151 } 1152 out: 1153 HN_UNLOCK(sc); 1154 } 1155 1156 static void 1157 hn_ifnet_event(void *arg, struct ifnet *ifp, int event) 1158 { 1159 1160 if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN) 1161 return; 1162 hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP); 1163 } 1164 1165 static void 1166 hn_ifaddr_event(void *arg, struct ifnet *ifp) 1167 { 1168 1169 hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP); 1170 } 1171 1172 static int 1173 hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr) 1174 { 1175 struct ifnet *ifp, *vf_ifp; 1176 uint64_t tmp; 1177 int error; 1178 1179 HN_LOCK_ASSERT(sc); 1180 ifp = sc->hn_ifp; 1181 vf_ifp = sc->hn_vf_ifp; 1182 1183 /* 1184 * Fix up requested capabilities w/ supported capabilities, 1185 * since the supported capabilities could have been changed. 1186 */ 1187 ifr->ifr_reqcap &= ifp->if_capabilities; 1188 /* Pass SIOCSIFCAP to VF. */ 1189 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr); 1190 1191 /* 1192 * NOTE: 1193 * The error will be propagated to the callers, however, it 1194 * is _not_ useful here. 1195 */ 1196 1197 /* 1198 * Merge VF's enabled capabilities. 1199 */ 1200 ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities; 1201 1202 tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc); 1203 if (ifp->if_capenable & IFCAP_TXCSUM) 1204 ifp->if_hwassist |= tmp; 1205 else 1206 ifp->if_hwassist &= ~tmp; 1207 1208 tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc); 1209 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 1210 ifp->if_hwassist |= tmp; 1211 else 1212 ifp->if_hwassist &= ~tmp; 1213 1214 tmp = vf_ifp->if_hwassist & CSUM_IP_TSO; 1215 if (ifp->if_capenable & IFCAP_TSO4) 1216 ifp->if_hwassist |= tmp; 1217 else 1218 ifp->if_hwassist &= ~tmp; 1219 1220 tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO; 1221 if (ifp->if_capenable & IFCAP_TSO6) 1222 ifp->if_hwassist |= tmp; 1223 else 1224 ifp->if_hwassist &= ~tmp; 1225 1226 return (error); 1227 } 1228 1229 static int 1230 hn_xpnt_vf_iocsetflags(struct hn_softc *sc) 1231 { 1232 struct ifnet *vf_ifp; 1233 struct ifreq ifr; 1234 1235 HN_LOCK_ASSERT(sc); 1236 vf_ifp = sc->hn_vf_ifp; 1237 1238 memset(&ifr, 0, sizeof(ifr)); 1239 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1240 ifr.ifr_flags = vf_ifp->if_flags & 0xffff; 1241 ifr.ifr_flagshigh = vf_ifp->if_flags >> 16; 1242 return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr)); 1243 } 1244 1245 static void 1246 hn_xpnt_vf_saveifflags(struct hn_softc *sc) 1247 { 1248 struct ifnet *ifp = sc->hn_ifp; 1249 int allmulti = 0; 1250 1251 HN_LOCK_ASSERT(sc); 1252 1253 /* XXX vlan(4) style mcast addr maintenance */ 1254 if (!TAILQ_EMPTY(&ifp->if_multiaddrs)) 1255 allmulti = IFF_ALLMULTI; 1256 1257 /* Always set the VF's if_flags */ 1258 sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti; 1259 } 1260 1261 static void 1262 hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m) 1263 { 1264 struct rm_priotracker pt; 1265 struct ifnet *hn_ifp = NULL; 1266 struct mbuf *mn; 1267 1268 /* 1269 * XXX racy, if hn(4) ever detached. 1270 */ 1271 rm_rlock(&hn_vfmap_lock, &pt); 1272 if (vf_ifp->if_index < hn_vfmap_size) 1273 hn_ifp = hn_vfmap[vf_ifp->if_index]; 1274 rm_runlock(&hn_vfmap_lock, &pt); 1275 1276 if (hn_ifp != NULL) { 1277 for (mn = m; mn != NULL; mn = mn->m_nextpkt) { 1278 /* 1279 * Allow tapping on the VF. 1280 */ 1281 ETHER_BPF_MTAP(vf_ifp, mn); 1282 1283 /* 1284 * Update VF stats. 
			 */
			if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) {
				if_inc_counter(vf_ifp, IFCOUNTER_IBYTES,
				    mn->m_pkthdr.len);
			}
			/*
			 * XXX IFCOUNTER_IMCAST
			 * This stat updating is kinda invasive, since it
			 * requires two checks on the mbuf: the length check
			 * and the ethernet header check.  As of this writing,
			 * all multicast packets go directly to hn(4), which
			 * makes imcast stat updating in the VF a try in vain.
			 */

			/*
			 * Fix up rcvif and increase hn(4)'s ipackets.
			 */
			mn->m_pkthdr.rcvif = hn_ifp;
			if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
		}
		/*
		 * Go through hn(4)'s if_input.
		 */
		hn_ifp->if_input(hn_ifp, m);
	} else {
		/*
		 * In the middle of the transition; free this
		 * mbuf chain.
		 */
		while (m != NULL) {
			mn = m->m_nextpkt;
			m->m_nextpkt = NULL;
			m_freem(m);
			m = mn;
		}
	}
}

static void
hn_mtu_change_fixup(struct hn_softc *sc)
{
	struct ifnet *ifp;

	HN_LOCK_ASSERT(sc);
	ifp = sc->hn_ifp;

	hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
#if __FreeBSD_version >= 1100099
	if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp))
		hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
#endif
}

static void
hn_xpnt_vf_setready(struct hn_softc *sc)
{
	struct ifnet *ifp, *vf_ifp;
	struct ifreq ifr;

	HN_LOCK_ASSERT(sc);
	ifp = sc->hn_ifp;
	vf_ifp = sc->hn_vf_ifp;

	/*
	 * Mark the VF ready.
	 */
	sc->hn_vf_rdytick = 0;

	/*
	 * Save information for restoration.
	 */
	sc->hn_saved_caps = ifp->if_capabilities;
	sc->hn_saved_tsomax = ifp->if_hw_tsomax;
	sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount;
	sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize;

	/*
	 * Intersect supported/enabled capabilities.
	 *
	 * NOTE:
	 * if_hwassist is not changed here.
	 */
	ifp->if_capabilities &= vf_ifp->if_capabilities;
	ifp->if_capenable &= ifp->if_capabilities;

	/*
	 * Fix TSO settings.
	 */
	if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax)
		ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax;
	if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount)
		ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount;
	if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize)
		ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize;

	/*
	 * Change VF's enabled capabilities.
	 */
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
	ifr.ifr_reqcap = ifp->if_capenable;
	hn_xpnt_vf_iocsetcaps(sc, &ifr);

	if (ifp->if_mtu != ETHERMTU) {
		int error;

		/*
		 * Change VF's MTU.
		 */
		memset(&ifr, 0, sizeof(ifr));
		strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
		ifr.ifr_mtu = ifp->if_mtu;
		error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr);
		if (error) {
			if_printf(ifp, "%s SIOCSIFMTU %u failed\n",
			    vf_ifp->if_xname, ifp->if_mtu);
			if (ifp->if_mtu > ETHERMTU) {
				if_printf(ifp, "change MTU to %d\n", ETHERMTU);

				/*
				 * XXX
				 * No need to adjust the synthetic parts' MTU;
				 * failure of the adjustment will cause us
				 * infinite headache.
1409 */ 1410 ifp->if_mtu = ETHERMTU; 1411 hn_mtu_change_fixup(sc); 1412 } 1413 } 1414 } 1415 } 1416 1417 static bool 1418 hn_xpnt_vf_isready(struct hn_softc *sc) 1419 { 1420 1421 HN_LOCK_ASSERT(sc); 1422 1423 if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL) 1424 return (false); 1425 1426 if (sc->hn_vf_rdytick == 0) 1427 return (true); 1428 1429 if (sc->hn_vf_rdytick > ticks) 1430 return (false); 1431 1432 /* Mark VF as ready. */ 1433 hn_xpnt_vf_setready(sc); 1434 return (true); 1435 } 1436 1437 static void 1438 hn_xpnt_vf_setenable(struct hn_softc *sc) 1439 { 1440 int i; 1441 1442 HN_LOCK_ASSERT(sc); 1443 1444 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1445 rm_wlock(&sc->hn_vf_lock); 1446 sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED; 1447 rm_wunlock(&sc->hn_vf_lock); 1448 1449 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1450 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF; 1451 } 1452 1453 static void 1454 hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf) 1455 { 1456 int i; 1457 1458 HN_LOCK_ASSERT(sc); 1459 1460 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1461 rm_wlock(&sc->hn_vf_lock); 1462 sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED; 1463 if (clear_vf) 1464 sc->hn_vf_ifp = NULL; 1465 rm_wunlock(&sc->hn_vf_lock); 1466 1467 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1468 sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF; 1469 } 1470 1471 static void 1472 hn_xpnt_vf_init(struct hn_softc *sc) 1473 { 1474 int error; 1475 1476 HN_LOCK_ASSERT(sc); 1477 1478 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1479 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1480 1481 if (bootverbose) { 1482 if_printf(sc->hn_ifp, "try bringing up %s\n", 1483 sc->hn_vf_ifp->if_xname); 1484 } 1485 1486 /* 1487 * Bring the VF up. 1488 */ 1489 hn_xpnt_vf_saveifflags(sc); 1490 sc->hn_vf_ifp->if_flags |= IFF_UP; 1491 error = hn_xpnt_vf_iocsetflags(sc); 1492 if (error) { 1493 if_printf(sc->hn_ifp, "bringing up %s failed: %d\n", 1494 sc->hn_vf_ifp->if_xname, error); 1495 return; 1496 } 1497 1498 /* 1499 * NOTE: 1500 * Datapath setting must happen _after_ bringing the VF up. 1501 */ 1502 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 1503 1504 /* Mark transparent mode VF as enabled. */ 1505 hn_xpnt_vf_setenable(sc); 1506 } 1507 1508 static void 1509 hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused) 1510 { 1511 struct hn_softc *sc = xsc; 1512 1513 HN_LOCK(sc); 1514 1515 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1516 goto done; 1517 if (sc->hn_vf_ifp == NULL) 1518 goto done; 1519 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 1520 goto done; 1521 1522 if (sc->hn_vf_rdytick != 0) { 1523 /* Mark VF as ready. */ 1524 hn_xpnt_vf_setready(sc); 1525 } 1526 1527 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) { 1528 /* 1529 * Delayed VF initialization. 
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "delayed initialize %s\n",
			    sc->hn_vf_ifp->if_xname);
		}
		hn_xpnt_vf_init(sc);
	}
done:
	HN_UNLOCK(sc);
}

static void
hn_ifnet_attevent(void *xsc, struct ifnet *ifp)
{
	struct hn_softc *sc = xsc;

	HN_LOCK(sc);

	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
		goto done;

	if (!hn_ismyvf(sc, ifp))
		goto done;

	if (sc->hn_vf_ifp != NULL) {
		if_printf(sc->hn_ifp, "%s was attached as VF\n",
		    sc->hn_vf_ifp->if_xname);
		goto done;
	}

	if (hn_xpnt_vf && ifp->if_start != NULL) {
		/*
		 * ifnet.if_start is _not_ supported by transparent
		 * mode VF; mainly due to the IFF_DRV_OACTIVE flag.
		 */
		if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported "
		    "in transparent VF mode.\n", ifp->if_xname);
		goto done;
	}

	rm_wlock(&hn_vfmap_lock);

	if (ifp->if_index >= hn_vfmap_size) {
		struct ifnet **newmap;
		int newsize;

		newsize = ifp->if_index + HN_VFMAP_SIZE_DEF;
		newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF,
		    M_WAITOK | M_ZERO);

		memcpy(newmap, hn_vfmap,
		    sizeof(struct ifnet *) * hn_vfmap_size);
		free(hn_vfmap, M_DEVBUF);
		hn_vfmap = newmap;
		hn_vfmap_size = newsize;
	}
	KASSERT(hn_vfmap[ifp->if_index] == NULL,
	    ("%s: ifindex %d was mapped to %s",
	     ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname));
	hn_vfmap[ifp->if_index] = sc->hn_ifp;

	rm_wunlock(&hn_vfmap_lock);

	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
	rm_wlock(&sc->hn_vf_lock);
	KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
	    ("%s: transparent VF was enabled", sc->hn_ifp->if_xname));
	sc->hn_vf_ifp = ifp;
	rm_wunlock(&sc->hn_vf_lock);

	if (hn_xpnt_vf) {
		int wait_ticks;

		/*
		 * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp.
		 * Save vf_ifp's current if_input for later restoration.
		 */
		sc->hn_vf_input = ifp->if_input;
		ifp->if_input = hn_xpnt_vf_input;

		/*
		 * Stop link status management; use the VF's.
		 */
		hn_suspend_mgmt(sc);

		/*
		 * Give the VF some time to complete its attach routine.
		 */
		wait_ticks = hn_xpnt_vf_attwait * hz;
		sc->hn_vf_rdytick = ticks + wait_ticks;

		taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init,
		    wait_ticks);
	}
done:
	HN_UNLOCK(sc);
}

static void
hn_ifnet_detevent(void *xsc, struct ifnet *ifp)
{
	struct hn_softc *sc = xsc;

	HN_LOCK(sc);

	if (sc->hn_vf_ifp == NULL)
		goto done;

	if (!hn_ismyvf(sc, ifp))
		goto done;

	if (hn_xpnt_vf) {
		/*
		 * Make sure that the delayed initialization is not running.
		 *
		 * NOTE:
		 * - This lock _must_ be released, since the hn_vf_init task
		 *   will try holding this lock.
		 * - It is safe to release this lock here, since the
		 *   hn_ifnet_attevent() is interlocked by the hn_vf_ifp.
		 *
		 * XXX racy, if hn(4) ever detached.
		 */
		HN_UNLOCK(sc);
		taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init);
		HN_LOCK(sc);

		KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved",
		    sc->hn_ifp->if_xname));
		ifp->if_input = sc->hn_vf_input;
		sc->hn_vf_input = NULL;

		if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
			hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);

		if (sc->hn_vf_rdytick == 0) {
			/*
			 * The VF was ready; restore some settings.
			 */
			sc->hn_ifp->if_capabilities = sc->hn_saved_caps;
			/*
			 * NOTE:
			 * There is _no_ need to fixup if_capenable and
			 * if_hwassist, since the if_capabilities before
			 * restoration was an intersection of the VF's
			 * if_capabilities and the synthetic device's
			 * if_capabilities.
			 */
			sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax;
			sc->hn_ifp->if_hw_tsomaxsegcount =
			    sc->hn_saved_tsosegcnt;
			sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz;
		}

		/*
		 * Resume link status management, which was suspended
		 * by hn_ifnet_attevent().
		 */
		hn_resume_mgmt(sc);
	}

	/* Mark transparent mode VF as disabled. */
	hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */);

	rm_wlock(&hn_vfmap_lock);

	KASSERT(ifp->if_index < hn_vfmap_size,
	    ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size));
	if (hn_vfmap[ifp->if_index] != NULL) {
		KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp,
		    ("%s: ifindex %d was mapped to %s",
		     ifp->if_xname, ifp->if_index,
		     hn_vfmap[ifp->if_index]->if_xname));
		hn_vfmap[ifp->if_index] = NULL;
	}

	rm_wunlock(&hn_vfmap_lock);
done:
	HN_UNLOCK(sc);
}

static void
hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state)
{
	struct hn_softc *sc = xsc;

	if (sc->hn_vf_ifp == ifp)
		if_link_state_change(sc->hn_ifp, link_state);
}

static int
hn_probe(device_t dev)
{

	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) {
		device_set_desc(dev, "Hyper-V Network Interface");
		return BUS_PROBE_DEFAULT;
	}
	return ENXIO;
}

static int
hn_attach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	uint8_t eaddr[ETHER_ADDR_LEN];
	struct ifnet *ifp = NULL;
	int error, ring_cnt, tx_ring_cnt;

	sc->hn_dev = dev;
	sc->hn_prichan = vmbus_get_channel(dev);
	HN_LOCK_INIT(sc);
	rm_init(&sc->hn_vf_lock, "hnvf");
	if (hn_xpnt_vf && hn_xpnt_vf_accbpf)
		sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;

	/*
	 * Initialize these tunables once.
	 */
	sc->hn_agg_size = hn_tx_agg_size;
	sc->hn_agg_pkts = hn_tx_agg_pkts;

	/*
	 * Setup taskqueue for transmission.
	 */
	if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
		int i;

		sc->hn_tx_taskqs =
		    malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
		    M_DEVBUF, M_WAITOK);
		for (i = 0; i < hn_tx_taskq_cnt; ++i) {
			sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
			    M_WAITOK, taskqueue_thread_enqueue,
			    &sc->hn_tx_taskqs[i]);
			taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
			    "%s tx%d", device_get_nameunit(dev), i);
		}
	} else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
		sc->hn_tx_taskqs = hn_tx_taskque;
	}

	/*
	 * Setup taskqueue for management tasks, e.g. link status.
1776 */ 1777 sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, 1778 taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); 1779 taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", 1780 device_get_nameunit(dev)); 1781 TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); 1782 TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); 1783 TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, 1784 hn_netchg_status_taskfunc, sc); 1785 1786 if (hn_xpnt_vf) { 1787 /* 1788 * Setup taskqueue for VF tasks, e.g. delayed VF bringing up. 1789 */ 1790 sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK, 1791 taskqueue_thread_enqueue, &sc->hn_vf_taskq); 1792 taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf", 1793 device_get_nameunit(dev)); 1794 TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0, 1795 hn_xpnt_vf_init_taskfunc, sc); 1796 } 1797 1798 /* 1799 * Allocate ifnet and setup its name earlier, so that if_printf 1800 * can be used by functions, which will be called after 1801 * ether_ifattach(). 1802 */ 1803 ifp = sc->hn_ifp = if_alloc(IFT_ETHER); 1804 ifp->if_softc = sc; 1805 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 1806 1807 /* 1808 * Initialize ifmedia earlier so that it can be unconditionally 1809 * destroyed, if error happened later on. 1810 */ 1811 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 1812 1813 /* 1814 * Figure out the # of RX rings (ring_cnt) and the # of TX rings 1815 * to use (tx_ring_cnt). 1816 * 1817 * NOTE: 1818 * The # of RX rings to use is same as the # of channels to use. 1819 */ 1820 ring_cnt = hn_chan_cnt; 1821 if (ring_cnt <= 0) { 1822 /* Default */ 1823 ring_cnt = mp_ncpus; 1824 if (ring_cnt > HN_RING_CNT_DEF_MAX) 1825 ring_cnt = HN_RING_CNT_DEF_MAX; 1826 } else if (ring_cnt > mp_ncpus) { 1827 ring_cnt = mp_ncpus; 1828 } 1829 #ifdef RSS 1830 if (ring_cnt > rss_getnumbuckets()) 1831 ring_cnt = rss_getnumbuckets(); 1832 #endif 1833 1834 tx_ring_cnt = hn_tx_ring_cnt; 1835 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) 1836 tx_ring_cnt = ring_cnt; 1837 #ifdef HN_IFSTART_SUPPORT 1838 if (hn_use_if_start) { 1839 /* ifnet.if_start only needs one TX ring. */ 1840 tx_ring_cnt = 1; 1841 } 1842 #endif 1843 1844 /* 1845 * Set the leader CPU for channels. 1846 */ 1847 sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; 1848 1849 /* 1850 * Create enough TX/RX rings, even if only limited number of 1851 * channels can be allocated. 1852 */ 1853 error = hn_create_tx_data(sc, tx_ring_cnt); 1854 if (error) 1855 goto failed; 1856 error = hn_create_rx_data(sc, ring_cnt); 1857 if (error) 1858 goto failed; 1859 1860 /* 1861 * Create transaction context for NVS and RNDIS transactions. 1862 */ 1863 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), 1864 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); 1865 if (sc->hn_xact == NULL) { 1866 error = ENXIO; 1867 goto failed; 1868 } 1869 1870 /* 1871 * Install orphan handler for the revocation of this device's 1872 * primary channel. 1873 * 1874 * NOTE: 1875 * The processing order is critical here: 1876 * Install the orphan handler, _before_ testing whether this 1877 * device's primary channel has been revoked or not. 1878 */ 1879 vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); 1880 if (vmbus_chan_is_revoked(sc->hn_prichan)) { 1881 error = ENXIO; 1882 goto failed; 1883 } 1884 1885 /* 1886 * Attach the synthetic parts, i.e. NVS and RNDIS. 
1887 */ 1888 error = hn_synth_attach(sc, ETHERMTU); 1889 if (error) 1890 goto failed; 1891 1892 error = hn_rndis_get_eaddr(sc, eaddr); 1893 if (error) 1894 goto failed; 1895 1896 #if __FreeBSD_version >= 1100099 1897 if (sc->hn_rx_ring_inuse > 1) { 1898 /* 1899 * Reduce TCP segment aggregation limit for multiple 1900 * RX rings to increase ACK timeliness. 1901 */ 1902 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); 1903 } 1904 #endif 1905 1906 /* 1907 * Fixup TX stuffs after synthetic parts are attached. 1908 */ 1909 hn_fixup_tx_data(sc); 1910 1911 ctx = device_get_sysctl_ctx(dev); 1912 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 1913 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, 1914 &sc->hn_nvs_ver, 0, "NVS version"); 1915 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", 1916 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1917 hn_ndis_version_sysctl, "A", "NDIS version"); 1918 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", 1919 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1920 hn_caps_sysctl, "A", "capabilities"); 1921 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", 1922 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1923 hn_hwassist_sysctl, "A", "hwassist"); 1924 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", 1925 CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); 1926 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", 1927 CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, 1928 "max # of TSO segments"); 1929 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", 1930 CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0, 1931 "max size of TSO segment"); 1932 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", 1933 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1934 hn_rxfilter_sysctl, "A", "rxfilter"); 1935 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", 1936 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1937 hn_rss_hash_sysctl, "A", "RSS hash"); 1938 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", 1939 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); 1940 #ifndef RSS 1941 /* 1942 * Don't allow RSS key/indirect table changes, if RSS is defined. 
1943 */ 1944 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", 1945 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1946 hn_rss_key_sysctl, "IU", "RSS key"); 1947 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", 1948 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1949 hn_rss_ind_sysctl, "IU", "RSS indirect table"); 1950 #endif 1951 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", 1952 CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, 1953 "RNDIS offered packet transmission aggregation size limit"); 1954 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", 1955 CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, 1956 "RNDIS offered packet transmission aggregation count limit"); 1957 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", 1958 CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, 1959 "RNDIS packet transmission aggregation alignment"); 1960 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", 1961 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1962 hn_txagg_size_sysctl, "I", 1963 "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); 1964 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", 1965 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1966 hn_txagg_pkts_sysctl, "I", 1967 "Packet transmission aggregation packets, " 1968 "0 -- disable, -1 -- auto"); 1969 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling", 1970 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1971 hn_polling_sysctl, "I", 1972 "Polling frequency: [100,1000000], 0 disable polling"); 1973 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", 1974 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1975 hn_vf_sysctl, "A", "Virtual Function's name"); 1976 if (!hn_xpnt_vf) { 1977 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", 1978 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1979 hn_rxvf_sysctl, "A", "activated Virtual Function's name"); 1980 } else { 1981 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", 1982 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1983 hn_xpnt_vf_enabled_sysctl, "I", 1984 "Transparent VF enabled"); 1985 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", 1986 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1987 hn_xpnt_vf_accbpf_sysctl, "I", 1988 "Accurate BPF for transparent VF"); 1989 } 1990 1991 /* 1992 * Setup the ifmedia, which has been initialized earlier. 1993 */ 1994 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 1995 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 1996 /* XXX ifmedia_set really should do this for us */ 1997 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 1998 1999 /* 2000 * Setup the ifnet for this interface. 2001 */ 2002 2003 ifp->if_baudrate = IF_Gbps(10); 2004 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2005 ifp->if_ioctl = hn_ioctl; 2006 ifp->if_init = hn_init; 2007 #ifdef HN_IFSTART_SUPPORT 2008 if (hn_use_if_start) { 2009 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); 2010 2011 ifp->if_start = hn_start; 2012 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); 2013 ifp->if_snd.ifq_drv_maxlen = qdepth - 1; 2014 IFQ_SET_READY(&ifp->if_snd); 2015 } else 2016 #endif 2017 { 2018 ifp->if_transmit = hn_transmit; 2019 ifp->if_qflush = hn_xmit_qflush; 2020 } 2021 2022 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; 2023 #ifdef foo 2024 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2025 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; 2026 #endif 2027 if (sc->hn_caps & HN_CAP_VLAN) { 2028 /* XXX not sure about VLAN_MTU. 
*/ 2029 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; 2030 } 2031 2032 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; 2033 if (ifp->if_hwassist & HN_CSUM_IP_MASK) 2034 ifp->if_capabilities |= IFCAP_TXCSUM; 2035 if (ifp->if_hwassist & HN_CSUM_IP6_MASK) 2036 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; 2037 if (sc->hn_caps & HN_CAP_TSO4) { 2038 ifp->if_capabilities |= IFCAP_TSO4; 2039 ifp->if_hwassist |= CSUM_IP_TSO; 2040 } 2041 if (sc->hn_caps & HN_CAP_TSO6) { 2042 ifp->if_capabilities |= IFCAP_TSO6; 2043 ifp->if_hwassist |= CSUM_IP6_TSO; 2044 } 2045 2046 /* Enable all available capabilities by default. */ 2047 ifp->if_capenable = ifp->if_capabilities; 2048 2049 /* 2050 * Disable IPv6 TSO and TXCSUM by default, they still can 2051 * be enabled through SIOCSIFCAP. 2052 */ 2053 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); 2054 ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); 2055 2056 if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { 2057 /* 2058 * Lock hn_set_tso_maxsize() to simplify its 2059 * internal logic. 2060 */ 2061 HN_LOCK(sc); 2062 hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); 2063 HN_UNLOCK(sc); 2064 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; 2065 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2066 } 2067 2068 ether_ifattach(ifp, eaddr); 2069 2070 if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { 2071 if_printf(ifp, "TSO segcnt %u segsz %u\n", 2072 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); 2073 } 2074 2075 /* Inform the upper layer about the long frame support. */ 2076 ifp->if_hdrlen = sizeof(struct ether_vlan_header); 2077 2078 /* 2079 * Kick off link status check. 2080 */ 2081 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 2082 hn_update_link_status(sc); 2083 2084 if (!hn_xpnt_vf) { 2085 sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, 2086 hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); 2087 sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, 2088 hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); 2089 } else { 2090 sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, 2091 hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); 2092 } 2093 2094 /* 2095 * NOTE: 2096 * Subscribe ether_ifattach event, instead of ifnet_arrival event, 2097 * since interface's LLADDR is needed; interface LLADDR is not 2098 * available when ifnet_arrival event is triggered. 2099 */ 2100 sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event, 2101 hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY); 2102 sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event, 2103 hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY); 2104 2105 return (0); 2106 failed: 2107 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) 2108 hn_synth_detach(sc); 2109 hn_detach(dev); 2110 return (error); 2111 } 2112 2113 static int 2114 hn_detach(device_t dev) 2115 { 2116 struct hn_softc *sc = device_get_softc(dev); 2117 struct ifnet *ifp = sc->hn_ifp, *vf_ifp; 2118 2119 if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { 2120 /* 2121 * In case that the vmbus missed the orphan handler 2122 * installation. 
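 * Orphaning the transaction context here should abort any transaction
 * still waiting on the revoked primary channel, instead of letting it
 * block forever.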
2123 */ 2124 vmbus_xact_ctx_orphan(sc->hn_xact); 2125 } 2126 2127 if (sc->hn_ifaddr_evthand != NULL) 2128 EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand); 2129 if (sc->hn_ifnet_evthand != NULL) 2130 EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand); 2131 if (sc->hn_ifnet_atthand != NULL) { 2132 EVENTHANDLER_DEREGISTER(ether_ifattach_event, 2133 sc->hn_ifnet_atthand); 2134 } 2135 if (sc->hn_ifnet_dethand != NULL) { 2136 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 2137 sc->hn_ifnet_dethand); 2138 } 2139 if (sc->hn_ifnet_lnkhand != NULL) 2140 EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand); 2141 2142 vf_ifp = sc->hn_vf_ifp; 2143 __compiler_membar(); 2144 if (vf_ifp != NULL) 2145 hn_ifnet_detevent(sc, vf_ifp); 2146 2147 if (device_is_attached(dev)) { 2148 HN_LOCK(sc); 2149 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2150 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2151 hn_stop(sc, true); 2152 /* 2153 * NOTE: 2154 * hn_stop() only suspends data, so managment 2155 * stuffs have to be suspended manually here. 2156 */ 2157 hn_suspend_mgmt(sc); 2158 hn_synth_detach(sc); 2159 } 2160 HN_UNLOCK(sc); 2161 ether_ifdetach(ifp); 2162 } 2163 2164 ifmedia_removeall(&sc->hn_media); 2165 hn_destroy_rx_data(sc); 2166 hn_destroy_tx_data(sc); 2167 2168 if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) { 2169 int i; 2170 2171 for (i = 0; i < hn_tx_taskq_cnt; ++i) 2172 taskqueue_free(sc->hn_tx_taskqs[i]); 2173 free(sc->hn_tx_taskqs, M_DEVBUF); 2174 } 2175 taskqueue_free(sc->hn_mgmt_taskq0); 2176 if (sc->hn_vf_taskq != NULL) 2177 taskqueue_free(sc->hn_vf_taskq); 2178 2179 if (sc->hn_xact != NULL) { 2180 /* 2181 * Uninstall the orphan handler _before_ the xact is 2182 * destructed. 2183 */ 2184 vmbus_chan_unset_orphan(sc->hn_prichan); 2185 vmbus_xact_ctx_destroy(sc->hn_xact); 2186 } 2187 2188 if_free(ifp); 2189 2190 HN_LOCK_DESTROY(sc); 2191 rm_destroy(&sc->hn_vf_lock); 2192 return (0); 2193 } 2194 2195 static int 2196 hn_shutdown(device_t dev) 2197 { 2198 2199 return (0); 2200 } 2201 2202 static void 2203 hn_link_status(struct hn_softc *sc) 2204 { 2205 uint32_t link_status; 2206 int error; 2207 2208 error = hn_rndis_get_linkstatus(sc, &link_status); 2209 if (error) { 2210 /* XXX what to do? */ 2211 return; 2212 } 2213 2214 if (link_status == NDIS_MEDIA_STATE_CONNECTED) 2215 sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; 2216 else 2217 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2218 if_link_state_change(sc->hn_ifp, 2219 (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? 2220 LINK_STATE_UP : LINK_STATE_DOWN); 2221 } 2222 2223 static void 2224 hn_link_taskfunc(void *xsc, int pending __unused) 2225 { 2226 struct hn_softc *sc = xsc; 2227 2228 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 2229 return; 2230 hn_link_status(sc); 2231 } 2232 2233 static void 2234 hn_netchg_init_taskfunc(void *xsc, int pending __unused) 2235 { 2236 struct hn_softc *sc = xsc; 2237 2238 /* Prevent any link status checks from running. */ 2239 sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; 2240 2241 /* 2242 * Fake up a [link down --> link up] state change; 5 seconds 2243 * delay is used, which closely simulates miibus reaction 2244 * upon link down event. 
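 * The link is reported down right away; hn_netchg_status_taskfunc()
 * runs once the delay expires, clears HN_LINK_FLAG_NETCHG and
 * re-queries the link status.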
2245 */ 2246 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2247 if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); 2248 taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, 2249 &sc->hn_netchg_status, 5 * hz); 2250 } 2251 2252 static void 2253 hn_netchg_status_taskfunc(void *xsc, int pending __unused) 2254 { 2255 struct hn_softc *sc = xsc; 2256 2257 /* Re-allow link status checks. */ 2258 sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; 2259 hn_link_status(sc); 2260 } 2261 2262 static void 2263 hn_update_link_status(struct hn_softc *sc) 2264 { 2265 2266 if (sc->hn_mgmt_taskq != NULL) 2267 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); 2268 } 2269 2270 static void 2271 hn_change_network(struct hn_softc *sc) 2272 { 2273 2274 if (sc->hn_mgmt_taskq != NULL) 2275 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); 2276 } 2277 2278 static __inline int 2279 hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, 2280 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) 2281 { 2282 struct mbuf *m = *m_head; 2283 int error; 2284 2285 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); 2286 2287 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, 2288 m, segs, nsegs, BUS_DMA_NOWAIT); 2289 if (error == EFBIG) { 2290 struct mbuf *m_new; 2291 2292 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); 2293 if (m_new == NULL) 2294 return ENOBUFS; 2295 else 2296 *m_head = m = m_new; 2297 txr->hn_tx_collapsed++; 2298 2299 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, 2300 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); 2301 } 2302 if (!error) { 2303 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, 2304 BUS_DMASYNC_PREWRITE); 2305 txd->flags |= HN_TXD_FLAG_DMAMAP; 2306 } 2307 return error; 2308 } 2309 2310 static __inline int 2311 hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) 2312 { 2313 2314 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, 2315 ("put an onlist txd %#x", txd->flags)); 2316 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2317 ("put an onagg txd %#x", txd->flags)); 2318 2319 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2320 if (atomic_fetchadd_int(&txd->refs, -1) != 1) 2321 return 0; 2322 2323 if (!STAILQ_EMPTY(&txd->agg_list)) { 2324 struct hn_txdesc *tmp_txd; 2325 2326 while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { 2327 int freed; 2328 2329 KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), 2330 ("resursive aggregation on aggregated txdesc")); 2331 KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), 2332 ("not aggregated txdesc")); 2333 KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2334 ("aggregated txdesc uses dmamap")); 2335 KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2336 ("aggregated txdesc consumes " 2337 "chimney sending buffer")); 2338 KASSERT(tmp_txd->chim_size == 0, 2339 ("aggregated txdesc has non-zero " 2340 "chimney sending size")); 2341 2342 STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); 2343 tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; 2344 freed = hn_txdesc_put(txr, tmp_txd); 2345 KASSERT(freed, ("failed to free aggregated txdesc")); 2346 } 2347 } 2348 2349 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 2350 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2351 ("chim txd uses dmamap")); 2352 hn_chim_free(txr->hn_sc, txd->chim_index); 2353 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2354 txd->chim_size = 0; 2355 } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { 2356 bus_dmamap_sync(txr->hn_tx_data_dtag, 2357 txd->data_dmap, BUS_DMASYNC_POSTWRITE); 2358 
bus_dmamap_unload(txr->hn_tx_data_dtag, 2359 txd->data_dmap); 2360 txd->flags &= ~HN_TXD_FLAG_DMAMAP; 2361 } 2362 2363 if (txd->m != NULL) { 2364 m_freem(txd->m); 2365 txd->m = NULL; 2366 } 2367 2368 txd->flags |= HN_TXD_FLAG_ONLIST; 2369 #ifndef HN_USE_TXDESC_BUFRING 2370 mtx_lock_spin(&txr->hn_txlist_spin); 2371 KASSERT(txr->hn_txdesc_avail >= 0 && 2372 txr->hn_txdesc_avail < txr->hn_txdesc_cnt, 2373 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); 2374 txr->hn_txdesc_avail++; 2375 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 2376 mtx_unlock_spin(&txr->hn_txlist_spin); 2377 #else /* HN_USE_TXDESC_BUFRING */ 2378 #ifdef HN_DEBUG 2379 atomic_add_int(&txr->hn_txdesc_avail, 1); 2380 #endif 2381 buf_ring_enqueue(txr->hn_txdesc_br, txd); 2382 #endif /* !HN_USE_TXDESC_BUFRING */ 2383 2384 return 1; 2385 } 2386 2387 static __inline struct hn_txdesc * 2388 hn_txdesc_get(struct hn_tx_ring *txr) 2389 { 2390 struct hn_txdesc *txd; 2391 2392 #ifndef HN_USE_TXDESC_BUFRING 2393 mtx_lock_spin(&txr->hn_txlist_spin); 2394 txd = SLIST_FIRST(&txr->hn_txlist); 2395 if (txd != NULL) { 2396 KASSERT(txr->hn_txdesc_avail > 0, 2397 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); 2398 txr->hn_txdesc_avail--; 2399 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 2400 } 2401 mtx_unlock_spin(&txr->hn_txlist_spin); 2402 #else 2403 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); 2404 #endif 2405 2406 if (txd != NULL) { 2407 #ifdef HN_USE_TXDESC_BUFRING 2408 #ifdef HN_DEBUG 2409 atomic_subtract_int(&txr->hn_txdesc_avail, 1); 2410 #endif 2411 #endif /* HN_USE_TXDESC_BUFRING */ 2412 KASSERT(txd->m == NULL && txd->refs == 0 && 2413 STAILQ_EMPTY(&txd->agg_list) && 2414 txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 2415 txd->chim_size == 0 && 2416 (txd->flags & HN_TXD_FLAG_ONLIST) && 2417 (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && 2418 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); 2419 txd->flags &= ~HN_TXD_FLAG_ONLIST; 2420 txd->refs = 1; 2421 } 2422 return txd; 2423 } 2424 2425 static __inline void 2426 hn_txdesc_hold(struct hn_txdesc *txd) 2427 { 2428 2429 /* 0->1 transition will never work */ 2430 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2431 atomic_add_int(&txd->refs, 1); 2432 } 2433 2434 static __inline void 2435 hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) 2436 { 2437 2438 KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2439 ("recursive aggregation on aggregating txdesc")); 2440 2441 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2442 ("already aggregated")); 2443 KASSERT(STAILQ_EMPTY(&txd->agg_list), 2444 ("recursive aggregation on to-be-aggregated txdesc")); 2445 2446 txd->flags |= HN_TXD_FLAG_ONAGG; 2447 STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); 2448 } 2449 2450 static bool 2451 hn_tx_ring_pending(struct hn_tx_ring *txr) 2452 { 2453 bool pending = false; 2454 2455 #ifndef HN_USE_TXDESC_BUFRING 2456 mtx_lock_spin(&txr->hn_txlist_spin); 2457 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) 2458 pending = true; 2459 mtx_unlock_spin(&txr->hn_txlist_spin); 2460 #else 2461 if (!buf_ring_full(txr->hn_txdesc_br)) 2462 pending = true; 2463 #endif 2464 return (pending); 2465 } 2466 2467 static __inline void 2468 hn_txeof(struct hn_tx_ring *txr) 2469 { 2470 txr->hn_has_txeof = 0; 2471 txr->hn_txeof(txr); 2472 } 2473 2474 static void 2475 hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, 2476 struct vmbus_channel *chan, const void *data __unused, int dlen __unused) 2477 { 2478 struct hn_txdesc *txd = sndc->hn_cbarg; 2479 struct 
hn_tx_ring *txr; 2480 2481 txr = txd->txr; 2482 KASSERT(txr->hn_chan == chan, 2483 ("channel mismatch, on chan%u, should be chan%u", 2484 vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); 2485 2486 txr->hn_has_txeof = 1; 2487 hn_txdesc_put(txr, txd); 2488 2489 ++txr->hn_txdone_cnt; 2490 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { 2491 txr->hn_txdone_cnt = 0; 2492 if (txr->hn_oactive) 2493 hn_txeof(txr); 2494 } 2495 } 2496 2497 static void 2498 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 2499 { 2500 #if defined(INET) || defined(INET6) 2501 tcp_lro_flush_all(&rxr->hn_lro); 2502 #endif 2503 2504 /* 2505 * NOTE: 2506 * 'txr' could be NULL, if multiple channels and 2507 * ifnet.if_start method are enabled. 2508 */ 2509 if (txr == NULL || !txr->hn_has_txeof) 2510 return; 2511 2512 txr->hn_txdone_cnt = 0; 2513 hn_txeof(txr); 2514 } 2515 2516 static __inline uint32_t 2517 hn_rndis_pktmsg_offset(uint32_t ofs) 2518 { 2519 2520 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 2521 ("invalid RNDIS packet msg offset %u", ofs)); 2522 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 2523 } 2524 2525 static __inline void * 2526 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 2527 size_t pi_dlen, uint32_t pi_type) 2528 { 2529 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 2530 struct rndis_pktinfo *pi; 2531 2532 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 2533 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 2534 2535 /* 2536 * Per-packet-info does not move; it only grows. 2537 * 2538 * NOTE: 2539 * rm_pktinfooffset in this phase counts from the beginning 2540 * of rndis_packet_msg. 2541 */ 2542 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 2543 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 2544 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 2545 pkt->rm_pktinfolen); 2546 pkt->rm_pktinfolen += pi_size; 2547 2548 pi->rm_size = pi_size; 2549 pi->rm_type = pi_type; 2550 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 2551 2552 return (pi->rm_data); 2553 } 2554 2555 static __inline int 2556 hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) 2557 { 2558 struct hn_txdesc *txd; 2559 struct mbuf *m; 2560 int error, pkts; 2561 2562 txd = txr->hn_agg_txd; 2563 KASSERT(txd != NULL, ("no aggregate txdesc")); 2564 2565 /* 2566 * Since hn_txpkt() will reset this temporary stat, save 2567 * it now, so that oerrors can be updated properly, if 2568 * hn_txpkt() ever fails. 2569 */ 2570 pkts = txr->hn_stat_pkts; 2571 2572 /* 2573 * Since txd's mbuf will _not_ be freed upon hn_txpkt() 2574 * failure, save it for later freeing, if hn_txpkt() ever 2575 * fails. 2576 */ 2577 m = txd->m; 2578 error = hn_txpkt(ifp, txr, txd); 2579 if (__predict_false(error)) { 2580 /* txd is freed, but m is not. */ 2581 m_freem(m); 2582 2583 txr->hn_flush_failed++; 2584 if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); 2585 } 2586 2587 /* Reset all aggregation states. 
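 * This is done regardless of whether hn_txpkt() succeeded, so the
 * next packet always starts a fresh aggregation batch.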
*/ 2588 txr->hn_agg_txd = NULL; 2589 txr->hn_agg_szleft = 0; 2590 txr->hn_agg_pktleft = 0; 2591 txr->hn_agg_prevpkt = NULL; 2592 2593 return (error); 2594 } 2595 2596 static void * 2597 hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2598 int pktsize) 2599 { 2600 void *chim; 2601 2602 if (txr->hn_agg_txd != NULL) { 2603 if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { 2604 struct hn_txdesc *agg_txd = txr->hn_agg_txd; 2605 struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; 2606 int olen; 2607 2608 /* 2609 * Update the previous RNDIS packet's total length, 2610 * it can be increased due to the mandatory alignment 2611 * padding for this RNDIS packet. And update the 2612 * aggregating txdesc's chimney sending buffer size 2613 * accordingly. 2614 * 2615 * XXX 2616 * Zero-out the padding, as required by the RNDIS spec. 2617 */ 2618 olen = pkt->rm_len; 2619 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 2620 agg_txd->chim_size += pkt->rm_len - olen; 2621 2622 /* Link this txdesc to the parent. */ 2623 hn_txdesc_agg(agg_txd, txd); 2624 2625 chim = (uint8_t *)pkt + pkt->rm_len; 2626 /* Save the current packet for later fixup. */ 2627 txr->hn_agg_prevpkt = chim; 2628 2629 txr->hn_agg_pktleft--; 2630 txr->hn_agg_szleft -= pktsize; 2631 if (txr->hn_agg_szleft <= 2632 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 2633 /* 2634 * Probably can't aggregate more packets, 2635 * flush this aggregating txdesc proactively. 2636 */ 2637 txr->hn_agg_pktleft = 0; 2638 } 2639 /* Done! */ 2640 return (chim); 2641 } 2642 hn_flush_txagg(ifp, txr); 2643 } 2644 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 2645 2646 txr->hn_tx_chimney_tried++; 2647 txd->chim_index = hn_chim_alloc(txr->hn_sc); 2648 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 2649 return (NULL); 2650 txr->hn_tx_chimney++; 2651 2652 chim = txr->hn_sc->hn_chim + 2653 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 2654 2655 if (txr->hn_agg_pktmax > 1 && 2656 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 2657 txr->hn_agg_txd = txd; 2658 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 2659 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 2660 txr->hn_agg_prevpkt = chim; 2661 } 2662 return (chim); 2663 } 2664 2665 /* 2666 * NOTE: 2667 * If this function fails, then both txd and m_head0 will be freed. 
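 * On success the mbuf is linked to the txdesc (txd->m) and is
 * released by hn_txdesc_put() once the transmission completes.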
2668 */ 2669 static int 2670 hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2671 struct mbuf **m_head0) 2672 { 2673 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 2674 int error, nsegs, i; 2675 struct mbuf *m_head = *m_head0; 2676 struct rndis_packet_msg *pkt; 2677 uint32_t *pi_data; 2678 void *chim = NULL; 2679 int pkt_hlen, pkt_size; 2680 2681 pkt = txd->rndis_pkt; 2682 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 2683 if (pkt_size < txr->hn_chim_size) { 2684 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 2685 if (chim != NULL) 2686 pkt = chim; 2687 } else { 2688 if (txr->hn_agg_txd != NULL) 2689 hn_flush_txagg(ifp, txr); 2690 } 2691 2692 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 2693 pkt->rm_len = m_head->m_pkthdr.len; 2694 pkt->rm_dataoffset = 0; 2695 pkt->rm_datalen = m_head->m_pkthdr.len; 2696 pkt->rm_oobdataoffset = 0; 2697 pkt->rm_oobdatalen = 0; 2698 pkt->rm_oobdataelements = 0; 2699 pkt->rm_pktinfooffset = sizeof(*pkt); 2700 pkt->rm_pktinfolen = 0; 2701 pkt->rm_vchandle = 0; 2702 pkt->rm_reserved = 0; 2703 2704 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 2705 /* 2706 * Set the hash value for this packet, so that the host could 2707 * dispatch the TX done event for this packet back to this TX 2708 * ring's channel. 2709 */ 2710 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2711 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 2712 *pi_data = txr->hn_tx_idx; 2713 } 2714 2715 if (m_head->m_flags & M_VLANTAG) { 2716 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2717 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 2718 *pi_data = NDIS_VLAN_INFO_MAKE( 2719 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 2720 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 2721 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 2722 } 2723 2724 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 2725 #if defined(INET6) || defined(INET) 2726 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2727 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 2728 #ifdef INET 2729 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 2730 *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0, 2731 m_head->m_pkthdr.tso_segsz); 2732 } 2733 #endif 2734 #if defined(INET6) && defined(INET) 2735 else 2736 #endif 2737 #ifdef INET6 2738 { 2739 *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0, 2740 m_head->m_pkthdr.tso_segsz); 2741 } 2742 #endif 2743 #endif /* INET6 || INET */ 2744 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 2745 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2746 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 2747 if (m_head->m_pkthdr.csum_flags & 2748 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 2749 *pi_data = NDIS_TXCSUM_INFO_IPV6; 2750 } else { 2751 *pi_data = NDIS_TXCSUM_INFO_IPV4; 2752 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 2753 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 2754 } 2755 2756 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) 2757 *pi_data |= NDIS_TXCSUM_INFO_TCPCS; 2758 else if (m_head->m_pkthdr.csum_flags & 2759 (CSUM_IP_UDP | CSUM_IP6_UDP)) 2760 *pi_data |= NDIS_TXCSUM_INFO_UDPCS; 2761 } 2762 2763 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 2764 /* Fixup RNDIS packet message total length */ 2765 pkt->rm_len += pkt_hlen; 2766 /* Convert RNDIS packet message offsets */ 2767 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); 2768 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 2769 2770 /* 2771 * Fast path: Chimney sending. 
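 * The RNDIS packet message was built directly in the chimney sending
 * buffer; copy the payload in right after it and send without a
 * scatter/gather page list (hn_gpa_cnt is set to 0).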
2772 */ 2773 if (chim != NULL) { 2774 struct hn_txdesc *tgt_txd = txd; 2775 2776 if (txr->hn_agg_txd != NULL) { 2777 tgt_txd = txr->hn_agg_txd; 2778 #ifdef INVARIANTS 2779 *m_head0 = NULL; 2780 #endif 2781 } 2782 2783 KASSERT(pkt == chim, 2784 ("RNDIS pkt not in chimney sending buffer")); 2785 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 2786 ("chimney sending buffer is not used")); 2787 tgt_txd->chim_size += pkt->rm_len; 2788 2789 m_copydata(m_head, 0, m_head->m_pkthdr.len, 2790 ((uint8_t *)chim) + pkt_hlen); 2791 2792 txr->hn_gpa_cnt = 0; 2793 txr->hn_sendpkt = hn_txpkt_chim; 2794 goto done; 2795 } 2796 2797 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 2798 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2799 ("chimney buffer is used")); 2800 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 2801 2802 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 2803 if (__predict_false(error)) { 2804 int freed; 2805 2806 /* 2807 * This mbuf is not linked w/ the txd yet, so free it now. 2808 */ 2809 m_freem(m_head); 2810 *m_head0 = NULL; 2811 2812 freed = hn_txdesc_put(txr, txd); 2813 KASSERT(freed != 0, 2814 ("fail to free txd upon txdma error")); 2815 2816 txr->hn_txdma_failed++; 2817 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2818 return error; 2819 } 2820 *m_head0 = m_head; 2821 2822 /* +1 RNDIS packet message */ 2823 txr->hn_gpa_cnt = nsegs + 1; 2824 2825 /* send packet with page buffer */ 2826 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 2827 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 2828 txr->hn_gpa[0].gpa_len = pkt_hlen; 2829 2830 /* 2831 * Fill the page buffers with mbuf info after the page 2832 * buffer for RNDIS packet message. 2833 */ 2834 for (i = 0; i < nsegs; ++i) { 2835 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 2836 2837 gpa->gpa_page = atop(segs[i].ds_addr); 2838 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 2839 gpa->gpa_len = segs[i].ds_len; 2840 } 2841 2842 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2843 txd->chim_size = 0; 2844 txr->hn_sendpkt = hn_txpkt_sglist; 2845 done: 2846 txd->m = m_head; 2847 2848 /* Set the completion routine */ 2849 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 2850 2851 /* Update temporary stats for later use. */ 2852 txr->hn_stat_pkts++; 2853 txr->hn_stat_size += m_head->m_pkthdr.len; 2854 if (m_head->m_flags & M_MCAST) 2855 txr->hn_stat_mcasts++; 2856 2857 return 0; 2858 } 2859 2860 /* 2861 * NOTE: 2862 * If this function fails, then txd will be freed, but the mbuf 2863 * associated w/ the txd will _not_ be freed. 2864 */ 2865 static int 2866 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 2867 { 2868 int error, send_failed = 0, has_bpf; 2869 2870 again: 2871 has_bpf = bpf_peers_present(ifp->if_bpf); 2872 if (has_bpf) { 2873 /* 2874 * Make sure that this txd and any aggregated txds are not 2875 * freed before ETHER_BPF_MTAP. 
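 * The extra reference taken here is dropped by the hn_txdesc_put()
 * call right after the BPF taps below.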
2876 */ 2877 hn_txdesc_hold(txd); 2878 } 2879 error = txr->hn_sendpkt(txr, txd); 2880 if (!error) { 2881 if (has_bpf) { 2882 const struct hn_txdesc *tmp_txd; 2883 2884 ETHER_BPF_MTAP(ifp, txd->m); 2885 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 2886 ETHER_BPF_MTAP(ifp, tmp_txd->m); 2887 } 2888 2889 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 2890 #ifdef HN_IFSTART_SUPPORT 2891 if (!hn_use_if_start) 2892 #endif 2893 { 2894 if_inc_counter(ifp, IFCOUNTER_OBYTES, 2895 txr->hn_stat_size); 2896 if (txr->hn_stat_mcasts != 0) { 2897 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 2898 txr->hn_stat_mcasts); 2899 } 2900 } 2901 txr->hn_pkts += txr->hn_stat_pkts; 2902 txr->hn_sends++; 2903 } 2904 if (has_bpf) 2905 hn_txdesc_put(txr, txd); 2906 2907 if (__predict_false(error)) { 2908 int freed; 2909 2910 /* 2911 * This should "really rarely" happen. 2912 * 2913 * XXX Too many RX to be acked or too many sideband 2914 * commands to run? Ask netvsc_channel_rollup() 2915 * to kick start later. 2916 */ 2917 txr->hn_has_txeof = 1; 2918 if (!send_failed) { 2919 txr->hn_send_failed++; 2920 send_failed = 1; 2921 /* 2922 * Try sending again after set hn_has_txeof; 2923 * in case that we missed the last 2924 * netvsc_channel_rollup(). 2925 */ 2926 goto again; 2927 } 2928 if_printf(ifp, "send failed\n"); 2929 2930 /* 2931 * Caller will perform further processing on the 2932 * associated mbuf, so don't free it in hn_txdesc_put(); 2933 * only unload it from the DMA map in hn_txdesc_put(), 2934 * if it was loaded. 2935 */ 2936 txd->m = NULL; 2937 freed = hn_txdesc_put(txr, txd); 2938 KASSERT(freed != 0, 2939 ("fail to free txd upon send error")); 2940 2941 txr->hn_send_failed++; 2942 } 2943 2944 /* Reset temporary stats, after this sending is done. */ 2945 txr->hn_stat_size = 0; 2946 txr->hn_stat_pkts = 0; 2947 txr->hn_stat_mcasts = 0; 2948 2949 return (error); 2950 } 2951 2952 /* 2953 * Append the specified data to the indicated mbuf chain, 2954 * Extend the mbuf chain if the new data does not fit in 2955 * existing space. 2956 * 2957 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 2958 * There should be an equivalent in the kernel mbuf code, 2959 * but there does not appear to be one yet. 2960 * 2961 * Differs from m_append() in that additional mbufs are 2962 * allocated with cluster size MJUMPAGESIZE, and filled 2963 * accordingly. 2964 * 2965 * Return 1 if able to complete the job; otherwise 0. 2966 */ 2967 static int 2968 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 2969 { 2970 struct mbuf *m, *n; 2971 int remainder, space; 2972 2973 for (m = m0; m->m_next != NULL; m = m->m_next) 2974 ; 2975 remainder = len; 2976 space = M_TRAILINGSPACE(m); 2977 if (space > 0) { 2978 /* 2979 * Copy into available space. 2980 */ 2981 if (space > remainder) 2982 space = remainder; 2983 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 2984 m->m_len += space; 2985 cp += space; 2986 remainder -= space; 2987 } 2988 while (remainder > 0) { 2989 /* 2990 * Allocate a new mbuf; could check space 2991 * and allocate a cluster instead. 
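 * A page-sized (MJUMPAGESIZE) cluster is requested, matching the
 * function header comment; m_pkthdr.len is fixed up after the loop.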
2992 */ 2993 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 2994 if (n == NULL) 2995 break; 2996 n->m_len = min(MJUMPAGESIZE, remainder); 2997 bcopy(cp, mtod(n, caddr_t), n->m_len); 2998 cp += n->m_len; 2999 remainder -= n->m_len; 3000 m->m_next = n; 3001 m = n; 3002 } 3003 if (m0->m_flags & M_PKTHDR) 3004 m0->m_pkthdr.len += len - remainder; 3005 3006 return (remainder == 0); 3007 } 3008 3009 #if defined(INET) || defined(INET6) 3010 static __inline int 3011 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 3012 { 3013 #if __FreeBSD_version >= 1100095 3014 if (hn_lro_mbufq_depth) { 3015 tcp_lro_queue_mbuf(lc, m); 3016 return 0; 3017 } 3018 #endif 3019 return tcp_lro_rx(lc, m, 0); 3020 } 3021 #endif 3022 3023 static int 3024 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 3025 const struct hn_rxinfo *info) 3026 { 3027 struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; 3028 struct mbuf *m_new; 3029 int size, do_lro = 0, do_csum = 1; 3030 int hash_type; 3031 3032 /* 3033 * If the non-transparent mode VF is active, inject this packet 3034 * into the VF. 3035 */ 3036 ifp = rxr->hn_rxvf_ifp ? rxr->hn_rxvf_ifp : hn_ifp; 3037 3038 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 3039 /* 3040 * NOTE: 3041 * See the NOTE of hn_rndis_init_fixat(). This 3042 * function can be reached, immediately after the 3043 * RNDIS is initialized but before the ifnet is 3044 * setup on the hn_attach() path; drop the unexpected 3045 * packets. 3046 */ 3047 return (0); 3048 } 3049 3050 if (__predict_false(dlen < ETHER_HDR_LEN)) { 3051 if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); 3052 return (0); 3053 } 3054 3055 if (dlen <= MHLEN) { 3056 m_new = m_gethdr(M_NOWAIT, MT_DATA); 3057 if (m_new == NULL) { 3058 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3059 return (0); 3060 } 3061 memcpy(mtod(m_new, void *), data, dlen); 3062 m_new->m_pkthdr.len = m_new->m_len = dlen; 3063 rxr->hn_small_pkts++; 3064 } else { 3065 /* 3066 * Get an mbuf with a cluster. For packets 2K or less, 3067 * get a standard 2K cluster. For anything larger, get a 3068 * 4K cluster. Any buffers larger than 4K can cause problems 3069 * if looped around to the Hyper-V TX channel, so avoid them. 3070 */ 3071 size = MCLBYTES; 3072 if (dlen > MCLBYTES) { 3073 /* 4096 */ 3074 size = MJUMPAGESIZE; 3075 } 3076 3077 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 3078 if (m_new == NULL) { 3079 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3080 return (0); 3081 } 3082 3083 hv_m_append(m_new, dlen, data); 3084 } 3085 m_new->m_pkthdr.rcvif = ifp; 3086 3087 if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) 3088 do_csum = 0; 3089 3090 /* receive side checksum offload */ 3091 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 3092 /* IP csum offload */ 3093 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 3094 m_new->m_pkthdr.csum_flags |= 3095 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3096 rxr->hn_csum_ip++; 3097 } 3098 3099 /* TCP/UDP csum offload */ 3100 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 3101 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 3102 m_new->m_pkthdr.csum_flags |= 3103 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3104 m_new->m_pkthdr.csum_data = 0xffff; 3105 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 3106 rxr->hn_csum_tcp++; 3107 else 3108 rxr->hn_csum_udp++; 3109 } 3110 3111 /* 3112 * XXX 3113 * As of this write (Oct 28th, 2016), host side will turn 3114 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 3115 * the do_lro setting here is actually _not_ accurate. 
We 3116 * depend on the RSS hash type check to reset do_lro. 3117 */ 3118 if ((info->csum_info & 3119 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 3120 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 3121 do_lro = 1; 3122 } else { 3123 const struct ether_header *eh; 3124 uint16_t etype; 3125 int hoff; 3126 3127 hoff = sizeof(*eh); 3128 /* Checked at the beginning of this function. */ 3129 KASSERT(m_new->m_len >= hoff, ("not ethernet frame")); 3130 3131 eh = mtod(m_new, struct ether_header *); 3132 etype = ntohs(eh->ether_type); 3133 if (etype == ETHERTYPE_VLAN) { 3134 const struct ether_vlan_header *evl; 3135 3136 hoff = sizeof(*evl); 3137 if (m_new->m_len < hoff) 3138 goto skip; 3139 evl = mtod(m_new, struct ether_vlan_header *); 3140 etype = ntohs(evl->evl_proto); 3141 } 3142 3143 if (etype == ETHERTYPE_IP) { 3144 int pr; 3145 3146 pr = hn_check_iplen(m_new, hoff); 3147 if (pr == IPPROTO_TCP) { 3148 if (do_csum && 3149 (rxr->hn_trust_hcsum & 3150 HN_TRUST_HCSUM_TCP)) { 3151 rxr->hn_csum_trusted++; 3152 m_new->m_pkthdr.csum_flags |= 3153 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3154 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3155 m_new->m_pkthdr.csum_data = 0xffff; 3156 } 3157 do_lro = 1; 3158 } else if (pr == IPPROTO_UDP) { 3159 if (do_csum && 3160 (rxr->hn_trust_hcsum & 3161 HN_TRUST_HCSUM_UDP)) { 3162 rxr->hn_csum_trusted++; 3163 m_new->m_pkthdr.csum_flags |= 3164 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3165 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3166 m_new->m_pkthdr.csum_data = 0xffff; 3167 } 3168 } else if (pr != IPPROTO_DONE && do_csum && 3169 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 3170 rxr->hn_csum_trusted++; 3171 m_new->m_pkthdr.csum_flags |= 3172 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3173 } 3174 } 3175 } 3176 skip: 3177 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 3178 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 3179 NDIS_VLAN_INFO_ID(info->vlan_info), 3180 NDIS_VLAN_INFO_PRI(info->vlan_info), 3181 NDIS_VLAN_INFO_CFI(info->vlan_info)); 3182 m_new->m_flags |= M_VLANTAG; 3183 } 3184 3185 /* 3186 * If VF is activated (tranparent/non-transparent mode does not 3187 * matter here). 3188 * 3189 * - Don't setup mbuf hash, if 'options RSS' is set. 3190 * 3191 * In Azure, when VF is activated, TCP SYN and SYN|ACK go 3192 * through hn(4) while the rest of segments and ACKs belonging 3193 * to the same TCP 4-tuple go through the VF. So don't setup 3194 * mbuf hash, if a VF is activated and 'options RSS' is not 3195 * enabled. hn(4) and the VF may use neither the same RSS 3196 * hash key nor the same RSS hash function, so the hash value 3197 * for packets belonging to the same flow could be different! 3198 * 3199 * - Disable LRO 3200 * 3201 * hn(4) will only receive broadcast packets, multicast packets, 3202 * TCP SYN and SYN|ACK (in Azure), LRO is useless for these 3203 * packet types. 3204 * 3205 * For non-transparent, we definitely _cannot_ enable LRO at 3206 * all, since the LRO flush will use hn(4) as the receiving 3207 * interface; i.e. hn_ifp->if_input(hn_ifp, m). 3208 */ 3209 if (hn_ifp != ifp || (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF)) { 3210 do_lro = 0; /* disable LRO. 
*/ 3211 #ifndef RSS 3212 goto skip_hash; /* skip mbuf hash setup */ 3213 #endif 3214 } 3215 3216 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 3217 rxr->hn_rss_pkts++; 3218 m_new->m_pkthdr.flowid = info->hash_value; 3219 hash_type = M_HASHTYPE_OPAQUE_HASH; 3220 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 3221 NDIS_HASH_FUNCTION_TOEPLITZ) { 3222 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK); 3223 3224 /* 3225 * NOTE: 3226 * do_lro is resetted, if the hash types are not TCP 3227 * related. See the comment in the above csum_flags 3228 * setup section. 3229 */ 3230 switch (type) { 3231 case NDIS_HASH_IPV4: 3232 hash_type = M_HASHTYPE_RSS_IPV4; 3233 do_lro = 0; 3234 break; 3235 3236 case NDIS_HASH_TCP_IPV4: 3237 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 3238 break; 3239 3240 case NDIS_HASH_IPV6: 3241 hash_type = M_HASHTYPE_RSS_IPV6; 3242 do_lro = 0; 3243 break; 3244 3245 case NDIS_HASH_IPV6_EX: 3246 hash_type = M_HASHTYPE_RSS_IPV6_EX; 3247 do_lro = 0; 3248 break; 3249 3250 case NDIS_HASH_TCP_IPV6: 3251 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 3252 break; 3253 3254 case NDIS_HASH_TCP_IPV6_EX: 3255 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 3256 break; 3257 } 3258 } 3259 } else { 3260 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 3261 hash_type = M_HASHTYPE_OPAQUE; 3262 } 3263 M_HASHTYPE_SET(m_new, hash_type); 3264 3265 #ifndef RSS 3266 skip_hash: 3267 #endif 3268 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 3269 if (hn_ifp != ifp) { 3270 const struct ether_header *eh; 3271 3272 /* 3273 * Non-transparent mode VF is activated. 3274 */ 3275 3276 /* 3277 * Allow tapping on hn(4). 3278 */ 3279 ETHER_BPF_MTAP(hn_ifp, m_new); 3280 3281 /* 3282 * Update hn(4)'s stats. 3283 */ 3284 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 3285 if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len); 3286 /* Checked at the beginning of this function. */ 3287 KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame")); 3288 eh = mtod(m_new, struct ether_header *); 3289 if (ETHER_IS_MULTICAST(eh->ether_dhost)) 3290 if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1); 3291 } 3292 rxr->hn_pkts++; 3293 3294 if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) { 3295 #if defined(INET) || defined(INET6) 3296 struct lro_ctrl *lro = &rxr->hn_lro; 3297 3298 if (lro->lro_cnt) { 3299 rxr->hn_lro_tried++; 3300 if (hn_lro_rx(lro, m_new) == 0) { 3301 /* DONE! 
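 * The mbuf was consumed by LRO and will be delivered to the stack on
 * the next LRO flush; don't pass it to if_input().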
*/ 3302 return 0; 3303 } 3304 } 3305 #endif 3306 } 3307 ifp->if_input(ifp, m_new); 3308 3309 return (0); 3310 } 3311 3312 static int 3313 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 3314 { 3315 struct hn_softc *sc = ifp->if_softc; 3316 struct ifreq *ifr = (struct ifreq *)data, ifr_vf; 3317 struct ifnet *vf_ifp; 3318 int mask, error = 0; 3319 struct ifrsskey *ifrk; 3320 struct ifrsshash *ifrh; 3321 3322 switch (cmd) { 3323 case SIOCSIFMTU: 3324 if (ifr->ifr_mtu > HN_MTU_MAX) { 3325 error = EINVAL; 3326 break; 3327 } 3328 3329 HN_LOCK(sc); 3330 3331 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3332 HN_UNLOCK(sc); 3333 break; 3334 } 3335 3336 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 3337 /* Can't change MTU */ 3338 HN_UNLOCK(sc); 3339 error = EOPNOTSUPP; 3340 break; 3341 } 3342 3343 if (ifp->if_mtu == ifr->ifr_mtu) { 3344 HN_UNLOCK(sc); 3345 break; 3346 } 3347 3348 if (hn_xpnt_vf_isready(sc)) { 3349 vf_ifp = sc->hn_vf_ifp; 3350 ifr_vf = *ifr; 3351 strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname, 3352 sizeof(ifr_vf.ifr_name)); 3353 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, 3354 (caddr_t)&ifr_vf); 3355 if (error) { 3356 HN_UNLOCK(sc); 3357 if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n", 3358 vf_ifp->if_xname, ifr->ifr_mtu, error); 3359 break; 3360 } 3361 } 3362 3363 /* 3364 * Suspend this interface before the synthetic parts 3365 * are ripped. 3366 */ 3367 hn_suspend(sc); 3368 3369 /* 3370 * Detach the synthetics parts, i.e. NVS and RNDIS. 3371 */ 3372 hn_synth_detach(sc); 3373 3374 /* 3375 * Reattach the synthetic parts, i.e. NVS and RNDIS, 3376 * with the new MTU setting. 3377 */ 3378 error = hn_synth_attach(sc, ifr->ifr_mtu); 3379 if (error) { 3380 HN_UNLOCK(sc); 3381 break; 3382 } 3383 3384 /* 3385 * Commit the requested MTU, after the synthetic parts 3386 * have been successfully attached. 3387 */ 3388 ifp->if_mtu = ifr->ifr_mtu; 3389 3390 /* 3391 * Synthetic parts' reattach may change the chimney 3392 * sending size; update it. 3393 */ 3394 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 3395 hn_set_chim_size(sc, sc->hn_chim_szmax); 3396 3397 /* 3398 * Make sure that various parameters based on MTU are 3399 * still valid, after the MTU change. 3400 */ 3401 hn_mtu_change_fixup(sc); 3402 3403 /* 3404 * All done! Resume the interface now. 3405 */ 3406 hn_resume(sc); 3407 3408 if ((sc->hn_flags & HN_FLAG_RXVF) || 3409 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 3410 /* 3411 * Since we have reattached the NVS part, 3412 * change the datapath to VF again; in case 3413 * that it is lost, after the NVS was detached. 3414 */ 3415 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 3416 } 3417 3418 HN_UNLOCK(sc); 3419 break; 3420 3421 case SIOCSIFFLAGS: 3422 HN_LOCK(sc); 3423 3424 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3425 HN_UNLOCK(sc); 3426 break; 3427 } 3428 3429 if (hn_xpnt_vf_isready(sc)) 3430 hn_xpnt_vf_saveifflags(sc); 3431 3432 if (ifp->if_flags & IFF_UP) { 3433 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3434 /* 3435 * Caller meight hold mutex, e.g. 3436 * bpf; use busy-wait for the RNDIS 3437 * reply. 
3438 */ 3439 HN_NO_SLEEPING(sc); 3440 hn_rxfilter_config(sc); 3441 HN_SLEEPING_OK(sc); 3442 3443 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 3444 error = hn_xpnt_vf_iocsetflags(sc); 3445 } else { 3446 hn_init_locked(sc); 3447 } 3448 } else { 3449 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3450 hn_stop(sc, false); 3451 } 3452 sc->hn_if_flags = ifp->if_flags; 3453 3454 HN_UNLOCK(sc); 3455 break; 3456 3457 case SIOCSIFCAP: 3458 HN_LOCK(sc); 3459 3460 if (hn_xpnt_vf_isready(sc)) { 3461 ifr_vf = *ifr; 3462 strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname, 3463 sizeof(ifr_vf.ifr_name)); 3464 error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf); 3465 HN_UNLOCK(sc); 3466 break; 3467 } 3468 3469 /* 3470 * Fix up requested capabilities w/ supported capabilities, 3471 * since the supported capabilities could have been changed. 3472 */ 3473 mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ 3474 ifp->if_capenable; 3475 3476 if (mask & IFCAP_TXCSUM) { 3477 ifp->if_capenable ^= IFCAP_TXCSUM; 3478 if (ifp->if_capenable & IFCAP_TXCSUM) 3479 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 3480 else 3481 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 3482 } 3483 if (mask & IFCAP_TXCSUM_IPV6) { 3484 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 3485 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 3486 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 3487 else 3488 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 3489 } 3490 3491 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 3492 if (mask & IFCAP_RXCSUM) 3493 ifp->if_capenable ^= IFCAP_RXCSUM; 3494 #ifdef foo 3495 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 3496 if (mask & IFCAP_RXCSUM_IPV6) 3497 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 3498 #endif 3499 3500 if (mask & IFCAP_LRO) 3501 ifp->if_capenable ^= IFCAP_LRO; 3502 3503 if (mask & IFCAP_TSO4) { 3504 ifp->if_capenable ^= IFCAP_TSO4; 3505 if (ifp->if_capenable & IFCAP_TSO4) 3506 ifp->if_hwassist |= CSUM_IP_TSO; 3507 else 3508 ifp->if_hwassist &= ~CSUM_IP_TSO; 3509 } 3510 if (mask & IFCAP_TSO6) { 3511 ifp->if_capenable ^= IFCAP_TSO6; 3512 if (ifp->if_capenable & IFCAP_TSO6) 3513 ifp->if_hwassist |= CSUM_IP6_TSO; 3514 else 3515 ifp->if_hwassist &= ~CSUM_IP6_TSO; 3516 } 3517 3518 HN_UNLOCK(sc); 3519 break; 3520 3521 case SIOCADDMULTI: 3522 case SIOCDELMULTI: 3523 HN_LOCK(sc); 3524 3525 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3526 HN_UNLOCK(sc); 3527 break; 3528 } 3529 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3530 /* 3531 * Multicast uses mutex; use busy-wait for 3532 * the RNDIS reply. 3533 */ 3534 HN_NO_SLEEPING(sc); 3535 hn_rxfilter_config(sc); 3536 HN_SLEEPING_OK(sc); 3537 } 3538 3539 /* XXX vlan(4) style mcast addr maintenance */ 3540 if (hn_xpnt_vf_isready(sc)) { 3541 int old_if_flags; 3542 3543 old_if_flags = sc->hn_vf_ifp->if_flags; 3544 hn_xpnt_vf_saveifflags(sc); 3545 3546 if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && 3547 ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & 3548 IFF_ALLMULTI)) 3549 error = hn_xpnt_vf_iocsetflags(sc); 3550 } 3551 3552 HN_UNLOCK(sc); 3553 break; 3554 3555 case SIOCSIFMEDIA: 3556 case SIOCGIFMEDIA: 3557 HN_LOCK(sc); 3558 if (hn_xpnt_vf_isready(sc)) { 3559 /* 3560 * SIOCGIFMEDIA expects ifmediareq, so don't 3561 * create and pass ifr_vf to the VF here; just 3562 * replace the ifr_name. 3563 */ 3564 vf_ifp = sc->hn_vf_ifp; 3565 strlcpy(ifr->ifr_name, vf_ifp->if_xname, 3566 sizeof(ifr->ifr_name)); 3567 error = vf_ifp->if_ioctl(vf_ifp, cmd, data); 3568 /* Restore the ifr_name. 
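 * Put hn(4)'s own name back so that the caller's ifreq does not keep
 * referring to the VF after this ioctl returns.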
*/ 3569 strlcpy(ifr->ifr_name, ifp->if_xname, 3570 sizeof(ifr->ifr_name)); 3571 HN_UNLOCK(sc); 3572 break; 3573 } 3574 HN_UNLOCK(sc); 3575 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 3576 break; 3577 3578 case SIOCGIFRSSHASH: 3579 ifrh = (struct ifrsshash *)data; 3580 HN_LOCK(sc); 3581 if (sc->hn_rx_ring_inuse == 1) { 3582 HN_UNLOCK(sc); 3583 ifrh->ifrh_func = RSS_FUNC_NONE; 3584 ifrh->ifrh_types = 0; 3585 break; 3586 } 3587 3588 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 3589 ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; 3590 else 3591 ifrh->ifrh_func = RSS_FUNC_PRIVATE; 3592 3593 ifrh->ifrh_types = 0; 3594 if (sc->hn_rss_hash & NDIS_HASH_IPV4) 3595 ifrh->ifrh_types |= RSS_TYPE_IPV4; 3596 if (sc->hn_rss_hash & NDIS_HASH_TCP_IPV4) 3597 ifrh->ifrh_types |= RSS_TYPE_TCP_IPV4; 3598 if (sc->hn_rss_hash & NDIS_HASH_IPV6) 3599 ifrh->ifrh_types |= RSS_TYPE_IPV6; 3600 if (sc->hn_rss_hash & NDIS_HASH_IPV6_EX) 3601 ifrh->ifrh_types |= RSS_TYPE_IPV6_EX; 3602 if (sc->hn_rss_hash & NDIS_HASH_TCP_IPV6) 3603 ifrh->ifrh_types |= RSS_TYPE_TCP_IPV6; 3604 if (sc->hn_rss_hash & NDIS_HASH_TCP_IPV6_EX) 3605 ifrh->ifrh_types |= RSS_TYPE_TCP_IPV6_EX; 3606 HN_UNLOCK(sc); 3607 break; 3608 3609 case SIOCGIFRSSKEY: 3610 ifrk = (struct ifrsskey *)data; 3611 HN_LOCK(sc); 3612 if (sc->hn_rx_ring_inuse == 1) { 3613 HN_UNLOCK(sc); 3614 ifrk->ifrk_func = RSS_FUNC_NONE; 3615 ifrk->ifrk_keylen = 0; 3616 break; 3617 } 3618 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 3619 ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; 3620 else 3621 ifrk->ifrk_func = RSS_FUNC_PRIVATE; 3622 ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ; 3623 memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key, 3624 NDIS_HASH_KEYSIZE_TOEPLITZ); 3625 HN_UNLOCK(sc); 3626 break; 3627 3628 default: 3629 error = ether_ioctl(ifp, cmd, data); 3630 break; 3631 } 3632 return (error); 3633 } 3634 3635 static void 3636 hn_stop(struct hn_softc *sc, bool detaching) 3637 { 3638 struct ifnet *ifp = sc->hn_ifp; 3639 int i; 3640 3641 HN_LOCK_ASSERT(sc); 3642 3643 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 3644 ("synthetic parts were not attached")); 3645 3646 /* Clear RUNNING bit ASAP. */ 3647 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 3648 3649 /* Disable polling. */ 3650 hn_polling(sc, 0); 3651 3652 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 3653 KASSERT(sc->hn_vf_ifp != NULL, 3654 ("%s: VF is not attached", ifp->if_xname)); 3655 3656 /* Mark transparent mode VF as disabled. */ 3657 hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */); 3658 3659 /* 3660 * NOTE: 3661 * Datapath setting must happen _before_ bringing 3662 * the VF down. 3663 */ 3664 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 3665 3666 /* 3667 * Bring the VF down. 3668 */ 3669 hn_xpnt_vf_saveifflags(sc); 3670 sc->hn_vf_ifp->if_flags &= ~IFF_UP; 3671 hn_xpnt_vf_iocsetflags(sc); 3672 } 3673 3674 /* Suspend data transfers. */ 3675 hn_suspend_data(sc); 3676 3677 /* Clear OACTIVE bit. */ 3678 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 3679 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 3680 sc->hn_tx_ring[i].hn_oactive = 0; 3681 3682 /* 3683 * If the non-transparent mode VF is active, make sure 3684 * that the RX filter still allows packet reception. 
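 * This is skipped while detaching, since the synthetic parts are torn
 * down right afterwards anyway.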
3685 */ 3686 if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) 3687 hn_rxfilter_config(sc); 3688 } 3689 3690 static void 3691 hn_init_locked(struct hn_softc *sc) 3692 { 3693 struct ifnet *ifp = sc->hn_ifp; 3694 int i; 3695 3696 HN_LOCK_ASSERT(sc); 3697 3698 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 3699 return; 3700 3701 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3702 return; 3703 3704 /* Configure RX filter */ 3705 hn_rxfilter_config(sc); 3706 3707 /* Clear OACTIVE bit. */ 3708 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 3709 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 3710 sc->hn_tx_ring[i].hn_oactive = 0; 3711 3712 /* Clear TX 'suspended' bit. */ 3713 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 3714 3715 if (hn_xpnt_vf_isready(sc)) { 3716 /* Initialize transparent VF. */ 3717 hn_xpnt_vf_init(sc); 3718 } 3719 3720 /* Everything is ready; unleash! */ 3721 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 3722 3723 /* Re-enable polling if requested. */ 3724 if (sc->hn_pollhz > 0) 3725 hn_polling(sc, sc->hn_pollhz); 3726 } 3727 3728 static void 3729 hn_init(void *xsc) 3730 { 3731 struct hn_softc *sc = xsc; 3732 3733 HN_LOCK(sc); 3734 hn_init_locked(sc); 3735 HN_UNLOCK(sc); 3736 } 3737 3738 #if __FreeBSD_version >= 1100099 3739 3740 static int 3741 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 3742 { 3743 struct hn_softc *sc = arg1; 3744 unsigned int lenlim; 3745 int error; 3746 3747 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 3748 error = sysctl_handle_int(oidp, &lenlim, 0, req); 3749 if (error || req->newptr == NULL) 3750 return error; 3751 3752 HN_LOCK(sc); 3753 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 3754 lenlim > TCP_LRO_LENGTH_MAX) { 3755 HN_UNLOCK(sc); 3756 return EINVAL; 3757 } 3758 hn_set_lro_lenlim(sc, lenlim); 3759 HN_UNLOCK(sc); 3760 3761 return 0; 3762 } 3763 3764 static int 3765 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 3766 { 3767 struct hn_softc *sc = arg1; 3768 int ackcnt, error, i; 3769 3770 /* 3771 * lro_ackcnt_lim is append count limit, 3772 * +1 to turn it into aggregation limit. 3773 */ 3774 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 3775 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 3776 if (error || req->newptr == NULL) 3777 return error; 3778 3779 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 3780 return EINVAL; 3781 3782 /* 3783 * Convert aggregation limit back to append 3784 * count limit. 
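 * i.e. undo the +1 that was applied when the value was exported
 * above, before programming each RX ring's LRO control below.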
3785 */ 3786 --ackcnt; 3787 HN_LOCK(sc); 3788 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 3789 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 3790 HN_UNLOCK(sc); 3791 return 0; 3792 } 3793 3794 #endif 3795 3796 static int 3797 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 3798 { 3799 struct hn_softc *sc = arg1; 3800 int hcsum = arg2; 3801 int on, error, i; 3802 3803 on = 0; 3804 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 3805 on = 1; 3806 3807 error = sysctl_handle_int(oidp, &on, 0, req); 3808 if (error || req->newptr == NULL) 3809 return error; 3810 3811 HN_LOCK(sc); 3812 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3813 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 3814 3815 if (on) 3816 rxr->hn_trust_hcsum |= hcsum; 3817 else 3818 rxr->hn_trust_hcsum &= ~hcsum; 3819 } 3820 HN_UNLOCK(sc); 3821 return 0; 3822 } 3823 3824 static int 3825 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 3826 { 3827 struct hn_softc *sc = arg1; 3828 int chim_size, error; 3829 3830 chim_size = sc->hn_tx_ring[0].hn_chim_size; 3831 error = sysctl_handle_int(oidp, &chim_size, 0, req); 3832 if (error || req->newptr == NULL) 3833 return error; 3834 3835 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 3836 return EINVAL; 3837 3838 HN_LOCK(sc); 3839 hn_set_chim_size(sc, chim_size); 3840 HN_UNLOCK(sc); 3841 return 0; 3842 } 3843 3844 #if __FreeBSD_version < 1100095 3845 static int 3846 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 3847 { 3848 struct hn_softc *sc = arg1; 3849 int ofs = arg2, i, error; 3850 struct hn_rx_ring *rxr; 3851 uint64_t stat; 3852 3853 stat = 0; 3854 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3855 rxr = &sc->hn_rx_ring[i]; 3856 stat += *((int *)((uint8_t *)rxr + ofs)); 3857 } 3858 3859 error = sysctl_handle_64(oidp, &stat, 0, req); 3860 if (error || req->newptr == NULL) 3861 return error; 3862 3863 /* Zero out this stat. */ 3864 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3865 rxr = &sc->hn_rx_ring[i]; 3866 *((int *)((uint8_t *)rxr + ofs)) = 0; 3867 } 3868 return 0; 3869 } 3870 #else 3871 static int 3872 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 3873 { 3874 struct hn_softc *sc = arg1; 3875 int ofs = arg2, i, error; 3876 struct hn_rx_ring *rxr; 3877 uint64_t stat; 3878 3879 stat = 0; 3880 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3881 rxr = &sc->hn_rx_ring[i]; 3882 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 3883 } 3884 3885 error = sysctl_handle_64(oidp, &stat, 0, req); 3886 if (error || req->newptr == NULL) 3887 return error; 3888 3889 /* Zero out this stat. */ 3890 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3891 rxr = &sc->hn_rx_ring[i]; 3892 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 3893 } 3894 return 0; 3895 } 3896 3897 #endif 3898 3899 static int 3900 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 3901 { 3902 struct hn_softc *sc = arg1; 3903 int ofs = arg2, i, error; 3904 struct hn_rx_ring *rxr; 3905 u_long stat; 3906 3907 stat = 0; 3908 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3909 rxr = &sc->hn_rx_ring[i]; 3910 stat += *((u_long *)((uint8_t *)rxr + ofs)); 3911 } 3912 3913 error = sysctl_handle_long(oidp, &stat, 0, req); 3914 if (error || req->newptr == NULL) 3915 return error; 3916 3917 /* Zero out this stat. 
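 * Writing any value to this sysctl resets the accumulated per-ring
 * counters; plain reads leave them untouched.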
*/ 3918 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3919 rxr = &sc->hn_rx_ring[i]; 3920 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 3921 } 3922 return 0; 3923 } 3924 3925 static int 3926 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 3927 { 3928 struct hn_softc *sc = arg1; 3929 int ofs = arg2, i, error; 3930 struct hn_tx_ring *txr; 3931 u_long stat; 3932 3933 stat = 0; 3934 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3935 txr = &sc->hn_tx_ring[i]; 3936 stat += *((u_long *)((uint8_t *)txr + ofs)); 3937 } 3938 3939 error = sysctl_handle_long(oidp, &stat, 0, req); 3940 if (error || req->newptr == NULL) 3941 return error; 3942 3943 /* Zero out this stat. */ 3944 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3945 txr = &sc->hn_tx_ring[i]; 3946 *((u_long *)((uint8_t *)txr + ofs)) = 0; 3947 } 3948 return 0; 3949 } 3950 3951 static int 3952 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 3953 { 3954 struct hn_softc *sc = arg1; 3955 int ofs = arg2, i, error, conf; 3956 struct hn_tx_ring *txr; 3957 3958 txr = &sc->hn_tx_ring[0]; 3959 conf = *((int *)((uint8_t *)txr + ofs)); 3960 3961 error = sysctl_handle_int(oidp, &conf, 0, req); 3962 if (error || req->newptr == NULL) 3963 return error; 3964 3965 HN_LOCK(sc); 3966 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3967 txr = &sc->hn_tx_ring[i]; 3968 *((int *)((uint8_t *)txr + ofs)) = conf; 3969 } 3970 HN_UNLOCK(sc); 3971 3972 return 0; 3973 } 3974 3975 static int 3976 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 3977 { 3978 struct hn_softc *sc = arg1; 3979 int error, size; 3980 3981 size = sc->hn_agg_size; 3982 error = sysctl_handle_int(oidp, &size, 0, req); 3983 if (error || req->newptr == NULL) 3984 return (error); 3985 3986 HN_LOCK(sc); 3987 sc->hn_agg_size = size; 3988 hn_set_txagg(sc); 3989 HN_UNLOCK(sc); 3990 3991 return (0); 3992 } 3993 3994 static int 3995 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 3996 { 3997 struct hn_softc *sc = arg1; 3998 int error, pkts; 3999 4000 pkts = sc->hn_agg_pkts; 4001 error = sysctl_handle_int(oidp, &pkts, 0, req); 4002 if (error || req->newptr == NULL) 4003 return (error); 4004 4005 HN_LOCK(sc); 4006 sc->hn_agg_pkts = pkts; 4007 hn_set_txagg(sc); 4008 HN_UNLOCK(sc); 4009 4010 return (0); 4011 } 4012 4013 static int 4014 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 4015 { 4016 struct hn_softc *sc = arg1; 4017 int pkts; 4018 4019 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 4020 return (sysctl_handle_int(oidp, &pkts, 0, req)); 4021 } 4022 4023 static int 4024 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 4025 { 4026 struct hn_softc *sc = arg1; 4027 int align; 4028 4029 align = sc->hn_tx_ring[0].hn_agg_align; 4030 return (sysctl_handle_int(oidp, &align, 0, req)); 4031 } 4032 4033 static void 4034 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) 4035 { 4036 if (pollhz == 0) 4037 vmbus_chan_poll_disable(chan); 4038 else 4039 vmbus_chan_poll_enable(chan, pollhz); 4040 } 4041 4042 static void 4043 hn_polling(struct hn_softc *sc, u_int pollhz) 4044 { 4045 int nsubch = sc->hn_rx_ring_inuse - 1; 4046 4047 HN_LOCK_ASSERT(sc); 4048 4049 if (nsubch > 0) { 4050 struct vmbus_channel **subch; 4051 int i; 4052 4053 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 4054 for (i = 0; i < nsubch; ++i) 4055 hn_chan_polling(subch[i], pollhz); 4056 vmbus_subchan_rel(subch, nsubch); 4057 } 4058 hn_chan_polling(sc->hn_prichan, pollhz); 4059 } 4060 4061 static int 4062 hn_polling_sysctl(SYSCTL_HANDLER_ARGS) 4063 { 4064 struct hn_softc *sc = arg1; 4065 int pollhz, error; 4066 4067 pollhz = sc->hn_pollhz; 4068 error = sysctl_handle_int(oidp, &pollhz, 0, req); 4069 
if (error || req->newptr == NULL) 4070 return (error); 4071 4072 if (pollhz != 0 && 4073 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) 4074 return (EINVAL); 4075 4076 HN_LOCK(sc); 4077 if (sc->hn_pollhz != pollhz) { 4078 sc->hn_pollhz = pollhz; 4079 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && 4080 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 4081 hn_polling(sc, sc->hn_pollhz); 4082 } 4083 HN_UNLOCK(sc); 4084 4085 return (0); 4086 } 4087 4088 static int 4089 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 4090 { 4091 struct hn_softc *sc = arg1; 4092 char verstr[16]; 4093 4094 snprintf(verstr, sizeof(verstr), "%u.%u", 4095 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 4096 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 4097 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 4098 } 4099 4100 static int 4101 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 4102 { 4103 struct hn_softc *sc = arg1; 4104 char caps_str[128]; 4105 uint32_t caps; 4106 4107 HN_LOCK(sc); 4108 caps = sc->hn_caps; 4109 HN_UNLOCK(sc); 4110 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 4111 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 4112 } 4113 4114 static int 4115 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 4116 { 4117 struct hn_softc *sc = arg1; 4118 char assist_str[128]; 4119 uint32_t hwassist; 4120 4121 HN_LOCK(sc); 4122 hwassist = sc->hn_ifp->if_hwassist; 4123 HN_UNLOCK(sc); 4124 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 4125 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 4126 } 4127 4128 static int 4129 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 4130 { 4131 struct hn_softc *sc = arg1; 4132 char filter_str[128]; 4133 uint32_t filter; 4134 4135 HN_LOCK(sc); 4136 filter = sc->hn_rx_filter; 4137 HN_UNLOCK(sc); 4138 snprintf(filter_str, sizeof(filter_str), "%b", filter, 4139 NDIS_PACKET_TYPES); 4140 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 4141 } 4142 4143 #ifndef RSS 4144 4145 static int 4146 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 4147 { 4148 struct hn_softc *sc = arg1; 4149 int error; 4150 4151 HN_LOCK(sc); 4152 4153 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4154 if (error || req->newptr == NULL) 4155 goto back; 4156 4157 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4158 if (error) 4159 goto back; 4160 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 4161 4162 if (sc->hn_rx_ring_inuse > 1) { 4163 error = hn_rss_reconfig(sc); 4164 } else { 4165 /* Not RSS capable, at least for now; just save the RSS key. */ 4166 error = 0; 4167 } 4168 back: 4169 HN_UNLOCK(sc); 4170 return (error); 4171 } 4172 4173 static int 4174 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 4175 { 4176 struct hn_softc *sc = arg1; 4177 int error; 4178 4179 HN_LOCK(sc); 4180 4181 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4182 if (error || req->newptr == NULL) 4183 goto back; 4184 4185 /* 4186 * Don't allow RSS indirect table change, if this interface is not 4187 * RSS capable currently. 
4188 */ 4189 if (sc->hn_rx_ring_inuse == 1) { 4190 error = EOPNOTSUPP; 4191 goto back; 4192 } 4193 4194 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4195 if (error) 4196 goto back; 4197 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 4198 4199 hn_rss_ind_fixup(sc); 4200 error = hn_rss_reconfig(sc); 4201 back: 4202 HN_UNLOCK(sc); 4203 return (error); 4204 } 4205 4206 #endif /* !RSS */ 4207 4208 static int 4209 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 4210 { 4211 struct hn_softc *sc = arg1; 4212 char hash_str[128]; 4213 uint32_t hash; 4214 4215 HN_LOCK(sc); 4216 hash = sc->hn_rss_hash; 4217 HN_UNLOCK(sc); 4218 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4219 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4220 } 4221 4222 static int 4223 hn_vf_sysctl(SYSCTL_HANDLER_ARGS) 4224 { 4225 struct hn_softc *sc = arg1; 4226 char vf_name[IFNAMSIZ + 1]; 4227 struct ifnet *vf_ifp; 4228 4229 HN_LOCK(sc); 4230 vf_name[0] = '\0'; 4231 vf_ifp = sc->hn_vf_ifp; 4232 if (vf_ifp != NULL) 4233 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4234 HN_UNLOCK(sc); 4235 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4236 } 4237 4238 static int 4239 hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) 4240 { 4241 struct hn_softc *sc = arg1; 4242 char vf_name[IFNAMSIZ + 1]; 4243 struct ifnet *vf_ifp; 4244 4245 HN_LOCK(sc); 4246 vf_name[0] = '\0'; 4247 vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; 4248 if (vf_ifp != NULL) 4249 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4250 HN_UNLOCK(sc); 4251 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4252 } 4253 4254 static int 4255 hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) 4256 { 4257 struct rm_priotracker pt; 4258 struct sbuf *sb; 4259 int error, i; 4260 bool first; 4261 4262 error = sysctl_wire_old_buffer(req, 0); 4263 if (error != 0) 4264 return (error); 4265 4266 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4267 if (sb == NULL) 4268 return (ENOMEM); 4269 4270 rm_rlock(&hn_vfmap_lock, &pt); 4271 4272 first = true; 4273 for (i = 0; i < hn_vfmap_size; ++i) { 4274 struct ifnet *ifp; 4275 4276 if (hn_vfmap[i] == NULL) 4277 continue; 4278 4279 ifp = ifnet_byindex(i); 4280 if (ifp != NULL) { 4281 if (first) 4282 sbuf_printf(sb, "%s", ifp->if_xname); 4283 else 4284 sbuf_printf(sb, " %s", ifp->if_xname); 4285 first = false; 4286 } 4287 } 4288 4289 rm_runlock(&hn_vfmap_lock, &pt); 4290 4291 error = sbuf_finish(sb); 4292 sbuf_delete(sb); 4293 return (error); 4294 } 4295 4296 static int 4297 hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) 4298 { 4299 struct rm_priotracker pt; 4300 struct sbuf *sb; 4301 int error, i; 4302 bool first; 4303 4304 error = sysctl_wire_old_buffer(req, 0); 4305 if (error != 0) 4306 return (error); 4307 4308 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4309 if (sb == NULL) 4310 return (ENOMEM); 4311 4312 rm_rlock(&hn_vfmap_lock, &pt); 4313 4314 first = true; 4315 for (i = 0; i < hn_vfmap_size; ++i) { 4316 struct ifnet *ifp, *hn_ifp; 4317 4318 hn_ifp = hn_vfmap[i]; 4319 if (hn_ifp == NULL) 4320 continue; 4321 4322 ifp = ifnet_byindex(i); 4323 if (ifp != NULL) { 4324 if (first) { 4325 sbuf_printf(sb, "%s:%s", ifp->if_xname, 4326 hn_ifp->if_xname); 4327 } else { 4328 sbuf_printf(sb, " %s:%s", ifp->if_xname, 4329 hn_ifp->if_xname); 4330 } 4331 first = false; 4332 } 4333 } 4334 4335 rm_runlock(&hn_vfmap_lock, &pt); 4336 4337 error = sbuf_finish(sb); 4338 sbuf_delete(sb); 4339 return (error); 4340 } 4341 4342 static int 4343 hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS) 4344 { 
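	/*
	 * Sysctl toggle for HN_XVFFLAG_ACCBPF.  When set, packets sent
	 * through the transparent VF are tapped on this interface's BPF
	 * listeners only after the VF transmit succeeds; when clear, they
	 * are tapped unconditionally before being handed to the VF.  See
	 * hn_transmit().
	 */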
	struct hn_softc *sc = arg1;
	int error, onoff = 0;

	if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF)
		onoff = 1;
	error = sysctl_handle_int(oidp, &onoff, 0, req);
	if (error || req->newptr == NULL)
		return (error);

	HN_LOCK(sc);
	/* NOTE: hn_vf_lock for hn_transmit() */
	rm_wlock(&sc->hn_vf_lock);
	if (onoff)
		sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
	else
		sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF;
	rm_wunlock(&sc->hn_vf_lock);
	HN_UNLOCK(sc);

	return (0);
}

static int
hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int enabled = 0;

	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
		enabled = 1;
	return (sysctl_handle_int(oidp, &enabled, 0, req));
}

static int
hn_check_iplen(const struct mbuf *m, int hoff)
{
	const struct ip *ip;
	int len, iphlen, iplen;
	const struct tcphdr *th;
	int thoff;				/* TCP data offset */

	len = hoff + sizeof(struct ip);

	/* The packet must be at least the size of an IP header. */
	if (m->m_pkthdr.len < len)
		return IPPROTO_DONE;

	/* The fixed IP header must reside completely in the first mbuf. */
	if (m->m_len < len)
		return IPPROTO_DONE;

	ip = mtodo(m, hoff);

	/* Bound check the packet's stated IP header length. */
	iphlen = ip->ip_hl << 2;
	if (iphlen < sizeof(struct ip))		/* minimum header length */
		return IPPROTO_DONE;

	/* The full IP header must reside completely in the one mbuf. */
	if (m->m_len < hoff + iphlen)
		return IPPROTO_DONE;

	iplen = ntohs(ip->ip_len);

	/*
	 * Check that the amount of data in the buffers is at least as much
	 * as the IP header would have us expect.
	 */
	if (m->m_pkthdr.len < hoff + iplen)
		return IPPROTO_DONE;

	/*
	 * Ignore IP fragments.
	 */
	if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
		return IPPROTO_DONE;

	/*
	 * The TCP/IP or UDP/IP header must be entirely contained within
	 * the first fragment of a packet.
	 */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		if (iplen < iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
		thoff = th->th_off << 2;
		if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + thoff)
			return IPPROTO_DONE;
		break;
	case IPPROTO_UDP:
		if (iplen < iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		break;
	default:
		if (iplen < iphlen)
			return IPPROTO_DONE;
		break;
	}
	return ip->ip_p;
}

static int
hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
{
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	device_t dev = sc->hn_dev;
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
	int lroent_cnt;
#endif
#endif
	int i;

	/*
	 * Create RXBUF for reception.
	 *
	 * NOTE:
	 * - It is shared by all channels.
	 * - A large enough buffer is allocated; certain versions of NVS
	 *   may further limit the usable space.
4473 */ 4474 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4475 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 4476 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4477 if (sc->hn_rxbuf == NULL) { 4478 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 4479 return (ENOMEM); 4480 } 4481 4482 sc->hn_rx_ring_cnt = ring_cnt; 4483 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 4484 4485 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 4486 M_DEVBUF, M_WAITOK | M_ZERO); 4487 4488 #if defined(INET) || defined(INET6) 4489 #if __FreeBSD_version >= 1100095 4490 lroent_cnt = hn_lro_entry_count; 4491 if (lroent_cnt < TCP_LRO_ENTRIES) 4492 lroent_cnt = TCP_LRO_ENTRIES; 4493 if (bootverbose) 4494 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 4495 #endif 4496 #endif /* INET || INET6 */ 4497 4498 ctx = device_get_sysctl_ctx(dev); 4499 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 4500 4501 /* Create dev.hn.UNIT.rx sysctl tree */ 4502 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 4503 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4504 4505 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4506 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4507 4508 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4509 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 4510 &rxr->hn_br_dma, BUS_DMA_WAITOK); 4511 if (rxr->hn_br == NULL) { 4512 device_printf(dev, "allocate bufring failed\n"); 4513 return (ENOMEM); 4514 } 4515 4516 if (hn_trust_hosttcp) 4517 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 4518 if (hn_trust_hostudp) 4519 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 4520 if (hn_trust_hostip) 4521 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 4522 rxr->hn_ifp = sc->hn_ifp; 4523 if (i < sc->hn_tx_ring_cnt) 4524 rxr->hn_txr = &sc->hn_tx_ring[i]; 4525 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 4526 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 4527 rxr->hn_rx_idx = i; 4528 rxr->hn_rxbuf = sc->hn_rxbuf; 4529 4530 /* 4531 * Initialize LRO. 
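		 *
		 * Which tcp_lro initializer is used, and whether the length
		 * and ACK-count limits below apply, depends on the
		 * __FreeBSD_version the driver is built against.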
4532 */ 4533 #if defined(INET) || defined(INET6) 4534 #if __FreeBSD_version >= 1100095 4535 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 4536 hn_lro_mbufq_depth); 4537 #else 4538 tcp_lro_init(&rxr->hn_lro); 4539 rxr->hn_lro.ifp = sc->hn_ifp; 4540 #endif 4541 #if __FreeBSD_version >= 1100099 4542 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 4543 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 4544 #endif 4545 #endif /* INET || INET6 */ 4546 4547 if (sc->hn_rx_sysctl_tree != NULL) { 4548 char name[16]; 4549 4550 /* 4551 * Create per RX ring sysctl tree: 4552 * dev.hn.UNIT.rx.RINGID 4553 */ 4554 snprintf(name, sizeof(name), "%d", i); 4555 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 4556 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 4557 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4558 4559 if (rxr->hn_rx_sysctl_tree != NULL) { 4560 SYSCTL_ADD_ULONG(ctx, 4561 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 4562 OID_AUTO, "packets", CTLFLAG_RW, 4563 &rxr->hn_pkts, "# of packets received"); 4564 SYSCTL_ADD_ULONG(ctx, 4565 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 4566 OID_AUTO, "rss_pkts", CTLFLAG_RW, 4567 &rxr->hn_rss_pkts, 4568 "# of packets w/ RSS info received"); 4569 SYSCTL_ADD_INT(ctx, 4570 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 4571 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 4572 &rxr->hn_pktbuf_len, 0, 4573 "Temporary channel packet buffer length"); 4574 } 4575 } 4576 } 4577 4578 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 4579 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4580 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 4581 #if __FreeBSD_version < 1100095 4582 hn_rx_stat_int_sysctl, 4583 #else 4584 hn_rx_stat_u64_sysctl, 4585 #endif 4586 "LU", "LRO queued"); 4587 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 4588 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4589 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 4590 #if __FreeBSD_version < 1100095 4591 hn_rx_stat_int_sysctl, 4592 #else 4593 hn_rx_stat_u64_sysctl, 4594 #endif 4595 "LU", "LRO flushed"); 4596 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 4597 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4598 __offsetof(struct hn_rx_ring, hn_lro_tried), 4599 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 4600 #if __FreeBSD_version >= 1100099 4601 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 4602 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 4603 hn_lro_lenlim_sysctl, "IU", 4604 "Max # of data bytes to be aggregated by LRO"); 4605 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 4606 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 4607 hn_lro_ackcnt_sysctl, "I", 4608 "Max # of ACKs to be aggregated by LRO"); 4609 #endif 4610 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 4611 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 4612 hn_trust_hcsum_sysctl, "I", 4613 "Trust tcp segement verification on host side, " 4614 "when csum info is missing"); 4615 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 4616 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 4617 hn_trust_hcsum_sysctl, "I", 4618 "Trust udp datagram verification on host side, " 4619 "when csum info is missing"); 4620 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 4621 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 4622 hn_trust_hcsum_sysctl, "I", 4623 "Trust ip packet verification on host side, " 4624 "when csum info is missing"); 4625 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 4626 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4627 
__offsetof(struct hn_rx_ring, hn_csum_ip), 4628 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 4629 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 4630 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4631 __offsetof(struct hn_rx_ring, hn_csum_tcp), 4632 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 4633 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 4634 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4635 __offsetof(struct hn_rx_ring, hn_csum_udp), 4636 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 4637 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 4638 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4639 __offsetof(struct hn_rx_ring, hn_csum_trusted), 4640 hn_rx_stat_ulong_sysctl, "LU", 4641 "# of packets that we trust host's csum verification"); 4642 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 4643 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4644 __offsetof(struct hn_rx_ring, hn_small_pkts), 4645 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 4646 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 4647 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4648 __offsetof(struct hn_rx_ring, hn_ack_failed), 4649 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 4650 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 4651 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 4652 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 4653 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 4654 4655 return (0); 4656 } 4657 4658 static void 4659 hn_destroy_rx_data(struct hn_softc *sc) 4660 { 4661 int i; 4662 4663 if (sc->hn_rxbuf != NULL) { 4664 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) 4665 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 4666 else 4667 device_printf(sc->hn_dev, "RXBUF is referenced\n"); 4668 sc->hn_rxbuf = NULL; 4669 } 4670 4671 if (sc->hn_rx_ring_cnt == 0) 4672 return; 4673 4674 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4675 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4676 4677 if (rxr->hn_br == NULL) 4678 continue; 4679 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { 4680 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 4681 } else { 4682 device_printf(sc->hn_dev, 4683 "%dth channel bufring is referenced", i); 4684 } 4685 rxr->hn_br = NULL; 4686 4687 #if defined(INET) || defined(INET6) 4688 tcp_lro_free(&rxr->hn_lro); 4689 #endif 4690 free(rxr->hn_pktbuf, M_DEVBUF); 4691 } 4692 free(sc->hn_rx_ring, M_DEVBUF); 4693 sc->hn_rx_ring = NULL; 4694 4695 sc->hn_rx_ring_cnt = 0; 4696 sc->hn_rx_ring_inuse = 0; 4697 } 4698 4699 static int 4700 hn_tx_ring_create(struct hn_softc *sc, int id) 4701 { 4702 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 4703 device_t dev = sc->hn_dev; 4704 bus_dma_tag_t parent_dtag; 4705 int error, i; 4706 4707 txr->hn_sc = sc; 4708 txr->hn_tx_idx = id; 4709 4710 #ifndef HN_USE_TXDESC_BUFRING 4711 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 4712 #endif 4713 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 4714 4715 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 4716 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 4717 M_DEVBUF, M_WAITOK | M_ZERO); 4718 #ifndef HN_USE_TXDESC_BUFRING 4719 SLIST_INIT(&txr->hn_txlist); 4720 #else 4721 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 4722 M_WAITOK, &txr->hn_tx_lock); 4723 #endif 4724 4725 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { 4726 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( 4727 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); 4728 } else { 4729 txr->hn_tx_taskq = 
sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; 4730 } 4731 4732 #ifdef HN_IFSTART_SUPPORT 4733 if (hn_use_if_start) { 4734 txr->hn_txeof = hn_start_txeof; 4735 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 4736 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 4737 } else 4738 #endif 4739 { 4740 int br_depth; 4741 4742 txr->hn_txeof = hn_xmit_txeof; 4743 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 4744 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 4745 4746 br_depth = hn_get_txswq_depth(txr); 4747 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 4748 M_WAITOK, &txr->hn_tx_lock); 4749 } 4750 4751 txr->hn_direct_tx_size = hn_direct_tx_size; 4752 4753 /* 4754 * Always schedule transmission instead of trying to do direct 4755 * transmission. This one gives the best performance so far. 4756 */ 4757 txr->hn_sched_tx = 1; 4758 4759 parent_dtag = bus_get_dma_tag(dev); 4760 4761 /* DMA tag for RNDIS packet messages. */ 4762 error = bus_dma_tag_create(parent_dtag, /* parent */ 4763 HN_RNDIS_PKT_ALIGN, /* alignment */ 4764 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 4765 BUS_SPACE_MAXADDR, /* lowaddr */ 4766 BUS_SPACE_MAXADDR, /* highaddr */ 4767 NULL, NULL, /* filter, filterarg */ 4768 HN_RNDIS_PKT_LEN, /* maxsize */ 4769 1, /* nsegments */ 4770 HN_RNDIS_PKT_LEN, /* maxsegsize */ 4771 0, /* flags */ 4772 NULL, /* lockfunc */ 4773 NULL, /* lockfuncarg */ 4774 &txr->hn_tx_rndis_dtag); 4775 if (error) { 4776 device_printf(dev, "failed to create rndis dmatag\n"); 4777 return error; 4778 } 4779 4780 /* DMA tag for data. */ 4781 error = bus_dma_tag_create(parent_dtag, /* parent */ 4782 1, /* alignment */ 4783 HN_TX_DATA_BOUNDARY, /* boundary */ 4784 BUS_SPACE_MAXADDR, /* lowaddr */ 4785 BUS_SPACE_MAXADDR, /* highaddr */ 4786 NULL, NULL, /* filter, filterarg */ 4787 HN_TX_DATA_MAXSIZE, /* maxsize */ 4788 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 4789 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 4790 0, /* flags */ 4791 NULL, /* lockfunc */ 4792 NULL, /* lockfuncarg */ 4793 &txr->hn_tx_data_dtag); 4794 if (error) { 4795 device_printf(dev, "failed to create data dmatag\n"); 4796 return error; 4797 } 4798 4799 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 4800 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 4801 4802 txd->txr = txr; 4803 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 4804 STAILQ_INIT(&txd->agg_list); 4805 4806 /* 4807 * Allocate and load RNDIS packet message. 4808 */ 4809 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 4810 (void **)&txd->rndis_pkt, 4811 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 4812 &txd->rndis_pkt_dmap); 4813 if (error) { 4814 device_printf(dev, 4815 "failed to allocate rndis_packet_msg, %d\n", i); 4816 return error; 4817 } 4818 4819 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 4820 txd->rndis_pkt_dmap, 4821 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 4822 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 4823 BUS_DMA_NOWAIT); 4824 if (error) { 4825 device_printf(dev, 4826 "failed to load rndis_packet_msg, %d\n", i); 4827 bus_dmamem_free(txr->hn_tx_rndis_dtag, 4828 txd->rndis_pkt, txd->rndis_pkt_dmap); 4829 return error; 4830 } 4831 4832 /* DMA map for TX data. 
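		 *
		 * Only the map is created here; it is loaded with each
		 * packet's mbuf chain at transmit time and unloaded once
		 * the send completes.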
*/ 4833 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 4834 &txd->data_dmap); 4835 if (error) { 4836 device_printf(dev, 4837 "failed to allocate tx data dmamap\n"); 4838 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 4839 txd->rndis_pkt_dmap); 4840 bus_dmamem_free(txr->hn_tx_rndis_dtag, 4841 txd->rndis_pkt, txd->rndis_pkt_dmap); 4842 return error; 4843 } 4844 4845 /* All set, put it to list */ 4846 txd->flags |= HN_TXD_FLAG_ONLIST; 4847 #ifndef HN_USE_TXDESC_BUFRING 4848 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 4849 #else 4850 buf_ring_enqueue(txr->hn_txdesc_br, txd); 4851 #endif 4852 } 4853 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 4854 4855 if (sc->hn_tx_sysctl_tree != NULL) { 4856 struct sysctl_oid_list *child; 4857 struct sysctl_ctx_list *ctx; 4858 char name[16]; 4859 4860 /* 4861 * Create per TX ring sysctl tree: 4862 * dev.hn.UNIT.tx.RINGID 4863 */ 4864 ctx = device_get_sysctl_ctx(dev); 4865 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 4866 4867 snprintf(name, sizeof(name), "%d", id); 4868 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 4869 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4870 4871 if (txr->hn_tx_sysctl_tree != NULL) { 4872 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 4873 4874 #ifdef HN_DEBUG 4875 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 4876 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 4877 "# of available TX descs"); 4878 #endif 4879 #ifdef HN_IFSTART_SUPPORT 4880 if (!hn_use_if_start) 4881 #endif 4882 { 4883 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 4884 CTLFLAG_RD, &txr->hn_oactive, 0, 4885 "over active"); 4886 } 4887 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 4888 CTLFLAG_RW, &txr->hn_pkts, 4889 "# of packets transmitted"); 4890 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 4891 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 4892 } 4893 } 4894 4895 return 0; 4896 } 4897 4898 static void 4899 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 4900 { 4901 struct hn_tx_ring *txr = txd->txr; 4902 4903 KASSERT(txd->m == NULL, ("still has mbuf installed")); 4904 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 4905 4906 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 4907 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 4908 txd->rndis_pkt_dmap); 4909 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 4910 } 4911 4912 static void 4913 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) 4914 { 4915 4916 KASSERT(txd->refs == 0 || txd->refs == 1, 4917 ("invalid txd refs %d", txd->refs)); 4918 4919 /* Aggregated txds will be freed by their aggregating txd. */ 4920 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { 4921 int freed; 4922 4923 freed = hn_txdesc_put(txr, txd); 4924 KASSERT(freed, ("can't free txdesc")); 4925 } 4926 } 4927 4928 static void 4929 hn_tx_ring_destroy(struct hn_tx_ring *txr) 4930 { 4931 int i; 4932 4933 if (txr->hn_txdesc == NULL) 4934 return; 4935 4936 /* 4937 * NOTE: 4938 * Because the freeing of aggregated txds will be deferred 4939 * to the aggregating txd, two passes are used here: 4940 * - The first pass GCes any pending txds. This GC is necessary, 4941 * since if the channels are revoked, hypervisor will not 4942 * deliver send-done for all pending txds. 4943 * - The second pass frees the busdma stuffs, i.e. after all txds 4944 * were freed. 
4945 */ 4946 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 4947 hn_txdesc_gc(txr, &txr->hn_txdesc[i]); 4948 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 4949 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); 4950 4951 if (txr->hn_tx_data_dtag != NULL) 4952 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 4953 if (txr->hn_tx_rndis_dtag != NULL) 4954 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 4955 4956 #ifdef HN_USE_TXDESC_BUFRING 4957 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 4958 #endif 4959 4960 free(txr->hn_txdesc, M_DEVBUF); 4961 txr->hn_txdesc = NULL; 4962 4963 if (txr->hn_mbuf_br != NULL) 4964 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 4965 4966 #ifndef HN_USE_TXDESC_BUFRING 4967 mtx_destroy(&txr->hn_txlist_spin); 4968 #endif 4969 mtx_destroy(&txr->hn_tx_lock); 4970 } 4971 4972 static int 4973 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 4974 { 4975 struct sysctl_oid_list *child; 4976 struct sysctl_ctx_list *ctx; 4977 int i; 4978 4979 /* 4980 * Create TXBUF for chimney sending. 4981 * 4982 * NOTE: It is shared by all channels. 4983 */ 4984 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 4985 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 4986 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4987 if (sc->hn_chim == NULL) { 4988 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 4989 return (ENOMEM); 4990 } 4991 4992 sc->hn_tx_ring_cnt = ring_cnt; 4993 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 4994 4995 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 4996 M_DEVBUF, M_WAITOK | M_ZERO); 4997 4998 ctx = device_get_sysctl_ctx(sc->hn_dev); 4999 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 5000 5001 /* Create dev.hn.UNIT.tx sysctl tree */ 5002 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 5003 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5004 5005 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 5006 int error; 5007 5008 error = hn_tx_ring_create(sc, i); 5009 if (error) 5010 return error; 5011 } 5012 5013 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 5014 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5015 __offsetof(struct hn_tx_ring, hn_no_txdescs), 5016 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 5017 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 5018 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5019 __offsetof(struct hn_tx_ring, hn_send_failed), 5020 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 5021 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 5022 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5023 __offsetof(struct hn_tx_ring, hn_txdma_failed), 5024 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 5025 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 5026 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5027 __offsetof(struct hn_tx_ring, hn_flush_failed), 5028 hn_tx_stat_ulong_sysctl, "LU", 5029 "# of packet transmission aggregation flush failure"); 5030 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 5031 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5032 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 5033 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 5034 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 5035 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5036 __offsetof(struct hn_tx_ring, hn_tx_chimney), 5037 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 5038 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 5039 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5040 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 5041 
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 5042 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 5043 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 5044 "# of total TX descs"); 5045 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 5046 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 5047 "Chimney send packet size upper boundary"); 5048 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 5049 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5050 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 5051 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 5052 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5053 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 5054 hn_tx_conf_int_sysctl, "I", 5055 "Size of the packet for direct transmission"); 5056 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 5057 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5058 __offsetof(struct hn_tx_ring, hn_sched_tx), 5059 hn_tx_conf_int_sysctl, "I", 5060 "Always schedule transmission " 5061 "instead of doing direct transmission"); 5062 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 5063 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 5064 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 5065 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 5066 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 5067 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 5068 "Applied packet transmission aggregation size"); 5069 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 5070 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5071 hn_txagg_pktmax_sysctl, "I", 5072 "Applied packet transmission aggregation packets"); 5073 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 5074 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5075 hn_txagg_align_sysctl, "I", 5076 "Applied packet transmission aggregation alignment"); 5077 5078 return 0; 5079 } 5080 5081 static void 5082 hn_set_chim_size(struct hn_softc *sc, int chim_size) 5083 { 5084 int i; 5085 5086 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5087 sc->hn_tx_ring[i].hn_chim_size = chim_size; 5088 } 5089 5090 static void 5091 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 5092 { 5093 struct ifnet *ifp = sc->hn_ifp; 5094 u_int hw_tsomax; 5095 int tso_minlen; 5096 5097 HN_LOCK_ASSERT(sc); 5098 5099 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 5100 return; 5101 5102 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 5103 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 5104 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 5105 5106 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 5107 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 5108 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 5109 5110 if (tso_maxlen < tso_minlen) 5111 tso_maxlen = tso_minlen; 5112 else if (tso_maxlen > IP_MAXPACKET) 5113 tso_maxlen = IP_MAXPACKET; 5114 if (tso_maxlen > sc->hn_ndis_tso_szmax) 5115 tso_maxlen = sc->hn_ndis_tso_szmax; 5116 hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 5117 5118 if (hn_xpnt_vf_isready(sc)) { 5119 if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) 5120 hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; 5121 } 5122 ifp->if_hw_tsomax = hw_tsomax; 5123 if (bootverbose) 5124 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 5125 } 5126 5127 static void 5128 hn_fixup_tx_data(struct hn_softc *sc) 5129 { 5130 uint64_t csum_assist; 5131 int i; 5132 5133 hn_set_chim_size(sc, sc->hn_chim_szmax); 5134 if (hn_tx_chimney_size > 0 && 5135 hn_tx_chimney_size < sc->hn_chim_szmax) 5136 hn_set_chim_size(sc, 
hn_tx_chimney_size); 5137 5138 csum_assist = 0; 5139 if (sc->hn_caps & HN_CAP_IPCS) 5140 csum_assist |= CSUM_IP; 5141 if (sc->hn_caps & HN_CAP_TCP4CS) 5142 csum_assist |= CSUM_IP_TCP; 5143 if (sc->hn_caps & HN_CAP_UDP4CS) 5144 csum_assist |= CSUM_IP_UDP; 5145 if (sc->hn_caps & HN_CAP_TCP6CS) 5146 csum_assist |= CSUM_IP6_TCP; 5147 if (sc->hn_caps & HN_CAP_UDP6CS) 5148 csum_assist |= CSUM_IP6_UDP; 5149 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5150 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 5151 5152 if (sc->hn_caps & HN_CAP_HASHVAL) { 5153 /* 5154 * Support HASHVAL pktinfo on TX path. 5155 */ 5156 if (bootverbose) 5157 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 5158 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5159 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 5160 } 5161 } 5162 5163 static void 5164 hn_destroy_tx_data(struct hn_softc *sc) 5165 { 5166 int i; 5167 5168 if (sc->hn_chim != NULL) { 5169 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { 5170 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 5171 } else { 5172 device_printf(sc->hn_dev, 5173 "chimney sending buffer is referenced"); 5174 } 5175 sc->hn_chim = NULL; 5176 } 5177 5178 if (sc->hn_tx_ring_cnt == 0) 5179 return; 5180 5181 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5182 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 5183 5184 free(sc->hn_tx_ring, M_DEVBUF); 5185 sc->hn_tx_ring = NULL; 5186 5187 sc->hn_tx_ring_cnt = 0; 5188 sc->hn_tx_ring_inuse = 0; 5189 } 5190 5191 #ifdef HN_IFSTART_SUPPORT 5192 5193 static void 5194 hn_start_taskfunc(void *xtxr, int pending __unused) 5195 { 5196 struct hn_tx_ring *txr = xtxr; 5197 5198 mtx_lock(&txr->hn_tx_lock); 5199 hn_start_locked(txr, 0); 5200 mtx_unlock(&txr->hn_tx_lock); 5201 } 5202 5203 static int 5204 hn_start_locked(struct hn_tx_ring *txr, int len) 5205 { 5206 struct hn_softc *sc = txr->hn_sc; 5207 struct ifnet *ifp = sc->hn_ifp; 5208 int sched = 0; 5209 5210 KASSERT(hn_use_if_start, 5211 ("hn_start_locked is called, when if_start is disabled")); 5212 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5213 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5214 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5215 5216 if (__predict_false(txr->hn_suspended)) 5217 return (0); 5218 5219 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 5220 IFF_DRV_RUNNING) 5221 return (0); 5222 5223 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 5224 struct hn_txdesc *txd; 5225 struct mbuf *m_head; 5226 int error; 5227 5228 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 5229 if (m_head == NULL) 5230 break; 5231 5232 if (len > 0 && m_head->m_pkthdr.len > len) { 5233 /* 5234 * This sending could be time consuming; let callers 5235 * dispatch this packet sending (and sending of any 5236 * following up packets) to tx taskqueue. 
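			 *
			 * Direct callers pass hn_direct_tx_size as len, so
			 * larger packets are deferred; the taskqueue path
			 * passes len == 0 and sends everything.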
5237 */ 5238 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5239 sched = 1; 5240 break; 5241 } 5242 5243 #if defined(INET6) || defined(INET) 5244 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 5245 m_head = hn_tso_fixup(m_head); 5246 if (__predict_false(m_head == NULL)) { 5247 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5248 continue; 5249 } 5250 } 5251 #endif 5252 5253 txd = hn_txdesc_get(txr); 5254 if (txd == NULL) { 5255 txr->hn_no_txdescs++; 5256 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5257 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5258 break; 5259 } 5260 5261 error = hn_encap(ifp, txr, txd, &m_head); 5262 if (error) { 5263 /* Both txd and m_head are freed */ 5264 KASSERT(txr->hn_agg_txd == NULL, 5265 ("encap failed w/ pending aggregating txdesc")); 5266 continue; 5267 } 5268 5269 if (txr->hn_agg_pktleft == 0) { 5270 if (txr->hn_agg_txd != NULL) { 5271 KASSERT(m_head == NULL, 5272 ("pending mbuf for aggregating txdesc")); 5273 error = hn_flush_txagg(ifp, txr); 5274 if (__predict_false(error)) { 5275 atomic_set_int(&ifp->if_drv_flags, 5276 IFF_DRV_OACTIVE); 5277 break; 5278 } 5279 } else { 5280 KASSERT(m_head != NULL, ("mbuf was freed")); 5281 error = hn_txpkt(ifp, txr, txd); 5282 if (__predict_false(error)) { 5283 /* txd is freed, but m_head is not */ 5284 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5285 atomic_set_int(&ifp->if_drv_flags, 5286 IFF_DRV_OACTIVE); 5287 break; 5288 } 5289 } 5290 } 5291 #ifdef INVARIANTS 5292 else { 5293 KASSERT(txr->hn_agg_txd != NULL, 5294 ("no aggregating txdesc")); 5295 KASSERT(m_head == NULL, 5296 ("pending mbuf for aggregating txdesc")); 5297 } 5298 #endif 5299 } 5300 5301 /* Flush pending aggerated transmission. */ 5302 if (txr->hn_agg_txd != NULL) 5303 hn_flush_txagg(ifp, txr); 5304 return (sched); 5305 } 5306 5307 static void 5308 hn_start(struct ifnet *ifp) 5309 { 5310 struct hn_softc *sc = ifp->if_softc; 5311 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 5312 5313 if (txr->hn_sched_tx) 5314 goto do_sched; 5315 5316 if (mtx_trylock(&txr->hn_tx_lock)) { 5317 int sched; 5318 5319 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5320 mtx_unlock(&txr->hn_tx_lock); 5321 if (!sched) 5322 return; 5323 } 5324 do_sched: 5325 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 5326 } 5327 5328 static void 5329 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 5330 { 5331 struct hn_tx_ring *txr = xtxr; 5332 5333 mtx_lock(&txr->hn_tx_lock); 5334 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 5335 hn_start_locked(txr, 0); 5336 mtx_unlock(&txr->hn_tx_lock); 5337 } 5338 5339 static void 5340 hn_start_txeof(struct hn_tx_ring *txr) 5341 { 5342 struct hn_softc *sc = txr->hn_sc; 5343 struct ifnet *ifp = sc->hn_ifp; 5344 5345 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5346 5347 if (txr->hn_sched_tx) 5348 goto do_sched; 5349 5350 if (mtx_trylock(&txr->hn_tx_lock)) { 5351 int sched; 5352 5353 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5354 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5355 mtx_unlock(&txr->hn_tx_lock); 5356 if (sched) { 5357 taskqueue_enqueue(txr->hn_tx_taskq, 5358 &txr->hn_tx_task); 5359 } 5360 } else { 5361 do_sched: 5362 /* 5363 * Release the OACTIVE earlier, with the hope, that 5364 * others could catch up. The task will clear the 5365 * flag again with the hn_tx_lock to avoid possible 5366 * races. 
		 */
		atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

#endif	/* HN_IFSTART_SUPPORT */

static int
hn_xmit(struct hn_tx_ring *txr, int len)
{
	struct hn_softc *sc = txr->hn_sc;
	struct ifnet *ifp = sc->hn_ifp;
	struct mbuf *m_head;
	int sched = 0;

	mtx_assert(&txr->hn_tx_lock, MA_OWNED);
#ifdef HN_IFSTART_SUPPORT
	KASSERT(hn_use_if_start == 0,
	    ("hn_xmit is called, when if_start is enabled"));
#endif
	KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));

	if (__predict_false(txr->hn_suspended))
		return (0);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
		return (0);

	while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
		struct hn_txdesc *txd;
		int error;

		if (len > 0 && m_head->m_pkthdr.len > len) {
			/*
			 * This sending could be time consuming; let callers
			 * dispatch this packet sending (and sending of any
			 * following up packets) to tx taskqueue.
			 */
			drbr_putback(ifp, txr->hn_mbuf_br, m_head);
			sched = 1;
			break;
		}

		txd = hn_txdesc_get(txr);
		if (txd == NULL) {
			txr->hn_no_txdescs++;
			drbr_putback(ifp, txr->hn_mbuf_br, m_head);
			txr->hn_oactive = 1;
			break;
		}

		error = hn_encap(ifp, txr, txd, &m_head);
		if (error) {
			/* Both txd and m_head are freed; discard */
			KASSERT(txr->hn_agg_txd == NULL,
			    ("encap failed w/ pending aggregating txdesc"));
			drbr_advance(ifp, txr->hn_mbuf_br);
			continue;
		}

		if (txr->hn_agg_pktleft == 0) {
			if (txr->hn_agg_txd != NULL) {
				KASSERT(m_head == NULL,
				    ("pending mbuf for aggregating txdesc"));
				error = hn_flush_txagg(ifp, txr);
				if (__predict_false(error)) {
					txr->hn_oactive = 1;
					break;
				}
			} else {
				KASSERT(m_head != NULL, ("mbuf was freed"));
				error = hn_txpkt(ifp, txr, txd);
				if (__predict_false(error)) {
					/* txd is freed, but m_head is not */
					drbr_putback(ifp, txr->hn_mbuf_br,
					    m_head);
					txr->hn_oactive = 1;
					break;
				}
			}
		}
#ifdef INVARIANTS
		else {
			KASSERT(txr->hn_agg_txd != NULL,
			    ("no aggregating txdesc"));
			KASSERT(m_head == NULL,
			    ("pending mbuf for aggregating txdesc"));
		}
#endif

		/* Sent */
		drbr_advance(ifp, txr->hn_mbuf_br);
	}

	/* Flush pending aggregated transmission. */
	if (txr->hn_agg_txd != NULL)
		hn_flush_txagg(ifp, txr);
	return (sched);
}

static int
hn_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct hn_softc *sc = ifp->if_softc;
	struct hn_tx_ring *txr;
	int error, idx = 0;

	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
		struct rm_priotracker pt;

		rm_rlock(&sc->hn_vf_lock, &pt);
		if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
			struct mbuf *m_bpf = NULL;
			int obytes, omcast = 0;

			obytes = m->m_pkthdr.len;
			if (m->m_flags & M_MCAST)
				omcast = 1;

			if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) {
				if (bpf_peers_present(ifp->if_bpf)) {
					m_bpf = m_copypacket(m, M_NOWAIT);
					if (m_bpf == NULL) {
						/*
						 * Failed to grab a shallow
						 * copy; tap now.
5494 */ 5495 ETHER_BPF_MTAP(ifp, m); 5496 } 5497 } 5498 } else { 5499 ETHER_BPF_MTAP(ifp, m); 5500 } 5501 5502 error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m); 5503 rm_runlock(&sc->hn_vf_lock, &pt); 5504 5505 if (m_bpf != NULL) { 5506 if (!error) 5507 ETHER_BPF_MTAP(ifp, m_bpf); 5508 m_freem(m_bpf); 5509 } 5510 5511 if (error == ENOBUFS) { 5512 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 5513 } else if (error) { 5514 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5515 } else { 5516 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 5517 if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes); 5518 if (omcast) { 5519 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 5520 omcast); 5521 } 5522 } 5523 return (error); 5524 } 5525 rm_runlock(&sc->hn_vf_lock, &pt); 5526 } 5527 5528 #if defined(INET6) || defined(INET) 5529 /* 5530 * Perform TSO packet header fixup now, since the TSO 5531 * packet header should be cache-hot. 5532 */ 5533 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 5534 m = hn_tso_fixup(m); 5535 if (__predict_false(m == NULL)) { 5536 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5537 return EIO; 5538 } 5539 } 5540 #endif 5541 5542 /* 5543 * Select the TX ring based on flowid 5544 */ 5545 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 5546 #ifdef RSS 5547 uint32_t bid; 5548 5549 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 5550 &bid) == 0) 5551 idx = bid % sc->hn_tx_ring_inuse; 5552 else 5553 #endif 5554 { 5555 #if defined(INET6) || defined(INET) 5556 int tcpsyn = 0; 5557 5558 if (m->m_pkthdr.len < 128 && 5559 (m->m_pkthdr.csum_flags & 5560 (CSUM_IP_TCP | CSUM_IP6_TCP)) && 5561 (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 5562 m = hn_check_tcpsyn(m, &tcpsyn); 5563 if (__predict_false(m == NULL)) { 5564 if_inc_counter(ifp, 5565 IFCOUNTER_OERRORS, 1); 5566 return (EIO); 5567 } 5568 } 5569 #else 5570 const int tcpsyn = 0; 5571 #endif 5572 if (tcpsyn) 5573 idx = 0; 5574 else 5575 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 5576 } 5577 } 5578 txr = &sc->hn_tx_ring[idx]; 5579 5580 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 5581 if (error) { 5582 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 5583 return error; 5584 } 5585 5586 if (txr->hn_oactive) 5587 return 0; 5588 5589 if (txr->hn_sched_tx) 5590 goto do_sched; 5591 5592 if (mtx_trylock(&txr->hn_tx_lock)) { 5593 int sched; 5594 5595 sched = hn_xmit(txr, txr->hn_direct_tx_size); 5596 mtx_unlock(&txr->hn_tx_lock); 5597 if (!sched) 5598 return 0; 5599 } 5600 do_sched: 5601 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 5602 return 0; 5603 } 5604 5605 static void 5606 hn_tx_ring_qflush(struct hn_tx_ring *txr) 5607 { 5608 struct mbuf *m; 5609 5610 mtx_lock(&txr->hn_tx_lock); 5611 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 5612 m_freem(m); 5613 mtx_unlock(&txr->hn_tx_lock); 5614 } 5615 5616 static void 5617 hn_xmit_qflush(struct ifnet *ifp) 5618 { 5619 struct hn_softc *sc = ifp->if_softc; 5620 struct rm_priotracker pt; 5621 int i; 5622 5623 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 5624 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 5625 if_qflush(ifp); 5626 5627 rm_rlock(&sc->hn_vf_lock, &pt); 5628 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 5629 sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp); 5630 rm_runlock(&sc->hn_vf_lock, &pt); 5631 } 5632 5633 static void 5634 hn_xmit_txeof(struct hn_tx_ring *txr) 5635 { 5636 5637 if (txr->hn_sched_tx) 5638 goto do_sched; 5639 5640 if (mtx_trylock(&txr->hn_tx_lock)) { 5641 int sched; 5642 5643 txr->hn_oactive = 0; 5644 sched = hn_xmit(txr, txr->hn_direct_tx_size); 5645 mtx_unlock(&txr->hn_tx_lock); 5646 if (sched) 
{ 5647 taskqueue_enqueue(txr->hn_tx_taskq, 5648 &txr->hn_tx_task); 5649 } 5650 } else { 5651 do_sched: 5652 /* 5653 * Release the oactive earlier, with the hope, that 5654 * others could catch up. The task will clear the 5655 * oactive again with the hn_tx_lock to avoid possible 5656 * races. 5657 */ 5658 txr->hn_oactive = 0; 5659 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 5660 } 5661 } 5662 5663 static void 5664 hn_xmit_taskfunc(void *xtxr, int pending __unused) 5665 { 5666 struct hn_tx_ring *txr = xtxr; 5667 5668 mtx_lock(&txr->hn_tx_lock); 5669 hn_xmit(txr, 0); 5670 mtx_unlock(&txr->hn_tx_lock); 5671 } 5672 5673 static void 5674 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) 5675 { 5676 struct hn_tx_ring *txr = xtxr; 5677 5678 mtx_lock(&txr->hn_tx_lock); 5679 txr->hn_oactive = 0; 5680 hn_xmit(txr, 0); 5681 mtx_unlock(&txr->hn_tx_lock); 5682 } 5683 5684 static int 5685 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) 5686 { 5687 struct vmbus_chan_br cbr; 5688 struct hn_rx_ring *rxr; 5689 struct hn_tx_ring *txr = NULL; 5690 int idx, error; 5691 5692 idx = vmbus_chan_subidx(chan); 5693 5694 /* 5695 * Link this channel to RX/TX ring. 5696 */ 5697 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 5698 ("invalid channel index %d, should > 0 && < %d", 5699 idx, sc->hn_rx_ring_inuse)); 5700 rxr = &sc->hn_rx_ring[idx]; 5701 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, 5702 ("RX ring %d already attached", idx)); 5703 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; 5704 rxr->hn_chan = chan; 5705 5706 if (bootverbose) { 5707 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", 5708 idx, vmbus_chan_id(chan)); 5709 } 5710 5711 if (idx < sc->hn_tx_ring_inuse) { 5712 txr = &sc->hn_tx_ring[idx]; 5713 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, 5714 ("TX ring %d already attached", idx)); 5715 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; 5716 5717 txr->hn_chan = chan; 5718 if (bootverbose) { 5719 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", 5720 idx, vmbus_chan_id(chan)); 5721 } 5722 } 5723 5724 /* Bind this channel to a proper CPU. */ 5725 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx)); 5726 5727 /* 5728 * Open this channel 5729 */ 5730 cbr.cbr = rxr->hn_br; 5731 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; 5732 cbr.cbr_txsz = HN_TXBR_SIZE; 5733 cbr.cbr_rxsz = HN_RXBR_SIZE; 5734 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); 5735 if (error) { 5736 if (error == EISCONN) { 5737 if_printf(sc->hn_ifp, "bufring is connected after " 5738 "chan%u open failure\n", vmbus_chan_id(chan)); 5739 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 5740 } else { 5741 if_printf(sc->hn_ifp, "open chan%u failed: %d\n", 5742 vmbus_chan_id(chan), error); 5743 } 5744 } 5745 return (error); 5746 } 5747 5748 static void 5749 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) 5750 { 5751 struct hn_rx_ring *rxr; 5752 int idx, error; 5753 5754 idx = vmbus_chan_subidx(chan); 5755 5756 /* 5757 * Link this channel to RX/TX ring. 
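	 *
	 * (In the detach path this lookup is only needed so that the
	 *  ring's ATTACHED flag can be cleared; the ring itself stays
	 *  allocated.)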
5758 */ 5759 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 5760 ("invalid channel index %d, should > 0 && < %d", 5761 idx, sc->hn_rx_ring_inuse)); 5762 rxr = &sc->hn_rx_ring[idx]; 5763 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), 5764 ("RX ring %d is not attached", idx)); 5765 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; 5766 5767 if (idx < sc->hn_tx_ring_inuse) { 5768 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; 5769 5770 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), 5771 ("TX ring %d is not attached attached", idx)); 5772 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; 5773 } 5774 5775 /* 5776 * Close this channel. 5777 * 5778 * NOTE: 5779 * Channel closing does _not_ destroy the target channel. 5780 */ 5781 error = vmbus_chan_close_direct(chan); 5782 if (error == EISCONN) { 5783 if_printf(sc->hn_ifp, "chan%u bufring is connected " 5784 "after being closed\n", vmbus_chan_id(chan)); 5785 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 5786 } else if (error) { 5787 if_printf(sc->hn_ifp, "chan%u close failed: %d\n", 5788 vmbus_chan_id(chan), error); 5789 } 5790 } 5791 5792 static int 5793 hn_attach_subchans(struct hn_softc *sc) 5794 { 5795 struct vmbus_channel **subchans; 5796 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 5797 int i, error = 0; 5798 5799 KASSERT(subchan_cnt > 0, ("no sub-channels")); 5800 5801 /* Attach the sub-channels. */ 5802 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 5803 for (i = 0; i < subchan_cnt; ++i) { 5804 int error1; 5805 5806 error1 = hn_chan_attach(sc, subchans[i]); 5807 if (error1) { 5808 error = error1; 5809 /* Move on; all channels will be detached later. */ 5810 } 5811 } 5812 vmbus_subchan_rel(subchans, subchan_cnt); 5813 5814 if (error) { 5815 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); 5816 } else { 5817 if (bootverbose) { 5818 if_printf(sc->hn_ifp, "%d sub-channels attached\n", 5819 subchan_cnt); 5820 } 5821 } 5822 return (error); 5823 } 5824 5825 static void 5826 hn_detach_allchans(struct hn_softc *sc) 5827 { 5828 struct vmbus_channel **subchans; 5829 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 5830 int i; 5831 5832 if (subchan_cnt == 0) 5833 goto back; 5834 5835 /* Detach the sub-channels. */ 5836 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 5837 for (i = 0; i < subchan_cnt; ++i) 5838 hn_chan_detach(sc, subchans[i]); 5839 vmbus_subchan_rel(subchans, subchan_cnt); 5840 5841 back: 5842 /* 5843 * Detach the primary channel, _after_ all sub-channels 5844 * are detached. 5845 */ 5846 hn_chan_detach(sc, sc->hn_prichan); 5847 5848 /* Wait for sub-channels to be destroyed, if any. */ 5849 vmbus_subchan_drain(sc->hn_prichan); 5850 5851 #ifdef INVARIANTS 5852 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 5853 KASSERT((sc->hn_rx_ring[i].hn_rx_flags & 5854 HN_RX_FLAG_ATTACHED) == 0, 5855 ("%dth RX ring is still attached", i)); 5856 } 5857 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 5858 KASSERT((sc->hn_tx_ring[i].hn_tx_flags & 5859 HN_TX_FLAG_ATTACHED) == 0, 5860 ("%dth TX ring is still attached", i)); 5861 } 5862 #endif 5863 } 5864 5865 static int 5866 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) 5867 { 5868 struct vmbus_channel **subchans; 5869 int nchan, rxr_cnt, error; 5870 5871 nchan = *nsubch + 1; 5872 if (nchan == 1) { 5873 /* 5874 * Multiple RX/TX rings are not requested. 5875 */ 5876 *nsubch = 0; 5877 return (0); 5878 } 5879 5880 /* 5881 * Query RSS capabilities, e.g. # of RX rings, and # of indirect 5882 * table entries. 
5883 */ 5884 error = hn_rndis_query_rsscaps(sc, &rxr_cnt); 5885 if (error) { 5886 /* No RSS; this is benign. */ 5887 *nsubch = 0; 5888 return (0); 5889 } 5890 if (bootverbose) { 5891 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", 5892 rxr_cnt, nchan); 5893 } 5894 5895 if (nchan > rxr_cnt) 5896 nchan = rxr_cnt; 5897 if (nchan == 1) { 5898 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); 5899 *nsubch = 0; 5900 return (0); 5901 } 5902 5903 /* 5904 * Allocate sub-channels from NVS. 5905 */ 5906 *nsubch = nchan - 1; 5907 error = hn_nvs_alloc_subchans(sc, nsubch); 5908 if (error || *nsubch == 0) { 5909 /* Failed to allocate sub-channels. */ 5910 *nsubch = 0; 5911 return (0); 5912 } 5913 5914 /* 5915 * Wait for all sub-channels to become ready before moving on. 5916 */ 5917 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); 5918 vmbus_subchan_rel(subchans, *nsubch); 5919 return (0); 5920 } 5921 5922 static bool 5923 hn_synth_attachable(const struct hn_softc *sc) 5924 { 5925 int i; 5926 5927 if (sc->hn_flags & HN_FLAG_ERRORS) 5928 return (false); 5929 5930 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 5931 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 5932 5933 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) 5934 return (false); 5935 } 5936 return (true); 5937 } 5938 5939 /* 5940 * Make sure that the RX filter is zero after the successful 5941 * RNDIS initialization. 5942 * 5943 * NOTE: 5944 * Under certain conditions on certain versions of Hyper-V, 5945 * the RNDIS rxfilter is _not_ zero on the hypervisor side 5946 * after the successful RNDIS initialization, which breaks 5947 * the assumption of any following code (well, it breaks the 5948 * RNDIS API contract actually). Clear the RNDIS rxfilter 5949 * explicitly, drain packets sneaking through, and drain the 5950 * interrupt taskqueues scheduled due to the stealth packets. 5951 */ 5952 static void 5953 hn_rndis_init_fixat(struct hn_softc *sc, int nchan) 5954 { 5955 5956 hn_disable_rx(sc); 5957 hn_drain_rxtx(sc, nchan); 5958 } 5959 5960 static int 5961 hn_synth_attach(struct hn_softc *sc, int mtu) 5962 { 5963 #define ATTACHED_NVS 0x0002 5964 #define ATTACHED_RNDIS 0x0004 5965 5966 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 5967 int error, nsubch, nchan = 1, i, rndis_inited; 5968 uint32_t old_caps, attached = 0; 5969 5970 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, 5971 ("synthetic parts were attached")); 5972 5973 if (!hn_synth_attachable(sc)) 5974 return (ENXIO); 5975 5976 /* Save capabilities for later verification. */ 5977 old_caps = sc->hn_caps; 5978 sc->hn_caps = 0; 5979 5980 /* Clear RSS stuffs. */ 5981 sc->hn_rss_ind_size = 0; 5982 sc->hn_rss_hash = 0; 5983 5984 /* 5985 * Attach the primary channel _before_ attaching NVS and RNDIS. 5986 */ 5987 error = hn_chan_attach(sc, sc->hn_prichan); 5988 if (error) 5989 goto failed; 5990 5991 /* 5992 * Attach NVS. 5993 */ 5994 error = hn_nvs_attach(sc, mtu); 5995 if (error) 5996 goto failed; 5997 attached |= ATTACHED_NVS; 5998 5999 /* 6000 * Attach RNDIS _after_ NVS is attached. 6001 */ 6002 error = hn_rndis_attach(sc, mtu, &rndis_inited); 6003 if (rndis_inited) 6004 attached |= ATTACHED_RNDIS; 6005 if (error) 6006 goto failed; 6007 6008 /* 6009 * Make sure capabilities are not changed. 
6010 */ 6011 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { 6012 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", 6013 old_caps, sc->hn_caps); 6014 error = ENXIO; 6015 goto failed; 6016 } 6017 6018 /* 6019 * Allocate sub-channels for multi-TX/RX rings. 6020 * 6021 * NOTE: 6022 * The # of RX rings that can be used is equivalent to the # of 6023 * channels to be requested. 6024 */ 6025 nsubch = sc->hn_rx_ring_cnt - 1; 6026 error = hn_synth_alloc_subchans(sc, &nsubch); 6027 if (error) 6028 goto failed; 6029 /* NOTE: _Full_ synthetic parts detach is required now. */ 6030 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; 6031 6032 /* 6033 * Set the # of TX/RX rings that could be used according to 6034 * the # of channels that NVS offered. 6035 */ 6036 nchan = nsubch + 1; 6037 hn_set_ring_inuse(sc, nchan); 6038 if (nchan == 1) { 6039 /* Only the primary channel can be used; done */ 6040 goto back; 6041 } 6042 6043 /* 6044 * Attach the sub-channels. 6045 * 6046 * NOTE: hn_set_ring_inuse() _must_ have been called. 6047 */ 6048 error = hn_attach_subchans(sc); 6049 if (error) 6050 goto failed; 6051 6052 /* 6053 * Configure RSS key and indirect table _after_ all sub-channels 6054 * are attached. 6055 */ 6056 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { 6057 /* 6058 * RSS key is not set yet; set it to the default RSS key. 6059 */ 6060 if (bootverbose) 6061 if_printf(sc->hn_ifp, "setup default RSS key\n"); 6062 #ifdef RSS 6063 rss_getkey(rss->rss_key); 6064 #else 6065 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); 6066 #endif 6067 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 6068 } 6069 6070 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { 6071 /* 6072 * RSS indirect table is not set yet; set it up in round- 6073 * robin fashion. 6074 */ 6075 if (bootverbose) { 6076 if_printf(sc->hn_ifp, "setup default RSS indirect " 6077 "table\n"); 6078 } 6079 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 6080 uint32_t subidx; 6081 6082 #ifdef RSS 6083 subidx = rss_get_indirection_to_bucket(i); 6084 #else 6085 subidx = i; 6086 #endif 6087 rss->rss_ind[i] = subidx % nchan; 6088 } 6089 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 6090 } else { 6091 /* 6092 * # of usable channels may be changed, so we have to 6093 * make sure that all entries in RSS indirect table 6094 * are valid. 6095 * 6096 * NOTE: hn_set_ring_inuse() _must_ have been called. 6097 */ 6098 hn_rss_ind_fixup(sc); 6099 } 6100 6101 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 6102 if (error) 6103 goto failed; 6104 back: 6105 /* 6106 * Fixup transmission aggregation setup. 6107 */ 6108 hn_set_txagg(sc); 6109 hn_rndis_init_fixat(sc, nchan); 6110 return (0); 6111 6112 failed: 6113 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 6114 hn_rndis_init_fixat(sc, nchan); 6115 hn_synth_detach(sc); 6116 } else { 6117 if (attached & ATTACHED_RNDIS) { 6118 hn_rndis_init_fixat(sc, nchan); 6119 hn_rndis_detach(sc); 6120 } 6121 if (attached & ATTACHED_NVS) 6122 hn_nvs_detach(sc); 6123 hn_chan_detach(sc, sc->hn_prichan); 6124 /* Restore old capabilities. */ 6125 sc->hn_caps = old_caps; 6126 } 6127 return (error); 6128 6129 #undef ATTACHED_RNDIS 6130 #undef ATTACHED_NVS 6131 } 6132 6133 /* 6134 * NOTE: 6135 * The interface must have been suspended though hn_suspend(), before 6136 * this function get called. 6137 */ 6138 static void 6139 hn_synth_detach(struct hn_softc *sc) 6140 { 6141 6142 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 6143 ("synthetic parts were not attached")); 6144 6145 /* Detach the RNDIS first. 
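	 * RNDIS runs on top of NVS, so it is torn down before NVS and
	 * before the channels are detached, reversing the attach order.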
	hn_rndis_detach(sc);

	/* Detach NVS. */
	hn_nvs_detach(sc);

	/* Detach all of the channels. */
	hn_detach_allchans(sc);

	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}

static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{
	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

#ifdef RSS
	if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) {
		if_printf(sc->hn_ifp, "# of RX rings (%d) does not match "
		    "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse,
		    rss_getnumbuckets());
	}
#endif

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}

static void
hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
{

	/*
	 * NOTE:
	 * The TX bufring will not be drained by the hypervisor,
	 * if the primary channel is revoked.
	 */
	while (!vmbus_chan_rx_empty(chan) ||
	    (!vmbus_chan_is_revoked(sc->hn_prichan) &&
	     !vmbus_chan_tx_empty(chan)))
		pause("waitch", 1);
	vmbus_chan_intr_drain(chan);
}

static void
hn_disable_rx(struct hn_softc *sc)
{

	/*
	 * Disable RX by clearing RX filter forcefully.
	 */
	sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
	hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */

	/*
	 * Give RNDIS enough time to flush all pending data packets.
	 */
	pause("waitrx", (200 * hz) / 1000);
}

/*
 * NOTE:
 * RX/TX _must_ have been suspended/disabled, before this function
 * is called.
 */
static void
hn_drain_rxtx(struct hn_softc *sc, int nchan)
{
	struct vmbus_channel **subch = NULL;
	int nsubch;

	/*
	 * Drain RX/TX bufrings and interrupts.
	 */
	nsubch = nchan - 1;
	if (nsubch > 0)
		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);

	if (subch != NULL) {
		int i;

		for (i = 0; i < nsubch; ++i)
			hn_chan_drain(sc, subch[i]);
	}
	hn_chan_drain(sc, sc->hn_prichan);

	if (subch != NULL)
		vmbus_subchan_rel(subch, nsubch);
}

static void
hn_suspend_data(struct hn_softc *sc)
{
	struct hn_tx_ring *txr;
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Suspend TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 1;
		mtx_unlock(&txr->hn_tx_lock);
		/* No one is able to send more packets now. */

		/*
		 * Wait for all pending sends to finish.
		 *
		 * NOTE:
		 * We will _not_ receive all pending send-done, if the
		 * primary channel is revoked.
		 */
		while (hn_tx_ring_pending(txr) &&
		    !vmbus_chan_is_revoked(sc->hn_prichan))
			pause("hnwtx", 1 /* 1 tick */);
	}

	/*
	 * Disable RX.
	 */
	hn_disable_rx(sc);

	/*
	 * Drain RX/TX.
	 */
	hn_drain_rxtx(sc, sc->hn_rx_ring_inuse);

	/*
	 * Drain any pending TX tasks.
	 *
	 * NOTE:
	 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX
	 * tasks will have to be drained _after_ the above hn_drain_rxtx().
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{

	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}

static void
hn_suspend_mgmt(struct hn_softc *sc)
{
	struct task task;

	HN_LOCK_ASSERT(sc);

	/*
	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
	 * through hn_mgmt_taskq.
	 */
	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
	vmbus_chan_run_task(sc->hn_prichan, &task);

	/*
	 * Make sure that all pending management tasks are completed.
	 */
	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
	taskqueue_drain_all(sc->hn_mgmt_taskq0);
}

static void
hn_suspend(struct hn_softc *sc)
{

	/* Disable polling. */
	hn_polling(sc, 0);

	/*
	 * If the non-transparent mode VF is activated, the synthetic
	 * device is receiving packets, so the data path of the
	 * synthetic device must be suspended.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_RXVF))
		hn_suspend_data(sc);
	hn_suspend_mgmt(sc);
}

static void
hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
{
	int i;

	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
	    ("invalid TX ring count %d", tx_ring_cnt));

	for (i = 0; i < tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 0;
		mtx_unlock(&txr->hn_tx_lock);
	}
}

static void
hn_resume_data(struct hn_softc *sc)
{
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Re-enable RX.
	 */
	hn_rxfilter_config(sc);

	/*
	 * Make sure to clear suspend status on "all" TX rings,
	 * since hn_tx_ring_inuse can be changed after
	 * hn_suspend_data().
	 */
	hn_resume_tx(sc, sc->hn_tx_ring_cnt);

#ifdef HN_IFSTART_SUPPORT
	if (!hn_use_if_start)
#endif
	{
		/*
		 * Flush unused drbrs, since hn_tx_ring_inuse may be
		 * reduced.
		 */
		for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
			hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
	}

	/*
	 * Kick start TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		/*
		 * Use txeof task, so that any pending oactive can be
		 * cleared properly.
		 */
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_resume_mgmt(struct hn_softc *sc)
{

	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;

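	/*
	 * hn_suspend_mgmt() cleared hn_mgmt_taskq (via a task executed on
	 * the primary channel), so that management tasks could no longer
	 * be scheduled through it while suspended; restoring the pointer
	 * above allows management tasks to be scheduled again.
	 */
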
	/*
	 * Kick off network change detection, if it was pending.
	 * If no network change was pending, start link status
	 * checks, which is more lightweight than network change
	 * detection.
	 */
	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		hn_change_network(sc);
	else
		hn_update_link_status(sc);
}

static void
hn_resume(struct hn_softc *sc)
{

	/*
	 * If the non-transparent mode VF is activated, the synthetic
	 * device has to receive packets, so the data path of the
	 * synthetic device must be resumed.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_RXVF))
		hn_resume_data(sc);

	/*
	 * Don't resume link status change if VF is attached/activated.
	 * - In the non-transparent VF mode, the synthetic device marks
	 *   link down until the VF is deactivated; i.e. VF is down.
	 * - In transparent VF mode, VF's media status is used until
	 *   the VF is detached.
	 */
	if ((sc->hn_flags & HN_FLAG_RXVF) == 0 &&
	    !(hn_xpnt_vf && sc->hn_vf_ifp != NULL))
		hn_resume_mgmt(sc);

	/*
	 * Re-enable polling if this interface is running and
	 * the polling is requested.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
		hn_polling(sc, sc->hn_pollhz);
}

static void
hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
{
	const struct rndis_status_msg *msg;
	int ofs;

	if (dlen < sizeof(*msg)) {
		if_printf(sc->hn_ifp, "invalid RNDIS status\n");
		return;
	}
	msg = data;

	switch (msg->rm_status) {
	case RNDIS_STATUS_MEDIA_CONNECT:
	case RNDIS_STATUS_MEDIA_DISCONNECT:
		hn_update_link_status(sc);
		break;

	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
	case RNDIS_STATUS_LINK_SPEED_CHANGE:
		/* Not really useful; ignore. */
		break;

	case RNDIS_STATUS_NETWORK_CHANGE:
		ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
		if (dlen < ofs + msg->rm_stbuflen ||
		    msg->rm_stbuflen < sizeof(uint32_t)) {
			if_printf(sc->hn_ifp, "network changed\n");
		} else {
			uint32_t change;

			memcpy(&change, ((const uint8_t *)msg) + ofs,
			    sizeof(change));
			if_printf(sc->hn_ifp, "network changed, change %u\n",
			    change);
		}
		hn_change_network(sc);
		break;

	default:
		if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
		    msg->rm_status);
		break;
	}
}

static int
hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (__predict_false(info_dlen < sizeof(*pi)))
			return (EINVAL);
		if (__predict_false(info_dlen < pi->rm_size))
			return (EINVAL);
		info_dlen -= pi->rm_size;

		if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return (EINVAL);
		if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
			return (EINVAL);
		dlen = pi->rm_size - pi->rm_pktinfooffset;
		data = pi->rm_data;

		switch (pi->rm_type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
				return (EINVAL);
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
				return (EINVAL);
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHVAL:
			if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
				return (EINVAL);
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHINF:
			if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
				return (EINVAL);
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL) {
			/* All found; done */
			break;
		}
next:
		pi = (const struct rndis_pktinfo *)
		    ((const uint8_t *)pi + pi->rm_size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if ((mask & HN_RXINFO_HASHVAL) == 0)
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return (0);
}

/*
 * Returns true if the byte ranges [off, off + len) and
 * [check_off, check_off + check_len) overlap.
 */
static __inline bool
hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
{

	if (off < check_off) {
		if (__predict_true(off + len <= check_off))
			return (false);
	} else if (off > check_off) {
		if (__predict_true(check_off + check_len <= off))
			return (false);
	}
	return (true);
}

static void
hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_packet_msg *pkt;
	struct hn_rxinfo info;
	int data_off, pktinfo_off, data_len, pktinfo_len;

	/*
	 * Check length.
	 */
	if (__predict_false(dlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
		return;
	}
	pkt = data;

	if (__predict_false(dlen < pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
		return;
	}
	if (__predict_false(pkt->rm_len <
	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
		    "msglen %u, data %u, oob %u, pktinfo %u\n",
		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
		    pkt->rm_pktinfolen);
		return;
	}
	if (__predict_false(pkt->rm_datalen == 0)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
		return;
	}

	/*
	 * Check offsets.
	 */
#define IS_OFFSET_INVALID(ofs)			\
	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))

	/* XXX Hyper-V does not meet data offset alignment requirement */
	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data offset %u\n", pkt->rm_dataoffset);
		return;
	}
	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "oob offset %u\n", pkt->rm_oobdataoffset);
		return;
	}
	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
		return;
	}

#undef IS_OFFSET_INVALID

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
	data_len = pkt->rm_datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
	pktinfo_len = pkt->rm_pktinfolen;

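	/*
	 * From here on data_off/pktinfo_off (and oob_off below) are
	 * absolute offsets into the RNDIS message; the relative offsets
	 * carried in the header were converted by
	 * RNDIS_PACKET_MSG_OFFSET_ABS().  The coverage checks that follow
	 * only require that each region stays within rm_len and that the
	 * regions do not overlap one another; no particular ordering of
	 * the regions is assumed.
	 */
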
	/*
	 * Check OOB coverage.
	 */
	if (__predict_false(pkt->rm_oobdatalen != 0)) {
		int oob_off, oob_len;

		if_printf(rxr->hn_ifp, "got oobdata\n");
		oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
		oob_len = pkt->rm_oobdatalen;

		if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overflow, msglen %u, oob abs %d len %d\n",
			    pkt->rm_len, oob_off, oob_len);
			return;
		}

		/*
		 * Check against data.
		 */
		if (hn_rndis_check_overlap(oob_off, oob_len,
		    data_off, data_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps data, oob abs %d len %d, "
			    "data abs %d len %d\n",
			    oob_off, oob_len, data_off, data_len);
			return;
		}

		/*
		 * Check against pktinfo.
		 */
		if (pktinfo_len != 0 &&
		    hn_rndis_check_overlap(oob_off, oob_len,
		    pktinfo_off, pktinfo_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps pktinfo, oob abs %d len %d, "
			    "pktinfo abs %d len %d\n",
			    oob_off, oob_len, pktinfo_off, pktinfo_len);
			return;
		}
	}

	/*
	 * Check per-packet-info coverage and find useful per-packet-info.
	 */
	info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
	info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
	info.hash_info = HN_NDIS_HASH_INFO_INVALID;
	if (__predict_true(pktinfo_len != 0)) {
		bool overlap;
		int error;

		if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overflow, msglen %u, "
			    "pktinfo abs %d len %d\n",
			    pkt->rm_len, pktinfo_off, pktinfo_len);
			return;
		}

		/*
		 * Check packet info coverage.
		 */
		overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
		    data_off, data_len);
		if (__predict_false(overlap)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overlap data, pktinfo abs %d len %d, "
			    "data abs %d len %d\n",
			    pktinfo_off, pktinfo_len, data_off, data_len);
			return;
		}

		/*
		 * Find useful per-packet-info.
		 */
		error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
		    pktinfo_len, &info);
		if (__predict_false(error)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
			    "pktinfo\n");
			return;
		}
	}

	if (__predict_false(data_off + data_len > pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data overflow, msglen %u, data abs %d len %d\n",
		    pkt->rm_len, data_off, data_len);
		return;
	}
	hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
}

static __inline void
hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_msghdr *hdr;

	if (__predict_false(dlen < sizeof(*hdr))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
		return;
	}
	hdr = data;

	if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
		/* Hot data path. */
		hn_rndis_rx_data(rxr, data, dlen);
		/* Done! */
		return;
	}

	if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
		hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
	else
		hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
}

static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{
	const struct hn_nvs_hdr *hdr;

	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
		if_printf(sc->hn_ifp, "invalid nvs notify\n");
		return;
	}
	hdr = VMBUS_CHANPKT_CONST_DATA(pkt);

	if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
		/* Useless; ignore */
		return;
	}
	if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
}

static void
hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkt)
{
	struct hn_nvs_sendctx *sndc;

	sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
	sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
	    VMBUS_CHANPKT_DATALEN(pkt));
	/*
	 * NOTE:
	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
	 * its callback.
	 */
}

static void
hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkthdr)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr;
	int count, i, hlen;

	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
		return;
	}
	nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);

	/* Make sure that this is a RNDIS message. */
	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
		    nvs_hdr->nvs_type);
		return;
	}

	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
	if (__predict_false(hlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
		return;
	}
	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;

	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
		    pkt->cp_rxbuf_id);
		return;
	}

	count = pkt->cp_rxbuf_cnt;
	if (__predict_false(hlen <
	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
		return;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		int ofs, len;

		ofs = pkt->cp_rxbuf[i].rb_ofs;
		len = pkt->cp_rxbuf[i].rb_len;
		if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
			    "ofs %d, len %d\n", i, ofs, len);
			continue;
		}
		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
	}

	/*
	 * Ack the consumed RXBUF associated w/ this channel packet,
	 * so that this RXBUF can be recycled by the hypervisor.
	 */
	hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
}

static void
hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    uint64_t tid)
{
	struct hn_nvs_rndis_ack ack;
	int retries, error;

	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
	ack.nvs_status = HN_NVS_STATUS_OK;

	retries = 0;
again:
	error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
	    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
	if (__predict_false(error == EAGAIN)) {
		/*
		 * NOTE:
		 * This should _not_ happen in the real world, since the
		 * consumption of the TX bufring from the TX path is
		 * controlled.
		 */
		if (rxr->hn_ack_failed == 0)
			if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
		rxr->hn_ack_failed++;
		retries++;
		if (retries < 10) {
			DELAY(100);
			goto again;
		}
		/* RXBUF leaks! */
		if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
	}
}

static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
	struct hn_rx_ring *rxr = xrxr;
	struct hn_softc *sc = rxr->hn_ifp->if_softc;

	for (;;) {
		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
		int error, pktlen;

		pktlen = rxr->hn_pktbuf_len;
		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
		if (__predict_false(error == ENOBUFS)) {
			void *nbuf;
			int nlen;

			/*
			 * Expand channel packet buffer.
			 *
			 * XXX
			 * Use M_WAITOK here, since allocation failure
			 * is fatal.
			 */
			nlen = rxr->hn_pktbuf_len * 2;
			while (nlen < pktlen)
				nlen *= 2;
			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);

			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
			    rxr->hn_pktbuf_len, nlen);

			free(rxr->hn_pktbuf, M_DEVBUF);
			rxr->hn_pktbuf = nbuf;
			rxr->hn_pktbuf_len = nlen;
			/* Retry! */
			continue;
		} else if (__predict_false(error == EAGAIN)) {
			/* No more channel packets; done! */
			break;
		}
		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			hn_nvs_handle_comp(sc, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(rxr, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(sc, pkt);
			break;

		default:
			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
			    pkt->cph_type);
			break;
		}
	}
	hn_chan_rollup(rxr, rxr->hn_txr);
}

static void
hn_sysinit(void *arg __unused)
{
	int i;

#ifdef HN_IFSTART_SUPPORT
	/*
	 * Don't use ifnet.if_start if transparent VF mode is requested;
	 * mainly due to the IFF_DRV_OACTIVE flag.
	 */
	if (hn_xpnt_vf && hn_use_if_start) {
		hn_use_if_start = 0;
		printf("hn: transparent VF mode, if_transmit will be used, "
		    "instead of if_start\n");
	}
#endif
	if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) {
		printf("hn: invalid transparent VF attach routing "
		    "wait timeout %d, reset to %d\n",
		    hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN);
		hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
	}

	/*
	 * Initialize VF map.
	 */
	rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE);
	hn_vfmap_size = HN_VFMAP_SIZE_DEF;
	hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF,
	    M_WAITOK | M_ZERO);

	/*
	 * Fix the # of TX taskqueues.
	 */
	if (hn_tx_taskq_cnt <= 0)
		hn_tx_taskq_cnt = 1;
	else if (hn_tx_taskq_cnt > mp_ncpus)
		hn_tx_taskq_cnt = mp_ncpus;

	/*
	 * Fix the TX taskqueue mode.
	 */
	switch (hn_tx_taskq_mode) {
	case HN_TX_TASKQ_M_INDEP:
	case HN_TX_TASKQ_M_GLOBAL:
	case HN_TX_TASKQ_M_EVTTQ:
		break;
	default:
		hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
		break;
	}

	if (vm_guest != VM_GUEST_HV)
		return;

	if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
		return;

	hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
	    M_DEVBUF, M_WAITOK);
	for (i = 0; i < hn_tx_taskq_cnt; ++i) {
		hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &hn_tx_taskque[i]);
		taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
		    "hn tx%d", i);
	}
}
SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL);

static void
hn_sysuninit(void *arg __unused)
{

	if (hn_tx_taskque != NULL) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(hn_tx_taskque[i]);
		free(hn_tx_taskque, M_DEVBUF);
	}

	if (hn_vfmap != NULL)
		free(hn_vfmap, M_DEVBUF);
	rm_destroy(&hn_vfmap_lock);
}
SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL);