1 /*- 2 * Copyright (c) 2010-2012 Citrix Inc. 3 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. 4 * Copyright (c) 2012 NetApp Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 2004-2006 Kip Macy 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 
53 */ 54 55 #include <sys/cdefs.h> 56 __FBSDID("$FreeBSD$"); 57 58 #include "opt_hn.h" 59 #include "opt_inet6.h" 60 #include "opt_inet.h" 61 #include "opt_rss.h" 62 63 #include <sys/param.h> 64 #include <sys/systm.h> 65 #include <sys/bus.h> 66 #include <sys/counter.h> 67 #include <sys/kernel.h> 68 #include <sys/limits.h> 69 #include <sys/malloc.h> 70 #include <sys/mbuf.h> 71 #include <sys/module.h> 72 #include <sys/queue.h> 73 #include <sys/lock.h> 74 #include <sys/proc.h> 75 #include <sys/rmlock.h> 76 #include <sys/sbuf.h> 77 #include <sys/sched.h> 78 #include <sys/smp.h> 79 #include <sys/socket.h> 80 #include <sys/sockio.h> 81 #include <sys/sx.h> 82 #include <sys/sysctl.h> 83 #include <sys/taskqueue.h> 84 #include <sys/buf_ring.h> 85 #include <sys/eventhandler.h> 86 87 #include <machine/atomic.h> 88 #include <machine/in_cksum.h> 89 90 #include <net/bpf.h> 91 #include <net/ethernet.h> 92 #include <net/if.h> 93 #include <net/if_dl.h> 94 #include <net/if_media.h> 95 #include <net/if_types.h> 96 #include <net/if_var.h> 97 #include <net/rndis.h> 98 #ifdef RSS 99 #include <net/rss_config.h> 100 #endif 101 102 #include <netinet/in_systm.h> 103 #include <netinet/in.h> 104 #include <netinet/ip.h> 105 #include <netinet/ip6.h> 106 #include <netinet/tcp.h> 107 #include <netinet/tcp_lro.h> 108 #include <netinet/udp.h> 109 110 #include <dev/hyperv/include/hyperv.h> 111 #include <dev/hyperv/include/hyperv_busdma.h> 112 #include <dev/hyperv/include/vmbus.h> 113 #include <dev/hyperv/include/vmbus_xact.h> 114 115 #include <dev/hyperv/netvsc/ndis.h> 116 #include <dev/hyperv/netvsc/if_hnreg.h> 117 #include <dev/hyperv/netvsc/if_hnvar.h> 118 #include <dev/hyperv/netvsc/hn_nvs.h> 119 #include <dev/hyperv/netvsc/hn_rndis.h> 120 121 #include "vmbus_if.h" 122 123 #define HN_IFSTART_SUPPORT 124 125 #define HN_RING_CNT_DEF_MAX 8 126 127 #define HN_VFMAP_SIZE_DEF 8 128 129 #define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */ 130 131 /* YYY should get it from the underlying channel */ 132 #define HN_TX_DESC_CNT 512 133 134 #define HN_RNDIS_PKT_LEN \ 135 (sizeof(struct rndis_packet_msg) + \ 136 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ 137 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ 138 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ 139 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) 140 #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE 141 #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE 142 143 #define HN_TX_DATA_BOUNDARY PAGE_SIZE 144 #define HN_TX_DATA_MAXSIZE IP_MAXPACKET 145 #define HN_TX_DATA_SEGSIZE PAGE_SIZE 146 /* -1 for RNDIS packet message */ 147 #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) 148 149 #define HN_DIRECT_TX_SIZE_DEF 128 150 151 #define HN_EARLY_TXEOF_THRESH 8 152 153 #define HN_PKTBUF_LEN_DEF (16 * 1024) 154 155 #define HN_LROENT_CNT_DEF 128 156 157 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) 158 #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) 159 /* YYY 2*MTU is a bit rough, but should be good enough. 
*/ 160 #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) 161 162 #define HN_LRO_ACKCNT_DEF 1 163 164 #define HN_LOCK_INIT(sc) \ 165 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) 166 #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) 167 #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) 168 #define HN_LOCK(sc) \ 169 do { \ 170 while (sx_try_xlock(&(sc)->hn_lock) == 0) { \ 171 /* Relinquish cpu to avoid deadlock */ \ 172 sched_relinquish(curthread); \ 173 DELAY(1000); \ 174 } \ 175 } while (0) 176 #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) 177 178 #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) 179 #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) 180 #define HN_CSUM_IP_HWASSIST(sc) \ 181 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) 182 #define HN_CSUM_IP6_HWASSIST(sc) \ 183 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) 184 185 #define HN_PKTSIZE_MIN(align) \ 186 roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ 187 HN_RNDIS_PKT_LEN, (align)) 188 #define HN_PKTSIZE(m, align) \ 189 roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) 190 191 #ifdef RSS 192 #define HN_RING_IDX2CPU(sc, idx) rss_getcpu((idx) % rss_getnumbuckets()) 193 #else 194 #define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus) 195 #endif 196 197 struct hn_txdesc { 198 #ifndef HN_USE_TXDESC_BUFRING 199 SLIST_ENTRY(hn_txdesc) link; 200 #endif 201 STAILQ_ENTRY(hn_txdesc) agg_link; 202 203 /* Aggregated txdescs, in sending order. */ 204 STAILQ_HEAD(, hn_txdesc) agg_list; 205 206 /* The oldest packet, if transmission aggregation happens. */ 207 struct mbuf *m; 208 struct hn_tx_ring *txr; 209 int refs; 210 uint32_t flags; /* HN_TXD_FLAG_ */ 211 struct hn_nvs_sendctx send_ctx; 212 uint32_t chim_index; 213 int chim_size; 214 215 bus_dmamap_t data_dmap; 216 217 bus_addr_t rndis_pkt_paddr; 218 struct rndis_packet_msg *rndis_pkt; 219 bus_dmamap_t rndis_pkt_dmap; 220 }; 221 222 #define HN_TXD_FLAG_ONLIST 0x0001 223 #define HN_TXD_FLAG_DMAMAP 0x0002 224 #define HN_TXD_FLAG_ONAGG 0x0004 225 226 struct hn_rxinfo { 227 uint32_t vlan_info; 228 uint32_t csum_info; 229 uint32_t hash_info; 230 uint32_t hash_value; 231 }; 232 233 struct hn_rxvf_setarg { 234 struct hn_rx_ring *rxr; 235 struct ifnet *vf_ifp; 236 }; 237 238 #define HN_RXINFO_VLAN 0x0001 239 #define HN_RXINFO_CSUM 0x0002 240 #define HN_RXINFO_HASHINF 0x0004 241 #define HN_RXINFO_HASHVAL 0x0008 242 #define HN_RXINFO_ALL \ 243 (HN_RXINFO_VLAN | \ 244 HN_RXINFO_CSUM | \ 245 HN_RXINFO_HASHINF | \ 246 HN_RXINFO_HASHVAL) 247 248 #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff 249 #define HN_NDIS_RXCSUM_INFO_INVALID 0 250 #define HN_NDIS_HASH_INFO_INVALID 0 251 252 static int hn_probe(device_t); 253 static int hn_attach(device_t); 254 static int hn_detach(device_t); 255 static int hn_shutdown(device_t); 256 static void hn_chan_callback(struct vmbus_channel *, 257 void *); 258 259 static void hn_init(void *); 260 static int hn_ioctl(struct ifnet *, u_long, caddr_t); 261 #ifdef HN_IFSTART_SUPPORT 262 static void hn_start(struct ifnet *); 263 #endif 264 static int hn_transmit(struct ifnet *, struct mbuf *); 265 static void hn_xmit_qflush(struct ifnet *); 266 static int hn_ifmedia_upd(struct ifnet *); 267 static void hn_ifmedia_sts(struct ifnet *, 268 struct ifmediareq *); 269 270 static void hn_ifnet_event(void *, struct ifnet *, int); 271 static void hn_ifaddr_event(void *, struct ifnet *); 272 static void hn_ifnet_attevent(void *, struct ifnet *); 273 static void 
hn_ifnet_detevent(void *, struct ifnet *); 274 static void hn_ifnet_lnkevent(void *, struct ifnet *, int); 275 276 static bool hn_ismyvf(const struct hn_softc *, 277 const struct ifnet *); 278 static void hn_rxvf_change(struct hn_softc *, 279 struct ifnet *, bool); 280 static void hn_rxvf_set(struct hn_softc *, struct ifnet *); 281 static void hn_rxvf_set_task(void *, int); 282 static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *); 283 static int hn_xpnt_vf_iocsetflags(struct hn_softc *); 284 static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, 285 struct ifreq *); 286 static void hn_xpnt_vf_saveifflags(struct hn_softc *); 287 static bool hn_xpnt_vf_isready(struct hn_softc *); 288 static void hn_xpnt_vf_setready(struct hn_softc *); 289 static void hn_xpnt_vf_init_taskfunc(void *, int); 290 static void hn_xpnt_vf_init(struct hn_softc *); 291 static void hn_xpnt_vf_setenable(struct hn_softc *); 292 static void hn_xpnt_vf_setdisable(struct hn_softc *, bool); 293 static void hn_vf_rss_fixup(struct hn_softc *, bool); 294 static void hn_vf_rss_restore(struct hn_softc *); 295 296 static int hn_rndis_rxinfo(const void *, int, 297 struct hn_rxinfo *); 298 static void hn_rndis_rx_data(struct hn_rx_ring *, 299 const void *, int); 300 static void hn_rndis_rx_status(struct hn_softc *, 301 const void *, int); 302 static void hn_rndis_init_fixat(struct hn_softc *, int); 303 304 static void hn_nvs_handle_notify(struct hn_softc *, 305 const struct vmbus_chanpkt_hdr *); 306 static void hn_nvs_handle_comp(struct hn_softc *, 307 struct vmbus_channel *, 308 const struct vmbus_chanpkt_hdr *); 309 static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, 310 struct vmbus_channel *, 311 const struct vmbus_chanpkt_hdr *); 312 static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, 313 struct vmbus_channel *, uint64_t); 314 315 #if __FreeBSD_version >= 1100099 316 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); 317 static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); 318 #endif 319 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); 320 static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); 321 #if __FreeBSD_version < 1100095 322 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); 323 #else 324 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); 325 #endif 326 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 327 static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 328 static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); 329 static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); 330 static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); 331 static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); 332 static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); 333 #ifndef RSS 334 static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); 335 static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); 336 #endif 337 static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); 338 static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS); 339 static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS); 340 static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); 341 static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); 342 static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); 343 static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); 344 static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS); 345 static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS); 346 static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS); 347 static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS); 348 static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS); 349 static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS); 350 
static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS); 351 352 static void hn_stop(struct hn_softc *, bool); 353 static void hn_init_locked(struct hn_softc *); 354 static int hn_chan_attach(struct hn_softc *, 355 struct vmbus_channel *); 356 static void hn_chan_detach(struct hn_softc *, 357 struct vmbus_channel *); 358 static int hn_attach_subchans(struct hn_softc *); 359 static void hn_detach_allchans(struct hn_softc *); 360 static void hn_chan_rollup(struct hn_rx_ring *, 361 struct hn_tx_ring *); 362 static void hn_set_ring_inuse(struct hn_softc *, int); 363 static int hn_synth_attach(struct hn_softc *, int); 364 static void hn_synth_detach(struct hn_softc *); 365 static int hn_synth_alloc_subchans(struct hn_softc *, 366 int *); 367 static bool hn_synth_attachable(const struct hn_softc *); 368 static void hn_suspend(struct hn_softc *); 369 static void hn_suspend_data(struct hn_softc *); 370 static void hn_suspend_mgmt(struct hn_softc *); 371 static void hn_resume(struct hn_softc *); 372 static void hn_resume_data(struct hn_softc *); 373 static void hn_resume_mgmt(struct hn_softc *); 374 static void hn_suspend_mgmt_taskfunc(void *, int); 375 static void hn_chan_drain(struct hn_softc *, 376 struct vmbus_channel *); 377 static void hn_disable_rx(struct hn_softc *); 378 static void hn_drain_rxtx(struct hn_softc *, int); 379 static void hn_polling(struct hn_softc *, u_int); 380 static void hn_chan_polling(struct vmbus_channel *, u_int); 381 static void hn_mtu_change_fixup(struct hn_softc *); 382 383 static void hn_update_link_status(struct hn_softc *); 384 static void hn_change_network(struct hn_softc *); 385 static void hn_link_taskfunc(void *, int); 386 static void hn_netchg_init_taskfunc(void *, int); 387 static void hn_netchg_status_taskfunc(void *, int); 388 static void hn_link_status(struct hn_softc *); 389 390 static int hn_create_rx_data(struct hn_softc *, int); 391 static void hn_destroy_rx_data(struct hn_softc *); 392 static int hn_check_iplen(const struct mbuf *, int); 393 static void hn_rxpkt_proto(const struct mbuf *, int *, int *); 394 static int hn_set_rxfilter(struct hn_softc *, uint32_t); 395 static int hn_rxfilter_config(struct hn_softc *); 396 static int hn_rss_reconfig(struct hn_softc *); 397 static void hn_rss_ind_fixup(struct hn_softc *); 398 static void hn_rss_mbuf_hash(struct hn_softc *, uint32_t); 399 static int hn_rxpkt(struct hn_rx_ring *, const void *, 400 int, const struct hn_rxinfo *); 401 static uint32_t hn_rss_type_fromndis(uint32_t); 402 static uint32_t hn_rss_type_tondis(uint32_t); 403 404 static int hn_tx_ring_create(struct hn_softc *, int); 405 static void hn_tx_ring_destroy(struct hn_tx_ring *); 406 static int hn_create_tx_data(struct hn_softc *, int); 407 static void hn_fixup_tx_data(struct hn_softc *); 408 static void hn_fixup_rx_data(struct hn_softc *); 409 static void hn_destroy_tx_data(struct hn_softc *); 410 static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); 411 static void hn_txdesc_gc(struct hn_tx_ring *, 412 struct hn_txdesc *); 413 static int hn_encap(struct ifnet *, struct hn_tx_ring *, 414 struct hn_txdesc *, struct mbuf **); 415 static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, 416 struct hn_txdesc *); 417 static void hn_set_chim_size(struct hn_softc *, int); 418 static void hn_set_tso_maxsize(struct hn_softc *, int, int); 419 static bool hn_tx_ring_pending(struct hn_tx_ring *); 420 static void hn_tx_ring_qflush(struct hn_tx_ring *); 421 static void hn_resume_tx(struct hn_softc *, int); 422 static void hn_set_txagg(struct 
hn_softc *); 423 static void *hn_try_txagg(struct ifnet *, 424 struct hn_tx_ring *, struct hn_txdesc *, 425 int); 426 static int hn_get_txswq_depth(const struct hn_tx_ring *); 427 static void hn_txpkt_done(struct hn_nvs_sendctx *, 428 struct hn_softc *, struct vmbus_channel *, 429 const void *, int); 430 static int hn_txpkt_sglist(struct hn_tx_ring *, 431 struct hn_txdesc *); 432 static int hn_txpkt_chim(struct hn_tx_ring *, 433 struct hn_txdesc *); 434 static int hn_xmit(struct hn_tx_ring *, int); 435 static void hn_xmit_taskfunc(void *, int); 436 static void hn_xmit_txeof(struct hn_tx_ring *); 437 static void hn_xmit_txeof_taskfunc(void *, int); 438 #ifdef HN_IFSTART_SUPPORT 439 static int hn_start_locked(struct hn_tx_ring *, int); 440 static void hn_start_taskfunc(void *, int); 441 static void hn_start_txeof(struct hn_tx_ring *); 442 static void hn_start_txeof_taskfunc(void *, int); 443 #endif 444 445 SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 446 "Hyper-V network interface"); 447 448 /* Trust tcp segment verification on host side. */ 449 static int hn_trust_hosttcp = 1; 450 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, 451 &hn_trust_hosttcp, 0, 452 "Trust tcp segment verification on host side, " 453 "when csum info is missing (global setting)"); 454 455 /* Trust udp datagram verification on host side. */ 456 static int hn_trust_hostudp = 1; 457 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, 458 &hn_trust_hostudp, 0, 459 "Trust udp datagram verification on host side, " 460 "when csum info is missing (global setting)"); 461 462 /* Trust ip packet verification on host side. */ 463 static int hn_trust_hostip = 1; 464 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, 465 &hn_trust_hostip, 0, 466 "Trust ip packet verification on host side, " 467 "when csum info is missing (global setting)"); 468 469 /* 470 * Offload UDP/IPv4 checksum. 471 */ 472 static int hn_enable_udp4cs = 1; 473 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN, 474 &hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum"); 475 476 /* 477 * Offload UDP/IPv6 checksum. 478 */ 479 static int hn_enable_udp6cs = 1; 480 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN, 481 &hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum"); 482 483 /* Stats. */ 484 static counter_u64_t hn_udpcs_fixup; 485 SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW, 486 &hn_udpcs_fixup, "# of UDP checksum fixups"); 487 488 /* 489 * See hn_set_hlen(). 490 * 491 * This value is for Azure. For Hyper-V, set this above 492 * 65536 to disable UDP datagram checksum fixup. 
493 */ 494 static int hn_udpcs_fixup_mtu = 1420; 495 SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN, 496 &hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold"); 497 498 /* Limit TSO burst size */ 499 static int hn_tso_maxlen = IP_MAXPACKET; 500 SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, 501 &hn_tso_maxlen, 0, "TSO burst limit"); 502 503 /* Limit chimney send size */ 504 static int hn_tx_chimney_size = 0; 505 SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, 506 &hn_tx_chimney_size, 0, "Chimney send packet size limit"); 507 508 /* Limit the size of packet for direct transmission */ 509 static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; 510 SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, 511 &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); 512 513 /* # of LRO entries per RX ring */ 514 #if defined(INET) || defined(INET6) 515 #if __FreeBSD_version >= 1100095 516 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; 517 SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, 518 &hn_lro_entry_count, 0, "LRO entry count"); 519 #endif 520 #endif 521 522 static int hn_tx_taskq_cnt = 1; 523 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN, 524 &hn_tx_taskq_cnt, 0, "# of TX taskqueues"); 525 526 #define HN_TX_TASKQ_M_INDEP 0 527 #define HN_TX_TASKQ_M_GLOBAL 1 528 #define HN_TX_TASKQ_M_EVTTQ 2 529 530 static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; 531 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN, 532 &hn_tx_taskq_mode, 0, "TX taskqueue modes: " 533 "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs"); 534 535 #ifndef HN_USE_TXDESC_BUFRING 536 static int hn_use_txdesc_bufring = 0; 537 #else 538 static int hn_use_txdesc_bufring = 1; 539 #endif 540 SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, 541 &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); 542 543 #ifdef HN_IFSTART_SUPPORT 544 /* Use ifnet.if_start instead of ifnet.if_transmit */ 545 static int hn_use_if_start = 0; 546 SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, 547 &hn_use_if_start, 0, "Use if_start TX method"); 548 #endif 549 550 /* # of channels to use */ 551 static int hn_chan_cnt = 0; 552 SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, 553 &hn_chan_cnt, 0, 554 "# of channels to use; each channel has one RX ring and one TX ring"); 555 556 /* # of transmit rings to use */ 557 static int hn_tx_ring_cnt = 0; 558 SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, 559 &hn_tx_ring_cnt, 0, "# of TX rings to use"); 560 561 /* Software TX ring deptch */ 562 static int hn_tx_swq_depth = 0; 563 SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, 564 &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); 565 566 /* Enable sorted LRO, and the depth of the per-channel mbuf queue */ 567 #if __FreeBSD_version >= 1100095 568 static u_int hn_lro_mbufq_depth = 0; 569 SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, 570 &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); 571 #endif 572 573 /* Packet transmission aggregation size limit */ 574 static int hn_tx_agg_size = -1; 575 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN, 576 &hn_tx_agg_size, 0, "Packet transmission aggregation size limit"); 577 578 /* Packet transmission aggregation count limit */ 579 static int hn_tx_agg_pkts = -1; 580 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN, 581 &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit"); 582 583 /* VF list */ 584 SYSCTL_PROC(_hw_hn, 
OID_AUTO, vflist, 585 CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, 0, 0, 586 hn_vflist_sysctl, "A", 587 "VF list"); 588 589 /* VF mapping */ 590 SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, 591 CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, 0, 0, 592 hn_vfmap_sysctl, "A", 593 "VF mapping"); 594 595 /* Transparent VF */ 596 static int hn_xpnt_vf = 1; 597 SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN, 598 &hn_xpnt_vf, 0, "Transparent VF mode"); 599 600 /* Accurate BPF support for Transparent VF */ 601 static int hn_xpnt_vf_accbpf = 0; 602 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN, 603 &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF"); 604 605 /* Extra wait for transparent VF attach routine; unit: seconds. */ 606 static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; 607 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN, 608 &hn_xpnt_vf_attwait, 0, 609 "Extra wait for transparent VF attach routine; unit: seconds"); 610 611 static u_int hn_cpu_index; /* next CPU for channel */ 612 static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */ 613 614 static struct rmlock hn_vfmap_lock; 615 static int hn_vfmap_size; 616 static struct ifnet **hn_vfmap; 617 618 #ifndef RSS 619 static const uint8_t 620 hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 621 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 622 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 623 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 624 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 625 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 626 }; 627 #endif /* !RSS */ 628 629 static const struct hyperv_guid hn_guid = { 630 .hv_guid = { 631 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, 632 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e } 633 }; 634 635 static device_method_t hn_methods[] = { 636 /* Device interface */ 637 DEVMETHOD(device_probe, hn_probe), 638 DEVMETHOD(device_attach, hn_attach), 639 DEVMETHOD(device_detach, hn_detach), 640 DEVMETHOD(device_shutdown, hn_shutdown), 641 DEVMETHOD_END 642 }; 643 644 static driver_t hn_driver = { 645 "hn", 646 hn_methods, 647 sizeof(struct hn_softc) 648 }; 649 650 static devclass_t hn_devclass; 651 652 DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); 653 MODULE_VERSION(hn, 1); 654 MODULE_DEPEND(hn, vmbus, 1, 1, 1); 655 656 #if __FreeBSD_version >= 1100099 657 static void 658 hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) 659 { 660 int i; 661 662 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 663 sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; 664 } 665 #endif 666 667 static int 668 hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) 669 { 670 671 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 672 txd->chim_size == 0, ("invalid rndis sglist txd")); 673 return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, 674 &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); 675 } 676 677 static int 678 hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) 679 { 680 struct hn_nvs_rndis rndis; 681 682 KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && 683 txd->chim_size > 0, ("invalid rndis chim txd")); 684 685 rndis.nvs_type = HN_NVS_TYPE_RNDIS; 686 rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; 687 rndis.nvs_chim_idx = txd->chim_index; 688 rndis.nvs_chim_sz = txd->chim_size; 689 690 return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, 691 &rndis, sizeof(rndis), &txd->send_ctx)); 692 } 693 694 static __inline uint32_t 695 hn_chim_alloc(struct hn_softc *sc) 696 { 697 int i, bmap_cnt = 
sc->hn_chim_bmap_cnt; 698 u_long *bmap = sc->hn_chim_bmap; 699 uint32_t ret = HN_NVS_CHIM_IDX_INVALID; 700 701 for (i = 0; i < bmap_cnt; ++i) { 702 int idx; 703 704 idx = ffsl(~bmap[i]); 705 if (idx == 0) 706 continue; 707 708 --idx; /* ffsl is 1-based */ 709 KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, 710 ("invalid i %d and idx %d", i, idx)); 711 712 if (atomic_testandset_long(&bmap[i], idx)) 713 continue; 714 715 ret = i * LONG_BIT + idx; 716 break; 717 } 718 return (ret); 719 } 720 721 static __inline void 722 hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) 723 { 724 u_long mask; 725 uint32_t idx; 726 727 idx = chim_idx / LONG_BIT; 728 KASSERT(idx < sc->hn_chim_bmap_cnt, 729 ("invalid chimney index 0x%x", chim_idx)); 730 731 mask = 1UL << (chim_idx % LONG_BIT); 732 KASSERT(sc->hn_chim_bmap[idx] & mask, 733 ("index bitmap 0x%lx, chimney index %u, " 734 "bitmap idx %d, bitmask 0x%lx", 735 sc->hn_chim_bmap[idx], chim_idx, idx, mask)); 736 737 atomic_clear_long(&sc->hn_chim_bmap[idx], mask); 738 } 739 740 #if defined(INET6) || defined(INET) 741 742 #define PULLUP_HDR(m, len) \ 743 do { \ 744 if (__predict_false((m)->m_len < (len))) { \ 745 (m) = m_pullup((m), (len)); \ 746 if ((m) == NULL) \ 747 return (NULL); \ 748 } \ 749 } while (0) 750 751 /* 752 * NOTE: If this function failed, the m_head would be freed. 753 */ 754 static __inline struct mbuf * 755 hn_tso_fixup(struct mbuf *m_head) 756 { 757 struct ether_vlan_header *evl; 758 struct tcphdr *th; 759 int ehlen; 760 761 KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); 762 763 PULLUP_HDR(m_head, sizeof(*evl)); 764 evl = mtod(m_head, struct ether_vlan_header *); 765 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 766 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 767 else 768 ehlen = ETHER_HDR_LEN; 769 m_head->m_pkthdr.l2hlen = ehlen; 770 771 #ifdef INET 772 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 773 struct ip *ip; 774 int iphlen; 775 776 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 777 ip = mtodo(m_head, ehlen); 778 iphlen = ip->ip_hl << 2; 779 m_head->m_pkthdr.l3hlen = iphlen; 780 781 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 782 th = mtodo(m_head, ehlen + iphlen); 783 784 ip->ip_len = 0; 785 ip->ip_sum = 0; 786 th->th_sum = in_pseudo(ip->ip_src.s_addr, 787 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 788 } 789 #endif 790 #if defined(INET6) && defined(INET) 791 else 792 #endif 793 #ifdef INET6 794 { 795 struct ip6_hdr *ip6; 796 797 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 798 ip6 = mtodo(m_head, ehlen); 799 if (ip6->ip6_nxt != IPPROTO_TCP) { 800 m_freem(m_head); 801 return (NULL); 802 } 803 m_head->m_pkthdr.l3hlen = sizeof(*ip6); 804 805 PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); 806 th = mtodo(m_head, ehlen + sizeof(*ip6)); 807 808 ip6->ip6_plen = 0; 809 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 810 } 811 #endif 812 return (m_head); 813 } 814 815 /* 816 * NOTE: If this function failed, the m_head would be freed. 
817 */ 818 static __inline struct mbuf * 819 hn_set_hlen(struct mbuf *m_head) 820 { 821 const struct ether_vlan_header *evl; 822 int ehlen; 823 824 PULLUP_HDR(m_head, sizeof(*evl)); 825 evl = mtod(m_head, const struct ether_vlan_header *); 826 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 827 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 828 else 829 ehlen = ETHER_HDR_LEN; 830 m_head->m_pkthdr.l2hlen = ehlen; 831 832 #ifdef INET 833 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) { 834 const struct ip *ip; 835 int iphlen; 836 837 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 838 ip = mtodo(m_head, ehlen); 839 iphlen = ip->ip_hl << 2; 840 m_head->m_pkthdr.l3hlen = iphlen; 841 842 /* 843 * UDP checksum offload does not work in Azure if the 844 * following conditions are met: 845 * - sizeof(IP hdr + UDP hdr + payload) > 1420. 846 * - IP_DF is not set in the IP hdr. 847 * 848 * Fall back to software checksum for these UDP datagrams. 849 */ 850 if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) && 851 m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen && 852 (ntohs(ip->ip_off) & IP_DF) == 0) { 853 uint16_t off = ehlen + iphlen; 854 855 counter_u64_add(hn_udpcs_fixup, 1); 856 PULLUP_HDR(m_head, off + sizeof(struct udphdr)); 857 *(uint16_t *)(m_head->m_data + off + 858 m_head->m_pkthdr.csum_data) = in_cksum_skip( 859 m_head, m_head->m_pkthdr.len, off); 860 m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP; 861 } 862 } 863 #endif 864 #if defined(INET6) && defined(INET) 865 else 866 #endif 867 #ifdef INET6 868 { 869 const struct ip6_hdr *ip6; 870 871 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 872 ip6 = mtodo(m_head, ehlen); 873 if (ip6->ip6_nxt != IPPROTO_TCP && 874 ip6->ip6_nxt != IPPROTO_UDP) { 875 m_freem(m_head); 876 return (NULL); 877 } 878 m_head->m_pkthdr.l3hlen = sizeof(*ip6); 879 } 880 #endif 881 return (m_head); 882 } 883 884 /* 885 * NOTE: If this function failed, the m_head would be freed. 886 */ 887 static __inline struct mbuf * 888 hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn) 889 { 890 const struct tcphdr *th; 891 int ehlen, iphlen; 892 893 *tcpsyn = 0; 894 ehlen = m_head->m_pkthdr.l2hlen; 895 iphlen = m_head->m_pkthdr.l3hlen; 896 897 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 898 th = mtodo(m_head, ehlen + iphlen); 899 if (th->th_flags & TH_SYN) 900 *tcpsyn = 1; 901 return (m_head); 902 } 903 904 #undef PULLUP_HDR 905 906 #endif /* INET6 || INET */ 907 908 static int 909 hn_set_rxfilter(struct hn_softc *sc, uint32_t filter) 910 { 911 int error = 0; 912 913 HN_LOCK_ASSERT(sc); 914 915 if (sc->hn_rx_filter != filter) { 916 error = hn_rndis_set_rxfilter(sc, filter); 917 if (!error) 918 sc->hn_rx_filter = filter; 919 } 920 return (error); 921 } 922 923 static int 924 hn_rxfilter_config(struct hn_softc *sc) 925 { 926 struct ifnet *ifp = sc->hn_ifp; 927 uint32_t filter; 928 929 HN_LOCK_ASSERT(sc); 930 931 /* 932 * If the non-transparent mode VF is activated, we don't know how 933 * its RX filter is configured, so stick the synthetic device in 934 * promiscuous mode. 
935 */ 936 if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) { 937 filter = NDIS_PACKET_TYPE_PROMISCUOUS; 938 } else { 939 filter = NDIS_PACKET_TYPE_DIRECTED; 940 if (ifp->if_flags & IFF_BROADCAST) 941 filter |= NDIS_PACKET_TYPE_BROADCAST; 942 /* TODO: support multicast list */ 943 if ((ifp->if_flags & IFF_ALLMULTI) || 944 !CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) 945 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; 946 } 947 return (hn_set_rxfilter(sc, filter)); 948 } 949 950 static void 951 hn_set_txagg(struct hn_softc *sc) 952 { 953 uint32_t size, pkts; 954 int i; 955 956 /* 957 * Setup aggregation size. 958 */ 959 if (sc->hn_agg_size < 0) 960 size = UINT32_MAX; 961 else 962 size = sc->hn_agg_size; 963 964 if (sc->hn_rndis_agg_size < size) 965 size = sc->hn_rndis_agg_size; 966 967 /* NOTE: We only aggregate packets using chimney sending buffers. */ 968 if (size > (uint32_t)sc->hn_chim_szmax) 969 size = sc->hn_chim_szmax; 970 971 if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) { 972 /* Disable */ 973 size = 0; 974 pkts = 0; 975 goto done; 976 } 977 978 /* NOTE: Type of the per TX ring setting is 'int'. */ 979 if (size > INT_MAX) 980 size = INT_MAX; 981 982 /* 983 * Setup aggregation packet count. 984 */ 985 if (sc->hn_agg_pkts < 0) 986 pkts = UINT32_MAX; 987 else 988 pkts = sc->hn_agg_pkts; 989 990 if (sc->hn_rndis_agg_pkts < pkts) 991 pkts = sc->hn_rndis_agg_pkts; 992 993 if (pkts <= 1) { 994 /* Disable */ 995 size = 0; 996 pkts = 0; 997 goto done; 998 } 999 1000 /* NOTE: Type of the per TX ring setting is 'short'. */ 1001 if (pkts > SHRT_MAX) 1002 pkts = SHRT_MAX; 1003 1004 done: 1005 /* NOTE: Type of the per TX ring setting is 'short'. */ 1006 if (sc->hn_rndis_agg_align > SHRT_MAX) { 1007 /* Disable */ 1008 size = 0; 1009 pkts = 0; 1010 } 1011 1012 if (bootverbose) { 1013 if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", 1014 size, pkts, sc->hn_rndis_agg_align); 1015 } 1016 1017 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 1018 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 1019 1020 mtx_lock(&txr->hn_tx_lock); 1021 txr->hn_agg_szmax = size; 1022 txr->hn_agg_pktmax = pkts; 1023 txr->hn_agg_align = sc->hn_rndis_agg_align; 1024 mtx_unlock(&txr->hn_tx_lock); 1025 } 1026 } 1027 1028 static int 1029 hn_get_txswq_depth(const struct hn_tx_ring *txr) 1030 { 1031 1032 KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); 1033 if (hn_tx_swq_depth < txr->hn_txdesc_cnt) 1034 return txr->hn_txdesc_cnt; 1035 return hn_tx_swq_depth; 1036 } 1037 1038 static int 1039 hn_rss_reconfig(struct hn_softc *sc) 1040 { 1041 int error; 1042 1043 HN_LOCK_ASSERT(sc); 1044 1045 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1046 return (ENXIO); 1047 1048 /* 1049 * Disable RSS first. 1050 * 1051 * NOTE: 1052 * Direct reconfiguration by setting the UNCHG flags does 1053 * _not_ work properly. 1054 */ 1055 if (bootverbose) 1056 if_printf(sc->hn_ifp, "disable RSS\n"); 1057 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); 1058 if (error) { 1059 if_printf(sc->hn_ifp, "RSS disable failed\n"); 1060 return (error); 1061 } 1062 1063 /* 1064 * Reenable the RSS w/ the updated RSS key or indirect 1065 * table. 
1066 */ 1067 if (bootverbose) 1068 if_printf(sc->hn_ifp, "reconfig RSS\n"); 1069 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 1070 if (error) { 1071 if_printf(sc->hn_ifp, "RSS reconfig failed\n"); 1072 return (error); 1073 } 1074 return (0); 1075 } 1076 1077 static void 1078 hn_rss_ind_fixup(struct hn_softc *sc) 1079 { 1080 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 1081 int i, nchan; 1082 1083 nchan = sc->hn_rx_ring_inuse; 1084 KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); 1085 1086 /* 1087 * Check indirect table to make sure that all channels in it 1088 * can be used. 1089 */ 1090 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 1091 if (rss->rss_ind[i] >= nchan) { 1092 if_printf(sc->hn_ifp, 1093 "RSS indirect table %d fixup: %u -> %d\n", 1094 i, rss->rss_ind[i], nchan - 1); 1095 rss->rss_ind[i] = nchan - 1; 1096 } 1097 } 1098 } 1099 1100 static int 1101 hn_ifmedia_upd(struct ifnet *ifp __unused) 1102 { 1103 1104 return EOPNOTSUPP; 1105 } 1106 1107 static void 1108 hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 1109 { 1110 struct hn_softc *sc = ifp->if_softc; 1111 1112 ifmr->ifm_status = IFM_AVALID; 1113 ifmr->ifm_active = IFM_ETHER; 1114 1115 if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { 1116 ifmr->ifm_active |= IFM_NONE; 1117 return; 1118 } 1119 ifmr->ifm_status |= IFM_ACTIVE; 1120 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 1121 } 1122 1123 static void 1124 hn_rxvf_set_task(void *xarg, int pending __unused) 1125 { 1126 struct hn_rxvf_setarg *arg = xarg; 1127 1128 arg->rxr->hn_rxvf_ifp = arg->vf_ifp; 1129 } 1130 1131 static void 1132 hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp) 1133 { 1134 struct hn_rx_ring *rxr; 1135 struct hn_rxvf_setarg arg; 1136 struct task task; 1137 int i; 1138 1139 HN_LOCK_ASSERT(sc); 1140 1141 TASK_INIT(&task, 0, hn_rxvf_set_task, &arg); 1142 1143 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 1144 rxr = &sc->hn_rx_ring[i]; 1145 1146 if (i < sc->hn_rx_ring_inuse) { 1147 arg.rxr = rxr; 1148 arg.vf_ifp = vf_ifp; 1149 vmbus_chan_run_task(rxr->hn_chan, &task); 1150 } else { 1151 rxr->hn_rxvf_ifp = vf_ifp; 1152 } 1153 } 1154 } 1155 1156 static bool 1157 hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp) 1158 { 1159 const struct ifnet *hn_ifp; 1160 1161 hn_ifp = sc->hn_ifp; 1162 1163 if (ifp == hn_ifp) 1164 return (false); 1165 1166 if (ifp->if_alloctype != IFT_ETHER) 1167 return (false); 1168 1169 /* Ignore lagg/vlan interfaces */ 1170 if (strcmp(ifp->if_dname, "lagg") == 0 || 1171 strcmp(ifp->if_dname, "vlan") == 0) 1172 return (false); 1173 1174 /* 1175 * During detach events ifp->if_addr might be NULL. 
1176 * Make sure the bcmp() below doesn't panic on that: 1177 */ 1178 if (ifp->if_addr == NULL || hn_ifp->if_addr == NULL) 1179 return (false); 1180 1181 if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0) 1182 return (false); 1183 1184 return (true); 1185 } 1186 1187 static void 1188 hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf) 1189 { 1190 struct ifnet *hn_ifp; 1191 1192 HN_LOCK(sc); 1193 1194 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1195 goto out; 1196 1197 if (!hn_ismyvf(sc, ifp)) 1198 goto out; 1199 hn_ifp = sc->hn_ifp; 1200 1201 if (rxvf) { 1202 if (sc->hn_flags & HN_FLAG_RXVF) 1203 goto out; 1204 1205 sc->hn_flags |= HN_FLAG_RXVF; 1206 hn_rxfilter_config(sc); 1207 } else { 1208 if (!(sc->hn_flags & HN_FLAG_RXVF)) 1209 goto out; 1210 1211 sc->hn_flags &= ~HN_FLAG_RXVF; 1212 if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 1213 hn_rxfilter_config(sc); 1214 else 1215 hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE); 1216 } 1217 1218 hn_nvs_set_datapath(sc, 1219 rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH); 1220 1221 hn_rxvf_set(sc, rxvf ? ifp : NULL); 1222 1223 if (rxvf) { 1224 hn_vf_rss_fixup(sc, true); 1225 hn_suspend_mgmt(sc); 1226 sc->hn_link_flags &= 1227 ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG); 1228 if_link_state_change(hn_ifp, LINK_STATE_DOWN); 1229 } else { 1230 hn_vf_rss_restore(sc); 1231 hn_resume_mgmt(sc); 1232 } 1233 1234 devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname, 1235 rxvf ? "VF_UP" : "VF_DOWN", NULL); 1236 1237 if (bootverbose) { 1238 if_printf(hn_ifp, "datapath is switched %s %s\n", 1239 rxvf ? "to" : "from", ifp->if_xname); 1240 } 1241 out: 1242 HN_UNLOCK(sc); 1243 } 1244 1245 static void 1246 hn_ifnet_event(void *arg, struct ifnet *ifp, int event) 1247 { 1248 1249 if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN) 1250 return; 1251 hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP); 1252 } 1253 1254 static void 1255 hn_ifaddr_event(void *arg, struct ifnet *ifp) 1256 { 1257 1258 hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP); 1259 } 1260 1261 static int 1262 hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr) 1263 { 1264 struct ifnet *ifp, *vf_ifp; 1265 uint64_t tmp; 1266 int error; 1267 1268 HN_LOCK_ASSERT(sc); 1269 ifp = sc->hn_ifp; 1270 vf_ifp = sc->hn_vf_ifp; 1271 1272 /* 1273 * Fix up requested capabilities w/ supported capabilities, 1274 * since the supported capabilities could have been changed. 1275 */ 1276 ifr->ifr_reqcap &= ifp->if_capabilities; 1277 /* Pass SIOCSIFCAP to VF. */ 1278 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr); 1279 1280 /* 1281 * NOTE: 1282 * The error will be propagated to the callers, however, it 1283 * is _not_ useful here. 1284 */ 1285 1286 /* 1287 * Merge VF's enabled capabilities. 
1288 */ 1289 ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities; 1290 1291 tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc); 1292 if (ifp->if_capenable & IFCAP_TXCSUM) 1293 ifp->if_hwassist |= tmp; 1294 else 1295 ifp->if_hwassist &= ~tmp; 1296 1297 tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc); 1298 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 1299 ifp->if_hwassist |= tmp; 1300 else 1301 ifp->if_hwassist &= ~tmp; 1302 1303 tmp = vf_ifp->if_hwassist & CSUM_IP_TSO; 1304 if (ifp->if_capenable & IFCAP_TSO4) 1305 ifp->if_hwassist |= tmp; 1306 else 1307 ifp->if_hwassist &= ~tmp; 1308 1309 tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO; 1310 if (ifp->if_capenable & IFCAP_TSO6) 1311 ifp->if_hwassist |= tmp; 1312 else 1313 ifp->if_hwassist &= ~tmp; 1314 1315 return (error); 1316 } 1317 1318 static int 1319 hn_xpnt_vf_iocsetflags(struct hn_softc *sc) 1320 { 1321 struct ifnet *vf_ifp; 1322 struct ifreq ifr; 1323 1324 HN_LOCK_ASSERT(sc); 1325 vf_ifp = sc->hn_vf_ifp; 1326 1327 memset(&ifr, 0, sizeof(ifr)); 1328 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1329 ifr.ifr_flags = vf_ifp->if_flags & 0xffff; 1330 ifr.ifr_flagshigh = vf_ifp->if_flags >> 16; 1331 return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr)); 1332 } 1333 1334 static void 1335 hn_xpnt_vf_saveifflags(struct hn_softc *sc) 1336 { 1337 struct ifnet *ifp = sc->hn_ifp; 1338 int allmulti = 0; 1339 1340 HN_LOCK_ASSERT(sc); 1341 1342 /* XXX vlan(4) style mcast addr maintenance */ 1343 if (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) 1344 allmulti = IFF_ALLMULTI; 1345 1346 /* Always set the VF's if_flags */ 1347 sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti; 1348 } 1349 1350 static void 1351 hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m) 1352 { 1353 struct rm_priotracker pt; 1354 struct ifnet *hn_ifp = NULL; 1355 struct mbuf *mn; 1356 1357 /* 1358 * XXX racy, if hn(4) ever detached. 1359 */ 1360 rm_rlock(&hn_vfmap_lock, &pt); 1361 if (vf_ifp->if_index < hn_vfmap_size) 1362 hn_ifp = hn_vfmap[vf_ifp->if_index]; 1363 rm_runlock(&hn_vfmap_lock, &pt); 1364 1365 if (hn_ifp != NULL) { 1366 for (mn = m; mn != NULL; mn = mn->m_nextpkt) { 1367 /* 1368 * Allow tapping on the VF. 1369 */ 1370 ETHER_BPF_MTAP(vf_ifp, mn); 1371 1372 /* 1373 * Update VF stats. 1374 */ 1375 if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) { 1376 if_inc_counter(vf_ifp, IFCOUNTER_IBYTES, 1377 mn->m_pkthdr.len); 1378 } 1379 /* 1380 * XXX IFCOUNTER_IMCAST 1381 * This stat updating is kinda invasive, since it 1382 * requires two checks on the mbuf: the length check 1383 * and the ethernet header check. As of this writing, 1384 * all multicast packets go directly to hn(4), which 1385 * makes imcast stat updating in the VF a try in vain. 1386 */ 1387 1388 /* 1389 * Fix up rcvif and increase hn(4)'s ipackets. 1390 */ 1391 mn->m_pkthdr.rcvif = hn_ifp; 1392 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 1393 } 1394 /* 1395 * Go through hn(4)'s if_input. 1396 */ 1397 hn_ifp->if_input(hn_ifp, m); 1398 } else { 1399 /* 1400 * In the middle of the transition; free this 1401 * mbuf chain. 
1402 */ 1403 while (m != NULL) { 1404 mn = m->m_nextpkt; 1405 m->m_nextpkt = NULL; 1406 m_freem(m); 1407 m = mn; 1408 } 1409 } 1410 } 1411 1412 static void 1413 hn_mtu_change_fixup(struct hn_softc *sc) 1414 { 1415 struct ifnet *ifp; 1416 1417 HN_LOCK_ASSERT(sc); 1418 ifp = sc->hn_ifp; 1419 1420 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 1421 #if __FreeBSD_version >= 1100099 1422 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) 1423 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 1424 #endif 1425 } 1426 1427 static uint32_t 1428 hn_rss_type_fromndis(uint32_t rss_hash) 1429 { 1430 uint32_t types = 0; 1431 1432 if (rss_hash & NDIS_HASH_IPV4) 1433 types |= RSS_TYPE_IPV4; 1434 if (rss_hash & NDIS_HASH_TCP_IPV4) 1435 types |= RSS_TYPE_TCP_IPV4; 1436 if (rss_hash & NDIS_HASH_IPV6) 1437 types |= RSS_TYPE_IPV6; 1438 if (rss_hash & NDIS_HASH_IPV6_EX) 1439 types |= RSS_TYPE_IPV6_EX; 1440 if (rss_hash & NDIS_HASH_TCP_IPV6) 1441 types |= RSS_TYPE_TCP_IPV6; 1442 if (rss_hash & NDIS_HASH_TCP_IPV6_EX) 1443 types |= RSS_TYPE_TCP_IPV6_EX; 1444 if (rss_hash & NDIS_HASH_UDP_IPV4_X) 1445 types |= RSS_TYPE_UDP_IPV4; 1446 return (types); 1447 } 1448 1449 static uint32_t 1450 hn_rss_type_tondis(uint32_t types) 1451 { 1452 uint32_t rss_hash = 0; 1453 1454 KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0, 1455 ("UDP6 and UDP6EX are not supported")); 1456 1457 if (types & RSS_TYPE_IPV4) 1458 rss_hash |= NDIS_HASH_IPV4; 1459 if (types & RSS_TYPE_TCP_IPV4) 1460 rss_hash |= NDIS_HASH_TCP_IPV4; 1461 if (types & RSS_TYPE_IPV6) 1462 rss_hash |= NDIS_HASH_IPV6; 1463 if (types & RSS_TYPE_IPV6_EX) 1464 rss_hash |= NDIS_HASH_IPV6_EX; 1465 if (types & RSS_TYPE_TCP_IPV6) 1466 rss_hash |= NDIS_HASH_TCP_IPV6; 1467 if (types & RSS_TYPE_TCP_IPV6_EX) 1468 rss_hash |= NDIS_HASH_TCP_IPV6_EX; 1469 if (types & RSS_TYPE_UDP_IPV4) 1470 rss_hash |= NDIS_HASH_UDP_IPV4_X; 1471 return (rss_hash); 1472 } 1473 1474 static void 1475 hn_rss_mbuf_hash(struct hn_softc *sc, uint32_t mbuf_hash) 1476 { 1477 int i; 1478 1479 HN_LOCK_ASSERT(sc); 1480 1481 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1482 sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash; 1483 } 1484 1485 static void 1486 hn_vf_rss_fixup(struct hn_softc *sc, bool reconf) 1487 { 1488 struct ifnet *ifp, *vf_ifp; 1489 struct ifrsshash ifrh; 1490 struct ifrsskey ifrk; 1491 int error; 1492 uint32_t my_types, diff_types, mbuf_types = 0; 1493 1494 HN_LOCK_ASSERT(sc); 1495 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1496 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1497 1498 if (sc->hn_rx_ring_inuse == 1) { 1499 /* No RSS on synthetic parts; done. */ 1500 return; 1501 } 1502 if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) { 1503 /* Synthetic parts do not support Toeplitz; done. */ 1504 return; 1505 } 1506 1507 ifp = sc->hn_ifp; 1508 vf_ifp = sc->hn_vf_ifp; 1509 1510 /* 1511 * Extract VF's RSS key. Only 40 bytes key for Toeplitz is 1512 * supported. 
1513 */ 1514 memset(&ifrk, 0, sizeof(ifrk)); 1515 strlcpy(ifrk.ifrk_name, vf_ifp->if_xname, sizeof(ifrk.ifrk_name)); 1516 error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSKEY, (caddr_t)&ifrk); 1517 if (error) { 1518 if_printf(ifp, "%s SIOCGIFRSSKEY failed: %d\n", 1519 vf_ifp->if_xname, error); 1520 goto done; 1521 } 1522 if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) { 1523 if_printf(ifp, "%s RSS function %u is not Toeplitz\n", 1524 vf_ifp->if_xname, ifrk.ifrk_func); 1525 goto done; 1526 } 1527 if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) { 1528 if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n", 1529 vf_ifp->if_xname, ifrk.ifrk_keylen); 1530 goto done; 1531 } 1532 1533 /* 1534 * Extract VF's RSS hash. Only Toeplitz is supported. 1535 */ 1536 memset(&ifrh, 0, sizeof(ifrh)); 1537 strlcpy(ifrh.ifrh_name, vf_ifp->if_xname, sizeof(ifrh.ifrh_name)); 1538 error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSHASH, (caddr_t)&ifrh); 1539 if (error) { 1540 if_printf(ifp, "%s SIOCGIFRSSHASH failed: %d\n", 1541 vf_ifp->if_xname, error); 1542 goto done; 1543 } 1544 if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) { 1545 if_printf(ifp, "%s RSS function %u is not Toeplitz\n", 1546 vf_ifp->if_xname, ifrh.ifrh_func); 1547 goto done; 1548 } 1549 1550 my_types = hn_rss_type_fromndis(sc->hn_rss_hcap); 1551 if ((ifrh.ifrh_types & my_types) == 0) { 1552 /* This disables RSS; ignore it then */ 1553 if_printf(ifp, "%s intersection of RSS types failed. " 1554 "VF %#x, mine %#x\n", vf_ifp->if_xname, 1555 ifrh.ifrh_types, my_types); 1556 goto done; 1557 } 1558 1559 diff_types = my_types ^ ifrh.ifrh_types; 1560 my_types &= ifrh.ifrh_types; 1561 mbuf_types = my_types; 1562 1563 /* 1564 * Detect RSS hash value/type conflicts. 1565 * 1566 * NOTE: 1567 * We don't disable the hash type, but stop delivering the hash 1568 * value/type through mbufs on the RX path. 1569 * 1570 * XXX If HN_CAP_UDPHASH is set in hn_caps, then UDP 4-tuple 1571 * hash is delivered with type of TCP_IPV4. This means if 1572 * UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at 1573 * least to hn_mbuf_hash. However, given that _all_ of the 1574 * NICs implement TCP_IPV4, this will _not_ impose any issues 1575 * here. 1576 */ 1577 if ((my_types & RSS_TYPE_IPV4) && 1578 (diff_types & ifrh.ifrh_types & 1579 (RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) { 1580 /* Conflict; disable IPV4 hash type/value delivery. */ 1581 if_printf(ifp, "disable IPV4 mbuf hash delivery\n"); 1582 mbuf_types &= ~RSS_TYPE_IPV4; 1583 } 1584 if ((my_types & RSS_TYPE_IPV6) && 1585 (diff_types & ifrh.ifrh_types & 1586 (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | 1587 RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | 1588 RSS_TYPE_IPV6_EX))) { 1589 /* Conflict; disable IPV6 hash type/value delivery. */ 1590 if_printf(ifp, "disable IPV6 mbuf hash delivery\n"); 1591 mbuf_types &= ~RSS_TYPE_IPV6; 1592 } 1593 if ((my_types & RSS_TYPE_IPV6_EX) && 1594 (diff_types & ifrh.ifrh_types & 1595 (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | 1596 RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | 1597 RSS_TYPE_IPV6))) { 1598 /* Conflict; disable IPV6_EX hash type/value delivery. */ 1599 if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n"); 1600 mbuf_types &= ~RSS_TYPE_IPV6_EX; 1601 } 1602 if ((my_types & RSS_TYPE_TCP_IPV6) && 1603 (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) { 1604 /* Conflict; disable TCP_IPV6 hash type/value delivery. 
*/ 1605 if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n"); 1606 mbuf_types &= ~RSS_TYPE_TCP_IPV6; 1607 } 1608 if ((my_types & RSS_TYPE_TCP_IPV6_EX) && 1609 (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) { 1610 /* Conflict; disable TCP_IPV6_EX hash type/value delivery. */ 1611 if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n"); 1612 mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX; 1613 } 1614 if ((my_types & RSS_TYPE_UDP_IPV6) && 1615 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) { 1616 /* Conflict; disable UDP_IPV6 hash type/value delivery. */ 1617 if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n"); 1618 mbuf_types &= ~RSS_TYPE_UDP_IPV6; 1619 } 1620 if ((my_types & RSS_TYPE_UDP_IPV6_EX) && 1621 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) { 1622 /* Conflict; disable UDP_IPV6_EX hash type/value delivery. */ 1623 if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n"); 1624 mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX; 1625 } 1626 1627 /* 1628 * Indirect table does not matter. 1629 */ 1630 1631 sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) | 1632 hn_rss_type_tondis(my_types); 1633 memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key)); 1634 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 1635 1636 if (reconf) { 1637 error = hn_rss_reconfig(sc); 1638 if (error) { 1639 /* XXX roll-back? */ 1640 if_printf(ifp, "hn_rss_reconfig failed: %d\n", error); 1641 /* XXX keep going. */ 1642 } 1643 } 1644 done: 1645 /* Hash deliverability for mbufs. */ 1646 hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types)); 1647 } 1648 1649 static void 1650 hn_vf_rss_restore(struct hn_softc *sc) 1651 { 1652 1653 HN_LOCK_ASSERT(sc); 1654 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1655 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1656 1657 if (sc->hn_rx_ring_inuse == 1) 1658 goto done; 1659 1660 /* 1661 * Restore hash types. Key does _not_ matter. 1662 */ 1663 if (sc->hn_rss_hash != sc->hn_rss_hcap) { 1664 int error; 1665 1666 sc->hn_rss_hash = sc->hn_rss_hcap; 1667 error = hn_rss_reconfig(sc); 1668 if (error) { 1669 if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n", 1670 error); 1671 /* XXX keep going. */ 1672 } 1673 } 1674 done: 1675 /* Hash deliverability for mbufs. */ 1676 hn_rss_mbuf_hash(sc, NDIS_HASH_ALL); 1677 } 1678 1679 static void 1680 hn_xpnt_vf_setready(struct hn_softc *sc) 1681 { 1682 struct ifnet *ifp, *vf_ifp; 1683 struct ifreq ifr; 1684 1685 HN_LOCK_ASSERT(sc); 1686 ifp = sc->hn_ifp; 1687 vf_ifp = sc->hn_vf_ifp; 1688 1689 /* 1690 * Mark the VF ready. 1691 */ 1692 sc->hn_vf_rdytick = 0; 1693 1694 /* 1695 * Save information for restoration. 1696 */ 1697 sc->hn_saved_caps = ifp->if_capabilities; 1698 sc->hn_saved_tsomax = ifp->if_hw_tsomax; 1699 sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount; 1700 sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize; 1701 1702 /* 1703 * Intersect supported/enabled capabilities. 1704 * 1705 * NOTE: 1706 * if_hwassist is not changed here. 1707 */ 1708 ifp->if_capabilities &= vf_ifp->if_capabilities; 1709 ifp->if_capenable &= ifp->if_capabilities; 1710 1711 /* 1712 * Fix TSO settings. 1713 */ 1714 if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax) 1715 ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax; 1716 if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount) 1717 ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount; 1718 if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize) 1719 ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize; 1720 1721 /* 1722 * Change VF's enabled capabilities. 
1723 */ 1724 memset(&ifr, 0, sizeof(ifr)); 1725 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1726 ifr.ifr_reqcap = ifp->if_capenable; 1727 hn_xpnt_vf_iocsetcaps(sc, &ifr); 1728 1729 if (ifp->if_mtu != ETHERMTU) { 1730 int error; 1731 1732 /* 1733 * Change VF's MTU. 1734 */ 1735 memset(&ifr, 0, sizeof(ifr)); 1736 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1737 ifr.ifr_mtu = ifp->if_mtu; 1738 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr); 1739 if (error) { 1740 if_printf(ifp, "%s SIOCSIFMTU %u failed\n", 1741 vf_ifp->if_xname, ifp->if_mtu); 1742 if (ifp->if_mtu > ETHERMTU) { 1743 if_printf(ifp, "change MTU to %d\n", ETHERMTU); 1744 1745 /* 1746 * XXX 1747 * No need to adjust the synthetic parts' MTU; 1748 * failure of the adjustment will cause us 1749 * infinite headache. 1750 */ 1751 ifp->if_mtu = ETHERMTU; 1752 hn_mtu_change_fixup(sc); 1753 } 1754 } 1755 } 1756 } 1757 1758 static bool 1759 hn_xpnt_vf_isready(struct hn_softc *sc) 1760 { 1761 1762 HN_LOCK_ASSERT(sc); 1763 1764 if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL) 1765 return (false); 1766 1767 if (sc->hn_vf_rdytick == 0) 1768 return (true); 1769 1770 if (sc->hn_vf_rdytick > ticks) 1771 return (false); 1772 1773 /* Mark VF as ready. */ 1774 hn_xpnt_vf_setready(sc); 1775 return (true); 1776 } 1777 1778 static void 1779 hn_xpnt_vf_setenable(struct hn_softc *sc) 1780 { 1781 int i; 1782 1783 HN_LOCK_ASSERT(sc); 1784 1785 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1786 rm_wlock(&sc->hn_vf_lock); 1787 sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED; 1788 rm_wunlock(&sc->hn_vf_lock); 1789 1790 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1791 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF; 1792 } 1793 1794 static void 1795 hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf) 1796 { 1797 int i; 1798 1799 HN_LOCK_ASSERT(sc); 1800 1801 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1802 rm_wlock(&sc->hn_vf_lock); 1803 sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED; 1804 if (clear_vf) 1805 sc->hn_vf_ifp = NULL; 1806 rm_wunlock(&sc->hn_vf_lock); 1807 1808 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1809 sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF; 1810 } 1811 1812 static void 1813 hn_xpnt_vf_init(struct hn_softc *sc) 1814 { 1815 int error; 1816 1817 HN_LOCK_ASSERT(sc); 1818 1819 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1820 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1821 1822 if (bootverbose) { 1823 if_printf(sc->hn_ifp, "try bringing up %s\n", 1824 sc->hn_vf_ifp->if_xname); 1825 } 1826 1827 /* 1828 * Bring the VF up. 1829 */ 1830 hn_xpnt_vf_saveifflags(sc); 1831 sc->hn_vf_ifp->if_flags |= IFF_UP; 1832 error = hn_xpnt_vf_iocsetflags(sc); 1833 if (error) { 1834 if_printf(sc->hn_ifp, "bringing up %s failed: %d\n", 1835 sc->hn_vf_ifp->if_xname, error); 1836 return; 1837 } 1838 1839 /* 1840 * NOTE: 1841 * Datapath setting must happen _after_ bringing the VF up. 1842 */ 1843 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 1844 1845 /* 1846 * NOTE: 1847 * Fixup RSS related bits _after_ the VF is brought up, since 1848 * many VFs generate RSS key during it's initialization. 1849 */ 1850 hn_vf_rss_fixup(sc, true); 1851 1852 /* Mark transparent mode VF as enabled. 
*/ 1853 hn_xpnt_vf_setenable(sc); 1854 } 1855 1856 static void 1857 hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused) 1858 { 1859 struct hn_softc *sc = xsc; 1860 1861 HN_LOCK(sc); 1862 1863 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1864 goto done; 1865 if (sc->hn_vf_ifp == NULL) 1866 goto done; 1867 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 1868 goto done; 1869 1870 if (sc->hn_vf_rdytick != 0) { 1871 /* Mark VF as ready. */ 1872 hn_xpnt_vf_setready(sc); 1873 } 1874 1875 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) { 1876 /* 1877 * Delayed VF initialization. 1878 */ 1879 if (bootverbose) { 1880 if_printf(sc->hn_ifp, "delayed initialize %s\n", 1881 sc->hn_vf_ifp->if_xname); 1882 } 1883 hn_xpnt_vf_init(sc); 1884 } 1885 done: 1886 HN_UNLOCK(sc); 1887 } 1888 1889 static void 1890 hn_ifnet_attevent(void *xsc, struct ifnet *ifp) 1891 { 1892 struct hn_softc *sc = xsc; 1893 1894 HN_LOCK(sc); 1895 1896 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1897 goto done; 1898 1899 if (!hn_ismyvf(sc, ifp)) 1900 goto done; 1901 1902 if (sc->hn_vf_ifp != NULL) { 1903 if_printf(sc->hn_ifp, "%s was attached as VF\n", 1904 sc->hn_vf_ifp->if_xname); 1905 goto done; 1906 } 1907 1908 if (hn_xpnt_vf && ifp->if_start != NULL) { 1909 /* 1910 * ifnet.if_start is _not_ supported by transparent 1911 * mode VF; mainly due to the IFF_DRV_OACTIVE flag. 1912 */ 1913 if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported " 1914 "in transparent VF mode.\n", ifp->if_xname); 1915 goto done; 1916 } 1917 1918 rm_wlock(&hn_vfmap_lock); 1919 1920 if (ifp->if_index >= hn_vfmap_size) { 1921 struct ifnet **newmap; 1922 int newsize; 1923 1924 newsize = ifp->if_index + HN_VFMAP_SIZE_DEF; 1925 newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF, 1926 M_WAITOK | M_ZERO); 1927 1928 memcpy(newmap, hn_vfmap, 1929 sizeof(struct ifnet *) * hn_vfmap_size); 1930 free(hn_vfmap, M_DEVBUF); 1931 hn_vfmap = newmap; 1932 hn_vfmap_size = newsize; 1933 } 1934 KASSERT(hn_vfmap[ifp->if_index] == NULL, 1935 ("%s: ifindex %d was mapped to %s", 1936 ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname)); 1937 hn_vfmap[ifp->if_index] = sc->hn_ifp; 1938 1939 rm_wunlock(&hn_vfmap_lock); 1940 1941 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1942 rm_wlock(&sc->hn_vf_lock); 1943 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1944 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1945 sc->hn_vf_ifp = ifp; 1946 rm_wunlock(&sc->hn_vf_lock); 1947 1948 if (hn_xpnt_vf) { 1949 int wait_ticks; 1950 1951 /* 1952 * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp. 1953 * Save vf_ifp's current if_input for later restoration. 1954 */ 1955 sc->hn_vf_input = ifp->if_input; 1956 ifp->if_input = hn_xpnt_vf_input; 1957 1958 /* 1959 * Stop link status management; use the VF's. 1960 */ 1961 hn_suspend_mgmt(sc); 1962 1963 /* 1964 * Give VF sometime to complete its attach routing. 1965 */ 1966 wait_ticks = hn_xpnt_vf_attwait * hz; 1967 sc->hn_vf_rdytick = ticks + wait_ticks; 1968 1969 taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init, 1970 wait_ticks); 1971 } 1972 done: 1973 HN_UNLOCK(sc); 1974 } 1975 1976 static void 1977 hn_ifnet_detevent(void *xsc, struct ifnet *ifp) 1978 { 1979 struct hn_softc *sc = xsc; 1980 1981 HN_LOCK(sc); 1982 1983 if (sc->hn_vf_ifp == NULL) 1984 goto done; 1985 1986 if (!hn_ismyvf(sc, ifp)) 1987 goto done; 1988 1989 if (hn_xpnt_vf) { 1990 /* 1991 * Make sure that the delayed initialization is not running. 
1992 * 1993 * NOTE: 1994 * - This lock _must_ be released, since the hn_vf_init task 1995 * will try holding this lock. 1996 * - It is safe to release this lock here, since the 1997 * hn_ifnet_attevent() is interlocked by the hn_vf_ifp. 1998 * 1999 * XXX racy, if hn(4) ever detached. 2000 */ 2001 HN_UNLOCK(sc); 2002 taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init); 2003 HN_LOCK(sc); 2004 2005 KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved", 2006 sc->hn_ifp->if_xname)); 2007 ifp->if_input = sc->hn_vf_input; 2008 sc->hn_vf_input = NULL; 2009 2010 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) && 2011 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) 2012 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 2013 2014 if (sc->hn_vf_rdytick == 0) { 2015 /* 2016 * The VF was ready; restore some settings. 2017 */ 2018 sc->hn_ifp->if_capabilities = sc->hn_saved_caps; 2019 /* 2020 * NOTE: 2021 * There is _no_ need to fixup if_capenable and 2022 * if_hwassist, since the if_capabilities before 2023 * restoration was an intersection of the VF's 2024 * if_capabilites and the synthetic device's 2025 * if_capabilites. 2026 */ 2027 sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax; 2028 sc->hn_ifp->if_hw_tsomaxsegcount = 2029 sc->hn_saved_tsosegcnt; 2030 sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz; 2031 } 2032 2033 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2034 /* 2035 * Restore RSS settings. 2036 */ 2037 hn_vf_rss_restore(sc); 2038 2039 /* 2040 * Resume link status management, which was suspended 2041 * by hn_ifnet_attevent(). 2042 */ 2043 hn_resume_mgmt(sc); 2044 } 2045 } 2046 2047 /* Mark transparent mode VF as disabled. */ 2048 hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */); 2049 2050 rm_wlock(&hn_vfmap_lock); 2051 2052 KASSERT(ifp->if_index < hn_vfmap_size, 2053 ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size)); 2054 if (hn_vfmap[ifp->if_index] != NULL) { 2055 KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp, 2056 ("%s: ifindex %d was mapped to %s", 2057 ifp->if_xname, ifp->if_index, 2058 hn_vfmap[ifp->if_index]->if_xname)); 2059 hn_vfmap[ifp->if_index] = NULL; 2060 } 2061 2062 rm_wunlock(&hn_vfmap_lock); 2063 done: 2064 HN_UNLOCK(sc); 2065 } 2066 2067 static void 2068 hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state) 2069 { 2070 struct hn_softc *sc = xsc; 2071 2072 if (sc->hn_vf_ifp == ifp) 2073 if_link_state_change(sc->hn_ifp, link_state); 2074 } 2075 2076 static int 2077 hn_probe(device_t dev) 2078 { 2079 2080 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) { 2081 device_set_desc(dev, "Hyper-V Network Interface"); 2082 return BUS_PROBE_DEFAULT; 2083 } 2084 return ENXIO; 2085 } 2086 2087 static int 2088 hn_attach(device_t dev) 2089 { 2090 struct hn_softc *sc = device_get_softc(dev); 2091 struct sysctl_oid_list *child; 2092 struct sysctl_ctx_list *ctx; 2093 uint8_t eaddr[ETHER_ADDR_LEN]; 2094 struct ifnet *ifp = NULL; 2095 int error, ring_cnt, tx_ring_cnt; 2096 uint32_t mtu; 2097 2098 sc->hn_dev = dev; 2099 sc->hn_prichan = vmbus_get_channel(dev); 2100 HN_LOCK_INIT(sc); 2101 rm_init(&sc->hn_vf_lock, "hnvf"); 2102 if (hn_xpnt_vf && hn_xpnt_vf_accbpf) 2103 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 2104 2105 /* 2106 * Initialize these tunables once. 2107 */ 2108 sc->hn_agg_size = hn_tx_agg_size; 2109 sc->hn_agg_pkts = hn_tx_agg_pkts; 2110 2111 /* 2112 * Setup taskqueue for transmission. 
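 *
 * Depending on the hn_tx_taskq_mode tunable this is either a set of
 * driver-private taskqueues with one thread each (HN_TX_TASKQ_M_INDEP,
 * created below), the module-global hn_tx_taskque array
 * (HN_TX_TASKQ_M_GLOBAL), or nothing at all, in which case the TX
 * rings presumably fall back to the VMBus channel taskqueues.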
2113 */ 2114 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) { 2115 int i; 2116 2117 sc->hn_tx_taskqs = 2118 malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 2119 M_DEVBUF, M_WAITOK); 2120 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 2121 sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx", 2122 M_WAITOK, taskqueue_thread_enqueue, 2123 &sc->hn_tx_taskqs[i]); 2124 taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET, 2125 "%s tx%d", device_get_nameunit(dev), i); 2126 } 2127 } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) { 2128 sc->hn_tx_taskqs = hn_tx_taskque; 2129 } 2130 2131 /* 2132 * Setup taskqueue for mangement tasks, e.g. link status. 2133 */ 2134 sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, 2135 taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); 2136 taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", 2137 device_get_nameunit(dev)); 2138 TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); 2139 TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); 2140 TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, 2141 hn_netchg_status_taskfunc, sc); 2142 2143 if (hn_xpnt_vf) { 2144 /* 2145 * Setup taskqueue for VF tasks, e.g. delayed VF bringing up. 2146 */ 2147 sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK, 2148 taskqueue_thread_enqueue, &sc->hn_vf_taskq); 2149 taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf", 2150 device_get_nameunit(dev)); 2151 TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0, 2152 hn_xpnt_vf_init_taskfunc, sc); 2153 } 2154 2155 /* 2156 * Allocate ifnet and setup its name earlier, so that if_printf 2157 * can be used by functions, which will be called after 2158 * ether_ifattach(). 2159 */ 2160 ifp = sc->hn_ifp = if_alloc(IFT_ETHER); 2161 ifp->if_softc = sc; 2162 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 2163 2164 /* 2165 * Initialize ifmedia earlier so that it can be unconditionally 2166 * destroyed, if error happened later on. 2167 */ 2168 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 2169 2170 /* 2171 * Figure out the # of RX rings (ring_cnt) and the # of TX rings 2172 * to use (tx_ring_cnt). 2173 * 2174 * NOTE: 2175 * The # of RX rings to use is same as the # of channels to use. 2176 */ 2177 ring_cnt = hn_chan_cnt; 2178 if (ring_cnt <= 0) { 2179 /* Default */ 2180 ring_cnt = mp_ncpus; 2181 if (ring_cnt > HN_RING_CNT_DEF_MAX) 2182 ring_cnt = HN_RING_CNT_DEF_MAX; 2183 } else if (ring_cnt > mp_ncpus) { 2184 ring_cnt = mp_ncpus; 2185 } 2186 #ifdef RSS 2187 if (ring_cnt > rss_getnumbuckets()) 2188 ring_cnt = rss_getnumbuckets(); 2189 #endif 2190 2191 tx_ring_cnt = hn_tx_ring_cnt; 2192 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) 2193 tx_ring_cnt = ring_cnt; 2194 #ifdef HN_IFSTART_SUPPORT 2195 if (hn_use_if_start) { 2196 /* ifnet.if_start only needs one TX ring. */ 2197 tx_ring_cnt = 1; 2198 } 2199 #endif 2200 2201 /* 2202 * Set the leader CPU for channels. 2203 */ 2204 sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; 2205 2206 /* 2207 * Create enough TX/RX rings, even if only limited number of 2208 * channels can be allocated. 2209 */ 2210 error = hn_create_tx_data(sc, tx_ring_cnt); 2211 if (error) 2212 goto failed; 2213 error = hn_create_rx_data(sc, ring_cnt); 2214 if (error) 2215 goto failed; 2216 2217 /* 2218 * Create transaction context for NVS and RNDIS transactions. 
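 *
 * The xact context is a small request/response helper bound to the
 * device's DMA tag; its request and response buffers are capped at
 * HN_XACT_REQ_SIZE and HN_XACT_RESP_SIZE.  Conceptually, a caller
 * grabs a transaction, fills in the NVS/RNDIS request, sends it on
 * the primary channel and sleeps until the matching completion is
 * posted, or until the context is orphaned (see below).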
2219 */ 2220 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), 2221 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); 2222 if (sc->hn_xact == NULL) { 2223 error = ENXIO; 2224 goto failed; 2225 } 2226 2227 /* 2228 * Install orphan handler for the revocation of this device's 2229 * primary channel. 2230 * 2231 * NOTE: 2232 * The processing order is critical here: 2233 * Install the orphan handler, _before_ testing whether this 2234 * device's primary channel has been revoked or not. 2235 */ 2236 vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); 2237 if (vmbus_chan_is_revoked(sc->hn_prichan)) { 2238 error = ENXIO; 2239 goto failed; 2240 } 2241 2242 /* 2243 * Attach the synthetic parts, i.e. NVS and RNDIS. 2244 */ 2245 error = hn_synth_attach(sc, ETHERMTU); 2246 if (error) 2247 goto failed; 2248 2249 error = hn_rndis_get_eaddr(sc, eaddr); 2250 if (error) 2251 goto failed; 2252 2253 error = hn_rndis_get_mtu(sc, &mtu); 2254 if (error) 2255 mtu = ETHERMTU; 2256 else if (bootverbose) 2257 device_printf(dev, "RNDIS mtu %u\n", mtu); 2258 2259 #if __FreeBSD_version >= 1100099 2260 if (sc->hn_rx_ring_inuse > 1) { 2261 /* 2262 * Reduce TCP segment aggregation limit for multiple 2263 * RX rings to increase ACK timeliness. 2264 */ 2265 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); 2266 } 2267 #endif 2268 2269 /* 2270 * Fixup TX/RX stuffs after synthetic parts are attached. 2271 */ 2272 hn_fixup_tx_data(sc); 2273 hn_fixup_rx_data(sc); 2274 2275 ctx = device_get_sysctl_ctx(dev); 2276 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 2277 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, 2278 &sc->hn_nvs_ver, 0, "NVS version"); 2279 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", 2280 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2281 hn_ndis_version_sysctl, "A", "NDIS version"); 2282 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", 2283 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2284 hn_caps_sysctl, "A", "capabilities"); 2285 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", 2286 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2287 hn_hwassist_sysctl, "A", "hwassist"); 2288 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", 2289 CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); 2290 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", 2291 CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, 2292 "max # of TSO segments"); 2293 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", 2294 CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0, 2295 "max size of TSO segment"); 2296 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", 2297 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2298 hn_rxfilter_sysctl, "A", "rxfilter"); 2299 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", 2300 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2301 hn_rss_hash_sysctl, "A", "RSS hash"); 2302 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap", 2303 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2304 hn_rss_hcap_sysctl, "A", "RSS hash capabilities"); 2305 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash", 2306 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2307 hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs"); 2308 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", 2309 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); 2310 #ifndef RSS 2311 /* 2312 * Don't allow RSS key/indirect table changes, if RSS is defined. 
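 *
 * I.e. these two read/write nodes are only created when the kernel is
 * built _without_ "options RSS"; with RSS the key and the indirect
 * table are owned by the network stack, so hn(4) should not let them
 * be overridden per-interface.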
2313 */ 2314 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", 2315 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2316 hn_rss_key_sysctl, "IU", "RSS key"); 2317 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", 2318 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2319 hn_rss_ind_sysctl, "IU", "RSS indirect table"); 2320 #endif 2321 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", 2322 CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, 2323 "RNDIS offered packet transmission aggregation size limit"); 2324 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", 2325 CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, 2326 "RNDIS offered packet transmission aggregation count limit"); 2327 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", 2328 CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, 2329 "RNDIS packet transmission aggregation alignment"); 2330 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", 2331 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2332 hn_txagg_size_sysctl, "I", 2333 "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); 2334 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", 2335 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2336 hn_txagg_pkts_sysctl, "I", 2337 "Packet transmission aggregation packets, " 2338 "0 -- disable, -1 -- auto"); 2339 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling", 2340 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2341 hn_polling_sysctl, "I", 2342 "Polling frequency: [100,1000000], 0 disable polling"); 2343 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", 2344 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2345 hn_vf_sysctl, "A", "Virtual Function's name"); 2346 if (!hn_xpnt_vf) { 2347 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", 2348 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2349 hn_rxvf_sysctl, "A", "activated Virtual Function's name"); 2350 } else { 2351 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", 2352 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2353 hn_xpnt_vf_enabled_sysctl, "I", 2354 "Transparent VF enabled"); 2355 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", 2356 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2357 hn_xpnt_vf_accbpf_sysctl, "I", 2358 "Accurate BPF for transparent VF"); 2359 } 2360 2361 /* 2362 * Setup the ifmedia, which has been initialized earlier. 2363 */ 2364 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 2365 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 2366 /* XXX ifmedia_set really should do this for us */ 2367 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 2368 2369 /* 2370 * Setup the ifnet for this interface. 2371 */ 2372 2373 ifp->if_baudrate = IF_Gbps(10); 2374 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2375 ifp->if_ioctl = hn_ioctl; 2376 ifp->if_init = hn_init; 2377 #ifdef HN_IFSTART_SUPPORT 2378 if (hn_use_if_start) { 2379 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); 2380 2381 ifp->if_start = hn_start; 2382 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); 2383 ifp->if_snd.ifq_drv_maxlen = qdepth - 1; 2384 IFQ_SET_READY(&ifp->if_snd); 2385 } else 2386 #endif 2387 { 2388 ifp->if_transmit = hn_transmit; 2389 ifp->if_qflush = hn_xmit_qflush; 2390 } 2391 2392 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; 2393 #ifdef foo 2394 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2395 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; 2396 #endif 2397 if (sc->hn_caps & HN_CAP_VLAN) { 2398 /* XXX not sure about VLAN_MTU. 
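 *
 * IFCAP_VLAN_MTU only advertises that full-sized frames extended by
 * the 4-byte 802.1Q tag can be passed through; it does not involve
 * any offload negotiation with the host.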
*/ 2399 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; 2400 } 2401 2402 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; 2403 if (ifp->if_hwassist & HN_CSUM_IP_MASK) 2404 ifp->if_capabilities |= IFCAP_TXCSUM; 2405 if (ifp->if_hwassist & HN_CSUM_IP6_MASK) 2406 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; 2407 if (sc->hn_caps & HN_CAP_TSO4) { 2408 ifp->if_capabilities |= IFCAP_TSO4; 2409 ifp->if_hwassist |= CSUM_IP_TSO; 2410 } 2411 if (sc->hn_caps & HN_CAP_TSO6) { 2412 ifp->if_capabilities |= IFCAP_TSO6; 2413 ifp->if_hwassist |= CSUM_IP6_TSO; 2414 } 2415 2416 /* Enable all available capabilities by default. */ 2417 ifp->if_capenable = ifp->if_capabilities; 2418 2419 /* 2420 * Disable IPv6 TSO and TXCSUM by default, they still can 2421 * be enabled through SIOCSIFCAP. 2422 */ 2423 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); 2424 ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); 2425 2426 if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { 2427 /* 2428 * Lock hn_set_tso_maxsize() to simplify its 2429 * internal logic. 2430 */ 2431 HN_LOCK(sc); 2432 hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); 2433 HN_UNLOCK(sc); 2434 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; 2435 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2436 } 2437 2438 ether_ifattach(ifp, eaddr); 2439 2440 if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { 2441 if_printf(ifp, "TSO segcnt %u segsz %u\n", 2442 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); 2443 } 2444 if (mtu < ETHERMTU) { 2445 if_printf(ifp, "fixup mtu %u -> %u\n", ifp->if_mtu, mtu); 2446 ifp->if_mtu = mtu; 2447 } 2448 2449 /* Inform the upper layer about the long frame support. */ 2450 ifp->if_hdrlen = sizeof(struct ether_vlan_header); 2451 2452 /* 2453 * Kick off link status check. 2454 */ 2455 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 2456 hn_update_link_status(sc); 2457 2458 if (!hn_xpnt_vf) { 2459 sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, 2460 hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); 2461 sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, 2462 hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); 2463 } else { 2464 sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, 2465 hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); 2466 } 2467 2468 /* 2469 * NOTE: 2470 * Subscribe ether_ifattach event, instead of ifnet_arrival event, 2471 * since interface's LLADDR is needed; interface LLADDR is not 2472 * available when ifnet_arrival event is triggered. 2473 */ 2474 sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event, 2475 hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY); 2476 sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event, 2477 hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY); 2478 2479 return (0); 2480 failed: 2481 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) 2482 hn_synth_detach(sc); 2483 hn_detach(dev); 2484 return (error); 2485 } 2486 2487 static int 2488 hn_detach(device_t dev) 2489 { 2490 struct hn_softc *sc = device_get_softc(dev); 2491 struct ifnet *ifp = sc->hn_ifp, *vf_ifp; 2492 2493 if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { 2494 /* 2495 * In case that the vmbus missed the orphan handler 2496 * installation. 
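 *
 * Orphaning the xact context wakes up and fails any transaction that
 * is still waiting for a reply on the revoked primary channel, so the
 * detach path below cannot hang on NVS/RNDIS requests that will never
 * complete.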
2497 */ 2498 vmbus_xact_ctx_orphan(sc->hn_xact); 2499 } 2500 2501 if (sc->hn_ifaddr_evthand != NULL) 2502 EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand); 2503 if (sc->hn_ifnet_evthand != NULL) 2504 EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand); 2505 if (sc->hn_ifnet_atthand != NULL) { 2506 EVENTHANDLER_DEREGISTER(ether_ifattach_event, 2507 sc->hn_ifnet_atthand); 2508 } 2509 if (sc->hn_ifnet_dethand != NULL) { 2510 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 2511 sc->hn_ifnet_dethand); 2512 } 2513 if (sc->hn_ifnet_lnkhand != NULL) 2514 EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand); 2515 2516 vf_ifp = sc->hn_vf_ifp; 2517 __compiler_membar(); 2518 if (vf_ifp != NULL) 2519 hn_ifnet_detevent(sc, vf_ifp); 2520 2521 if (device_is_attached(dev)) { 2522 HN_LOCK(sc); 2523 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2524 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2525 hn_stop(sc, true); 2526 /* 2527 * NOTE: 2528 * hn_stop() only suspends data, so managment 2529 * stuffs have to be suspended manually here. 2530 */ 2531 hn_suspend_mgmt(sc); 2532 hn_synth_detach(sc); 2533 } 2534 HN_UNLOCK(sc); 2535 ether_ifdetach(ifp); 2536 } 2537 2538 ifmedia_removeall(&sc->hn_media); 2539 hn_destroy_rx_data(sc); 2540 hn_destroy_tx_data(sc); 2541 2542 if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) { 2543 int i; 2544 2545 for (i = 0; i < hn_tx_taskq_cnt; ++i) 2546 taskqueue_free(sc->hn_tx_taskqs[i]); 2547 free(sc->hn_tx_taskqs, M_DEVBUF); 2548 } 2549 taskqueue_free(sc->hn_mgmt_taskq0); 2550 if (sc->hn_vf_taskq != NULL) 2551 taskqueue_free(sc->hn_vf_taskq); 2552 2553 if (sc->hn_xact != NULL) { 2554 /* 2555 * Uninstall the orphan handler _before_ the xact is 2556 * destructed. 2557 */ 2558 vmbus_chan_unset_orphan(sc->hn_prichan); 2559 vmbus_xact_ctx_destroy(sc->hn_xact); 2560 } 2561 2562 if_free(ifp); 2563 2564 HN_LOCK_DESTROY(sc); 2565 rm_destroy(&sc->hn_vf_lock); 2566 return (0); 2567 } 2568 2569 static int 2570 hn_shutdown(device_t dev) 2571 { 2572 2573 return (0); 2574 } 2575 2576 static void 2577 hn_link_status(struct hn_softc *sc) 2578 { 2579 uint32_t link_status; 2580 int error; 2581 2582 error = hn_rndis_get_linkstatus(sc, &link_status); 2583 if (error) { 2584 /* XXX what to do? */ 2585 return; 2586 } 2587 2588 if (link_status == NDIS_MEDIA_STATE_CONNECTED) 2589 sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; 2590 else 2591 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2592 if_link_state_change(sc->hn_ifp, 2593 (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? 2594 LINK_STATE_UP : LINK_STATE_DOWN); 2595 } 2596 2597 static void 2598 hn_link_taskfunc(void *xsc, int pending __unused) 2599 { 2600 struct hn_softc *sc = xsc; 2601 2602 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 2603 return; 2604 hn_link_status(sc); 2605 } 2606 2607 static void 2608 hn_netchg_init_taskfunc(void *xsc, int pending __unused) 2609 { 2610 struct hn_softc *sc = xsc; 2611 2612 /* Prevent any link status checks from running. */ 2613 sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; 2614 2615 /* 2616 * Fake up a [link down --> link up] state change; 5 seconds 2617 * delay is used, which closely simulates miibus reaction 2618 * upon link down event. 
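 *
 * Roughly the sequence is:
 *
 *	hn_change_network() ->
 *	    hn_netchg_init_taskfunc():   set HN_LINK_FLAG_NETCHG,
 *	        report link down, schedule hn_netchg_status in 5*hz;
 *	    hn_netchg_status_taskfunc(): clear HN_LINK_FLAG_NETCHG,
 *	        re-query and report the real link state.
 *
 * While HN_LINK_FLAG_NETCHG is set, hn_link_taskfunc() is a NOP, so
 * ordinary link checks cannot cancel the faked link-down state early.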
2619 */ 2620 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2621 if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); 2622 taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, 2623 &sc->hn_netchg_status, 5 * hz); 2624 } 2625 2626 static void 2627 hn_netchg_status_taskfunc(void *xsc, int pending __unused) 2628 { 2629 struct hn_softc *sc = xsc; 2630 2631 /* Re-allow link status checks. */ 2632 sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; 2633 hn_link_status(sc); 2634 } 2635 2636 static void 2637 hn_update_link_status(struct hn_softc *sc) 2638 { 2639 2640 if (sc->hn_mgmt_taskq != NULL) 2641 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); 2642 } 2643 2644 static void 2645 hn_change_network(struct hn_softc *sc) 2646 { 2647 2648 if (sc->hn_mgmt_taskq != NULL) 2649 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); 2650 } 2651 2652 static __inline int 2653 hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, 2654 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) 2655 { 2656 struct mbuf *m = *m_head; 2657 int error; 2658 2659 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); 2660 2661 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, 2662 m, segs, nsegs, BUS_DMA_NOWAIT); 2663 if (error == EFBIG) { 2664 struct mbuf *m_new; 2665 2666 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); 2667 if (m_new == NULL) 2668 return ENOBUFS; 2669 else 2670 *m_head = m = m_new; 2671 txr->hn_tx_collapsed++; 2672 2673 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, 2674 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); 2675 } 2676 if (!error) { 2677 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, 2678 BUS_DMASYNC_PREWRITE); 2679 txd->flags |= HN_TXD_FLAG_DMAMAP; 2680 } 2681 return error; 2682 } 2683 2684 static __inline int 2685 hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) 2686 { 2687 2688 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, 2689 ("put an onlist txd %#x", txd->flags)); 2690 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2691 ("put an onagg txd %#x", txd->flags)); 2692 2693 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2694 if (atomic_fetchadd_int(&txd->refs, -1) != 1) 2695 return 0; 2696 2697 if (!STAILQ_EMPTY(&txd->agg_list)) { 2698 struct hn_txdesc *tmp_txd; 2699 2700 while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { 2701 int freed; 2702 2703 KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), 2704 ("resursive aggregation on aggregated txdesc")); 2705 KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), 2706 ("not aggregated txdesc")); 2707 KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2708 ("aggregated txdesc uses dmamap")); 2709 KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2710 ("aggregated txdesc consumes " 2711 "chimney sending buffer")); 2712 KASSERT(tmp_txd->chim_size == 0, 2713 ("aggregated txdesc has non-zero " 2714 "chimney sending size")); 2715 2716 STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); 2717 tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; 2718 freed = hn_txdesc_put(txr, tmp_txd); 2719 KASSERT(freed, ("failed to free aggregated txdesc")); 2720 } 2721 } 2722 2723 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 2724 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2725 ("chim txd uses dmamap")); 2726 hn_chim_free(txr->hn_sc, txd->chim_index); 2727 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2728 txd->chim_size = 0; 2729 } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { 2730 bus_dmamap_sync(txr->hn_tx_data_dtag, 2731 txd->data_dmap, BUS_DMASYNC_POSTWRITE); 2732 
bus_dmamap_unload(txr->hn_tx_data_dtag, 2733 txd->data_dmap); 2734 txd->flags &= ~HN_TXD_FLAG_DMAMAP; 2735 } 2736 2737 if (txd->m != NULL) { 2738 m_freem(txd->m); 2739 txd->m = NULL; 2740 } 2741 2742 txd->flags |= HN_TXD_FLAG_ONLIST; 2743 #ifndef HN_USE_TXDESC_BUFRING 2744 mtx_lock_spin(&txr->hn_txlist_spin); 2745 KASSERT(txr->hn_txdesc_avail >= 0 && 2746 txr->hn_txdesc_avail < txr->hn_txdesc_cnt, 2747 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); 2748 txr->hn_txdesc_avail++; 2749 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 2750 mtx_unlock_spin(&txr->hn_txlist_spin); 2751 #else /* HN_USE_TXDESC_BUFRING */ 2752 #ifdef HN_DEBUG 2753 atomic_add_int(&txr->hn_txdesc_avail, 1); 2754 #endif 2755 buf_ring_enqueue(txr->hn_txdesc_br, txd); 2756 #endif /* !HN_USE_TXDESC_BUFRING */ 2757 2758 return 1; 2759 } 2760 2761 static __inline struct hn_txdesc * 2762 hn_txdesc_get(struct hn_tx_ring *txr) 2763 { 2764 struct hn_txdesc *txd; 2765 2766 #ifndef HN_USE_TXDESC_BUFRING 2767 mtx_lock_spin(&txr->hn_txlist_spin); 2768 txd = SLIST_FIRST(&txr->hn_txlist); 2769 if (txd != NULL) { 2770 KASSERT(txr->hn_txdesc_avail > 0, 2771 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); 2772 txr->hn_txdesc_avail--; 2773 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 2774 } 2775 mtx_unlock_spin(&txr->hn_txlist_spin); 2776 #else 2777 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); 2778 #endif 2779 2780 if (txd != NULL) { 2781 #ifdef HN_USE_TXDESC_BUFRING 2782 #ifdef HN_DEBUG 2783 atomic_subtract_int(&txr->hn_txdesc_avail, 1); 2784 #endif 2785 #endif /* HN_USE_TXDESC_BUFRING */ 2786 KASSERT(txd->m == NULL && txd->refs == 0 && 2787 STAILQ_EMPTY(&txd->agg_list) && 2788 txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 2789 txd->chim_size == 0 && 2790 (txd->flags & HN_TXD_FLAG_ONLIST) && 2791 (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && 2792 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); 2793 txd->flags &= ~HN_TXD_FLAG_ONLIST; 2794 txd->refs = 1; 2795 } 2796 return txd; 2797 } 2798 2799 static __inline void 2800 hn_txdesc_hold(struct hn_txdesc *txd) 2801 { 2802 2803 /* 0->1 transition will never work */ 2804 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2805 atomic_add_int(&txd->refs, 1); 2806 } 2807 2808 static __inline void 2809 hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) 2810 { 2811 2812 KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2813 ("recursive aggregation on aggregating txdesc")); 2814 2815 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2816 ("already aggregated")); 2817 KASSERT(STAILQ_EMPTY(&txd->agg_list), 2818 ("recursive aggregation on to-be-aggregated txdesc")); 2819 2820 txd->flags |= HN_TXD_FLAG_ONAGG; 2821 STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); 2822 } 2823 2824 static bool 2825 hn_tx_ring_pending(struct hn_tx_ring *txr) 2826 { 2827 bool pending = false; 2828 2829 #ifndef HN_USE_TXDESC_BUFRING 2830 mtx_lock_spin(&txr->hn_txlist_spin); 2831 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) 2832 pending = true; 2833 mtx_unlock_spin(&txr->hn_txlist_spin); 2834 #else 2835 if (!buf_ring_full(txr->hn_txdesc_br)) 2836 pending = true; 2837 #endif 2838 return (pending); 2839 } 2840 2841 static __inline void 2842 hn_txeof(struct hn_tx_ring *txr) 2843 { 2844 txr->hn_has_txeof = 0; 2845 txr->hn_txeof(txr); 2846 } 2847 2848 static void 2849 hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, 2850 struct vmbus_channel *chan, const void *data __unused, int dlen __unused) 2851 { 2852 struct hn_txdesc *txd = sndc->hn_cbarg; 2853 struct 
hn_tx_ring *txr; 2854 2855 txr = txd->txr; 2856 KASSERT(txr->hn_chan == chan, 2857 ("channel mismatch, on chan%u, should be chan%u", 2858 vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); 2859 2860 txr->hn_has_txeof = 1; 2861 hn_txdesc_put(txr, txd); 2862 2863 ++txr->hn_txdone_cnt; 2864 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { 2865 txr->hn_txdone_cnt = 0; 2866 if (txr->hn_oactive) 2867 hn_txeof(txr); 2868 } 2869 } 2870 2871 static void 2872 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 2873 { 2874 #if defined(INET) || defined(INET6) 2875 tcp_lro_flush_all(&rxr->hn_lro); 2876 #endif 2877 2878 /* 2879 * NOTE: 2880 * 'txr' could be NULL, if multiple channels and 2881 * ifnet.if_start method are enabled. 2882 */ 2883 if (txr == NULL || !txr->hn_has_txeof) 2884 return; 2885 2886 txr->hn_txdone_cnt = 0; 2887 hn_txeof(txr); 2888 } 2889 2890 static __inline uint32_t 2891 hn_rndis_pktmsg_offset(uint32_t ofs) 2892 { 2893 2894 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 2895 ("invalid RNDIS packet msg offset %u", ofs)); 2896 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 2897 } 2898 2899 static __inline void * 2900 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 2901 size_t pi_dlen, uint32_t pi_type) 2902 { 2903 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 2904 struct rndis_pktinfo *pi; 2905 2906 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 2907 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 2908 2909 /* 2910 * Per-packet-info does not move; it only grows. 2911 * 2912 * NOTE: 2913 * rm_pktinfooffset in this phase counts from the beginning 2914 * of rndis_packet_msg. 2915 */ 2916 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 2917 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 2918 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 2919 pkt->rm_pktinfolen); 2920 pkt->rm_pktinfolen += pi_size; 2921 2922 pi->rm_size = pi_size; 2923 pi->rm_type = pi_type; 2924 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 2925 2926 return (pi->rm_data); 2927 } 2928 2929 static __inline int 2930 hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) 2931 { 2932 struct hn_txdesc *txd; 2933 struct mbuf *m; 2934 int error, pkts; 2935 2936 txd = txr->hn_agg_txd; 2937 KASSERT(txd != NULL, ("no aggregate txdesc")); 2938 2939 /* 2940 * Since hn_txpkt() will reset this temporary stat, save 2941 * it now, so that oerrors can be updated properly, if 2942 * hn_txpkt() ever fails. 2943 */ 2944 pkts = txr->hn_stat_pkts; 2945 2946 /* 2947 * Since txd's mbuf will _not_ be freed upon hn_txpkt() 2948 * failure, save it for later freeing, if hn_txpkt() ever 2949 * fails. 2950 */ 2951 m = txd->m; 2952 error = hn_txpkt(ifp, txr, txd); 2953 if (__predict_false(error)) { 2954 /* txd is freed, but m is not. */ 2955 m_freem(m); 2956 2957 txr->hn_flush_failed++; 2958 if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); 2959 } 2960 2961 /* Reset all aggregation states. 
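 *
 * These four fields (hn_agg_txd, hn_agg_szleft, hn_agg_pktleft and
 * hn_agg_prevpkt) are only re-armed by hn_try_txagg() when the next
 * chimney-capable packet starts a new aggregation batch.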
*/ 2962 txr->hn_agg_txd = NULL; 2963 txr->hn_agg_szleft = 0; 2964 txr->hn_agg_pktleft = 0; 2965 txr->hn_agg_prevpkt = NULL; 2966 2967 return (error); 2968 } 2969 2970 static void * 2971 hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2972 int pktsize) 2973 { 2974 void *chim; 2975 2976 if (txr->hn_agg_txd != NULL) { 2977 if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { 2978 struct hn_txdesc *agg_txd = txr->hn_agg_txd; 2979 struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; 2980 int olen; 2981 2982 /* 2983 * Update the previous RNDIS packet's total length, 2984 * it can be increased due to the mandatory alignment 2985 * padding for this RNDIS packet. And update the 2986 * aggregating txdesc's chimney sending buffer size 2987 * accordingly. 2988 * 2989 * XXX 2990 * Zero-out the padding, as required by the RNDIS spec. 2991 */ 2992 olen = pkt->rm_len; 2993 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 2994 agg_txd->chim_size += pkt->rm_len - olen; 2995 2996 /* Link this txdesc to the parent. */ 2997 hn_txdesc_agg(agg_txd, txd); 2998 2999 chim = (uint8_t *)pkt + pkt->rm_len; 3000 /* Save the current packet for later fixup. */ 3001 txr->hn_agg_prevpkt = chim; 3002 3003 txr->hn_agg_pktleft--; 3004 txr->hn_agg_szleft -= pktsize; 3005 if (txr->hn_agg_szleft <= 3006 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 3007 /* 3008 * Probably can't aggregate more packets, 3009 * flush this aggregating txdesc proactively. 3010 */ 3011 txr->hn_agg_pktleft = 0; 3012 } 3013 /* Done! */ 3014 return (chim); 3015 } 3016 hn_flush_txagg(ifp, txr); 3017 } 3018 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 3019 3020 txr->hn_tx_chimney_tried++; 3021 txd->chim_index = hn_chim_alloc(txr->hn_sc); 3022 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 3023 return (NULL); 3024 txr->hn_tx_chimney++; 3025 3026 chim = txr->hn_sc->hn_chim + 3027 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 3028 3029 if (txr->hn_agg_pktmax > 1 && 3030 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 3031 txr->hn_agg_txd = txd; 3032 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 3033 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 3034 txr->hn_agg_prevpkt = chim; 3035 } 3036 return (chim); 3037 } 3038 3039 /* 3040 * NOTE: 3041 * If this function fails, then both txd and m_head0 will be freed. 3042 */ 3043 static int 3044 hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 3045 struct mbuf **m_head0) 3046 { 3047 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 3048 int error, nsegs, i; 3049 struct mbuf *m_head = *m_head0; 3050 struct rndis_packet_msg *pkt; 3051 uint32_t *pi_data; 3052 void *chim = NULL; 3053 int pkt_hlen, pkt_size; 3054 3055 pkt = txd->rndis_pkt; 3056 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 3057 if (pkt_size < txr->hn_chim_size) { 3058 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 3059 if (chim != NULL) 3060 pkt = chim; 3061 } else { 3062 if (txr->hn_agg_txd != NULL) 3063 hn_flush_txagg(ifp, txr); 3064 } 3065 3066 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 3067 pkt->rm_len = m_head->m_pkthdr.len; 3068 pkt->rm_dataoffset = 0; 3069 pkt->rm_datalen = m_head->m_pkthdr.len; 3070 pkt->rm_oobdataoffset = 0; 3071 pkt->rm_oobdatalen = 0; 3072 pkt->rm_oobdataelements = 0; 3073 pkt->rm_pktinfooffset = sizeof(*pkt); 3074 pkt->rm_pktinfolen = 0; 3075 pkt->rm_vchandle = 0; 3076 pkt->rm_reserved = 0; 3077 3078 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 3079 /* 3080 * Set the hash value for this packet. 
3081 */ 3082 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3083 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 3084 3085 if (M_HASHTYPE_ISHASH(m_head)) 3086 /* 3087 * The flowid field contains the hash value host 3088 * set in the rx queue if it is a ip forwarding pkt. 3089 * Set the same hash value so host can send on the 3090 * cpu it was received. 3091 */ 3092 *pi_data = m_head->m_pkthdr.flowid; 3093 else 3094 /* 3095 * Otherwise just put the tx queue index. 3096 */ 3097 *pi_data = txr->hn_tx_idx; 3098 } 3099 3100 if (m_head->m_flags & M_VLANTAG) { 3101 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3102 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 3103 *pi_data = NDIS_VLAN_INFO_MAKE( 3104 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 3105 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 3106 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 3107 } 3108 3109 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3110 #if defined(INET6) || defined(INET) 3111 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3112 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 3113 #ifdef INET 3114 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 3115 *pi_data = NDIS_LSO2_INFO_MAKEIPV4( 3116 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3117 m_head->m_pkthdr.tso_segsz); 3118 } 3119 #endif 3120 #if defined(INET6) && defined(INET) 3121 else 3122 #endif 3123 #ifdef INET6 3124 { 3125 *pi_data = NDIS_LSO2_INFO_MAKEIPV6( 3126 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3127 m_head->m_pkthdr.tso_segsz); 3128 } 3129 #endif 3130 #endif /* INET6 || INET */ 3131 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 3132 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3133 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 3134 if (m_head->m_pkthdr.csum_flags & 3135 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 3136 *pi_data = NDIS_TXCSUM_INFO_IPV6; 3137 } else { 3138 *pi_data = NDIS_TXCSUM_INFO_IPV4; 3139 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3140 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 3141 } 3142 3143 if (m_head->m_pkthdr.csum_flags & 3144 (CSUM_IP_TCP | CSUM_IP6_TCP)) { 3145 *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS( 3146 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3147 } else if (m_head->m_pkthdr.csum_flags & 3148 (CSUM_IP_UDP | CSUM_IP6_UDP)) { 3149 *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS( 3150 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3151 } 3152 } 3153 3154 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 3155 /* Fixup RNDIS packet message total length */ 3156 pkt->rm_len += pkt_hlen; 3157 /* Convert RNDIS packet message offsets */ 3158 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); 3159 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 3160 3161 /* 3162 * Fast path: Chimney sending. 
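 *
 * "Chimney" sending copies the whole RNDIS message (the header built
 * above plus the mbuf payload) into a slot of the shared send buffer
 * and submits only the slot index and size, while the sglist path
 * further down hands the host a guest-physical-address array instead.
 * A rough picture:
 *
 *	chimney: header + payload copied to
 *	         hn_chim + chim_index * hn_chim_szmax,
 *	         sent via hn_txpkt_chim();
 *	sglist:  header page plus one txr->hn_gpa[] entry per DMA
 *	         segment, no copy, sent via hn_txpkt_sglist().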
3163 */ 3164 if (chim != NULL) { 3165 struct hn_txdesc *tgt_txd = txd; 3166 3167 if (txr->hn_agg_txd != NULL) { 3168 tgt_txd = txr->hn_agg_txd; 3169 #ifdef INVARIANTS 3170 *m_head0 = NULL; 3171 #endif 3172 } 3173 3174 KASSERT(pkt == chim, 3175 ("RNDIS pkt not in chimney sending buffer")); 3176 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 3177 ("chimney sending buffer is not used")); 3178 tgt_txd->chim_size += pkt->rm_len; 3179 3180 m_copydata(m_head, 0, m_head->m_pkthdr.len, 3181 ((uint8_t *)chim) + pkt_hlen); 3182 3183 txr->hn_gpa_cnt = 0; 3184 txr->hn_sendpkt = hn_txpkt_chim; 3185 goto done; 3186 } 3187 3188 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 3189 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 3190 ("chimney buffer is used")); 3191 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 3192 3193 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 3194 if (__predict_false(error)) { 3195 int freed; 3196 3197 /* 3198 * This mbuf is not linked w/ the txd yet, so free it now. 3199 */ 3200 m_freem(m_head); 3201 *m_head0 = NULL; 3202 3203 freed = hn_txdesc_put(txr, txd); 3204 KASSERT(freed != 0, 3205 ("fail to free txd upon txdma error")); 3206 3207 txr->hn_txdma_failed++; 3208 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 3209 return error; 3210 } 3211 *m_head0 = m_head; 3212 3213 /* +1 RNDIS packet message */ 3214 txr->hn_gpa_cnt = nsegs + 1; 3215 3216 /* send packet with page buffer */ 3217 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 3218 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 3219 txr->hn_gpa[0].gpa_len = pkt_hlen; 3220 3221 /* 3222 * Fill the page buffers with mbuf info after the page 3223 * buffer for RNDIS packet message. 3224 */ 3225 for (i = 0; i < nsegs; ++i) { 3226 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 3227 3228 gpa->gpa_page = atop(segs[i].ds_addr); 3229 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 3230 gpa->gpa_len = segs[i].ds_len; 3231 } 3232 3233 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 3234 txd->chim_size = 0; 3235 txr->hn_sendpkt = hn_txpkt_sglist; 3236 done: 3237 txd->m = m_head; 3238 3239 /* Set the completion routine */ 3240 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 3241 3242 /* Update temporary stats for later use. */ 3243 txr->hn_stat_pkts++; 3244 txr->hn_stat_size += m_head->m_pkthdr.len; 3245 if (m_head->m_flags & M_MCAST) 3246 txr->hn_stat_mcasts++; 3247 3248 return 0; 3249 } 3250 3251 /* 3252 * NOTE: 3253 * If this function fails, then txd will be freed, but the mbuf 3254 * associated w/ the txd will _not_ be freed. 3255 */ 3256 static int 3257 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 3258 { 3259 int error, send_failed = 0, has_bpf; 3260 3261 again: 3262 has_bpf = bpf_peers_present(ifp->if_bpf); 3263 if (has_bpf) { 3264 /* 3265 * Make sure that this txd and any aggregated txds are not 3266 * freed before ETHER_BPF_MTAP. 
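 *
 * The send completion may run on another CPU as soon as the packet is
 * handed to the channel, and hn_txpkt_done() drops a txd reference,
 * which would free txd->m.  The extra reference taken here keeps the
 * mbuf(s) alive until the BPF taps below are done; the matching
 * hn_txdesc_put() follows the taps.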
3267 */ 3268 hn_txdesc_hold(txd); 3269 } 3270 error = txr->hn_sendpkt(txr, txd); 3271 if (!error) { 3272 if (has_bpf) { 3273 const struct hn_txdesc *tmp_txd; 3274 3275 ETHER_BPF_MTAP(ifp, txd->m); 3276 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 3277 ETHER_BPF_MTAP(ifp, tmp_txd->m); 3278 } 3279 3280 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 3281 #ifdef HN_IFSTART_SUPPORT 3282 if (!hn_use_if_start) 3283 #endif 3284 { 3285 if_inc_counter(ifp, IFCOUNTER_OBYTES, 3286 txr->hn_stat_size); 3287 if (txr->hn_stat_mcasts != 0) { 3288 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 3289 txr->hn_stat_mcasts); 3290 } 3291 } 3292 txr->hn_pkts += txr->hn_stat_pkts; 3293 txr->hn_sends++; 3294 } 3295 if (has_bpf) 3296 hn_txdesc_put(txr, txd); 3297 3298 if (__predict_false(error)) { 3299 int freed; 3300 3301 /* 3302 * This should "really rarely" happen. 3303 * 3304 * XXX Too many RX to be acked or too many sideband 3305 * commands to run? Ask netvsc_channel_rollup() 3306 * to kick start later. 3307 */ 3308 txr->hn_has_txeof = 1; 3309 if (!send_failed) { 3310 txr->hn_send_failed++; 3311 send_failed = 1; 3312 /* 3313 * Try sending again after set hn_has_txeof; 3314 * in case that we missed the last 3315 * netvsc_channel_rollup(). 3316 */ 3317 goto again; 3318 } 3319 if_printf(ifp, "send failed\n"); 3320 3321 /* 3322 * Caller will perform further processing on the 3323 * associated mbuf, so don't free it in hn_txdesc_put(); 3324 * only unload it from the DMA map in hn_txdesc_put(), 3325 * if it was loaded. 3326 */ 3327 txd->m = NULL; 3328 freed = hn_txdesc_put(txr, txd); 3329 KASSERT(freed != 0, 3330 ("fail to free txd upon send error")); 3331 3332 txr->hn_send_failed++; 3333 } 3334 3335 /* Reset temporary stats, after this sending is done. */ 3336 txr->hn_stat_size = 0; 3337 txr->hn_stat_pkts = 0; 3338 txr->hn_stat_mcasts = 0; 3339 3340 return (error); 3341 } 3342 3343 /* 3344 * Append the specified data to the indicated mbuf chain, 3345 * Extend the mbuf chain if the new data does not fit in 3346 * existing space. 3347 * 3348 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 3349 * There should be an equivalent in the kernel mbuf code, 3350 * but there does not appear to be one yet. 3351 * 3352 * Differs from m_append() in that additional mbufs are 3353 * allocated with cluster size MJUMPAGESIZE, and filled 3354 * accordingly. 3355 * 3356 * Return 1 if able to complete the job; otherwise 0. 3357 */ 3358 static int 3359 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 3360 { 3361 struct mbuf *m, *n; 3362 int remainder, space; 3363 3364 for (m = m0; m->m_next != NULL; m = m->m_next) 3365 ; 3366 remainder = len; 3367 space = M_TRAILINGSPACE(m); 3368 if (space > 0) { 3369 /* 3370 * Copy into available space. 3371 */ 3372 if (space > remainder) 3373 space = remainder; 3374 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 3375 m->m_len += space; 3376 cp += space; 3377 remainder -= space; 3378 } 3379 while (remainder > 0) { 3380 /* 3381 * Allocate a new mbuf; could check space 3382 * and allocate a cluster instead. 
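 *
 * Each additional mbuf is backed by an MJUMPAGESIZE (page-sized)
 * cluster, so even a maximum-sized receive only needs a handful of
 * mbufs.  Note that m0->m_pkthdr.len is advanced by the number of
 * bytes actually copied (len - remainder), so a 0 return still leaves
 * a consistent, if short, chain.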
3383 */ 3384 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 3385 if (n == NULL) 3386 break; 3387 n->m_len = min(MJUMPAGESIZE, remainder); 3388 bcopy(cp, mtod(n, caddr_t), n->m_len); 3389 cp += n->m_len; 3390 remainder -= n->m_len; 3391 m->m_next = n; 3392 m = n; 3393 } 3394 if (m0->m_flags & M_PKTHDR) 3395 m0->m_pkthdr.len += len - remainder; 3396 3397 return (remainder == 0); 3398 } 3399 3400 #if defined(INET) || defined(INET6) 3401 static __inline int 3402 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 3403 { 3404 #if __FreeBSD_version >= 1100095 3405 if (hn_lro_mbufq_depth) { 3406 tcp_lro_queue_mbuf(lc, m); 3407 return 0; 3408 } 3409 #endif 3410 return tcp_lro_rx(lc, m, 0); 3411 } 3412 #endif 3413 3414 static int 3415 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 3416 const struct hn_rxinfo *info) 3417 { 3418 struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; 3419 struct mbuf *m_new; 3420 int size, do_lro = 0, do_csum = 1, is_vf = 0; 3421 int hash_type = M_HASHTYPE_NONE; 3422 int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE; 3423 3424 ifp = hn_ifp; 3425 if (rxr->hn_rxvf_ifp != NULL) { 3426 /* 3427 * Non-transparent mode VF; pretend this packet is from 3428 * the VF. 3429 */ 3430 ifp = rxr->hn_rxvf_ifp; 3431 is_vf = 1; 3432 } else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) { 3433 /* Transparent mode VF. */ 3434 is_vf = 1; 3435 } 3436 3437 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 3438 /* 3439 * NOTE: 3440 * See the NOTE of hn_rndis_init_fixat(). This 3441 * function can be reached, immediately after the 3442 * RNDIS is initialized but before the ifnet is 3443 * setup on the hn_attach() path; drop the unexpected 3444 * packets. 3445 */ 3446 return (0); 3447 } 3448 3449 if (__predict_false(dlen < ETHER_HDR_LEN)) { 3450 if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); 3451 return (0); 3452 } 3453 3454 if (dlen <= MHLEN) { 3455 m_new = m_gethdr(M_NOWAIT, MT_DATA); 3456 if (m_new == NULL) { 3457 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3458 return (0); 3459 } 3460 memcpy(mtod(m_new, void *), data, dlen); 3461 m_new->m_pkthdr.len = m_new->m_len = dlen; 3462 rxr->hn_small_pkts++; 3463 } else { 3464 /* 3465 * Get an mbuf with a cluster. For packets 2K or less, 3466 * get a standard 2K cluster. For anything larger, get a 3467 * 4K cluster. Any buffers larger than 4K can cause problems 3468 * if looped around to the Hyper-V TX channel, so avoid them. 
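 *
 * Packets that fit in MHLEN were already handled above with a plain
 * header mbuf.  Here a 2K cluster is used for anything up to MCLBYTES
 * and a page-sized cluster otherwise; if dlen still does not fit,
 * hv_m_append() below extends the chain with further page-sized
 * clusters rather than allocating anything larger.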
3469 */ 3470 size = MCLBYTES; 3471 if (dlen > MCLBYTES) { 3472 /* 4096 */ 3473 size = MJUMPAGESIZE; 3474 } 3475 3476 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 3477 if (m_new == NULL) { 3478 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3479 return (0); 3480 } 3481 3482 hv_m_append(m_new, dlen, data); 3483 } 3484 m_new->m_pkthdr.rcvif = ifp; 3485 3486 if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) 3487 do_csum = 0; 3488 3489 /* receive side checksum offload */ 3490 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 3491 /* IP csum offload */ 3492 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 3493 m_new->m_pkthdr.csum_flags |= 3494 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3495 rxr->hn_csum_ip++; 3496 } 3497 3498 /* TCP/UDP csum offload */ 3499 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 3500 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 3501 m_new->m_pkthdr.csum_flags |= 3502 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3503 m_new->m_pkthdr.csum_data = 0xffff; 3504 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 3505 rxr->hn_csum_tcp++; 3506 else 3507 rxr->hn_csum_udp++; 3508 } 3509 3510 /* 3511 * XXX 3512 * As of this write (Oct 28th, 2016), host side will turn 3513 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 3514 * the do_lro setting here is actually _not_ accurate. We 3515 * depend on the RSS hash type check to reset do_lro. 3516 */ 3517 if ((info->csum_info & 3518 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 3519 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 3520 do_lro = 1; 3521 } else { 3522 hn_rxpkt_proto(m_new, &l3proto, &l4proto); 3523 if (l3proto == ETHERTYPE_IP) { 3524 if (l4proto == IPPROTO_TCP) { 3525 if (do_csum && 3526 (rxr->hn_trust_hcsum & 3527 HN_TRUST_HCSUM_TCP)) { 3528 rxr->hn_csum_trusted++; 3529 m_new->m_pkthdr.csum_flags |= 3530 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3531 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3532 m_new->m_pkthdr.csum_data = 0xffff; 3533 } 3534 do_lro = 1; 3535 } else if (l4proto == IPPROTO_UDP) { 3536 if (do_csum && 3537 (rxr->hn_trust_hcsum & 3538 HN_TRUST_HCSUM_UDP)) { 3539 rxr->hn_csum_trusted++; 3540 m_new->m_pkthdr.csum_flags |= 3541 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3542 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3543 m_new->m_pkthdr.csum_data = 0xffff; 3544 } 3545 } else if (l4proto != IPPROTO_DONE && do_csum && 3546 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 3547 rxr->hn_csum_trusted++; 3548 m_new->m_pkthdr.csum_flags |= 3549 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3550 } 3551 } 3552 } 3553 3554 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 3555 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 3556 NDIS_VLAN_INFO_ID(info->vlan_info), 3557 NDIS_VLAN_INFO_PRI(info->vlan_info), 3558 NDIS_VLAN_INFO_CFI(info->vlan_info)); 3559 m_new->m_flags |= M_VLANTAG; 3560 } 3561 3562 /* 3563 * If VF is activated (tranparent/non-transparent mode does not 3564 * matter here). 3565 * 3566 * - Disable LRO 3567 * 3568 * hn(4) will only receive broadcast packets, multicast packets, 3569 * TCP SYN and SYN|ACK (in Azure), LRO is useless for these 3570 * packet types. 3571 * 3572 * For non-transparent, we definitely _cannot_ enable LRO at 3573 * all, since the LRO flush will use hn(4) as the receiving 3574 * interface; i.e. hn_ifp->if_input(hn_ifp, m). 3575 */ 3576 if (is_vf) 3577 do_lro = 0; 3578 3579 /* 3580 * If VF is activated (tranparent/non-transparent mode does not 3581 * matter here), do _not_ mess with unsupported hash types or 3582 * functions. 
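 *
 * For reference, the Toeplitz hash types below map as follows (LRO is
 * disabled for the non-TCP ones, see the csum_flags note above):
 *
 *	NDIS_HASH_IPV4        -> M_HASHTYPE_RSS_IPV4
 *	NDIS_HASH_TCP_IPV4    -> M_HASHTYPE_RSS_TCP_IPV4, unless the
 *	                         HN_RX_FLAG_UDP_HASH fixup below applies
 *	NDIS_HASH_IPV6        -> M_HASHTYPE_RSS_IPV6
 *	NDIS_HASH_IPV6_EX     -> M_HASHTYPE_RSS_IPV6_EX
 *	NDIS_HASH_TCP_IPV6    -> M_HASHTYPE_RSS_TCP_IPV6
 *	NDIS_HASH_TCP_IPV6_EX -> M_HASHTYPE_RSS_TCP_IPV6_EX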
3583 */ 3584 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 3585 rxr->hn_rss_pkts++; 3586 m_new->m_pkthdr.flowid = info->hash_value; 3587 if (!is_vf) 3588 hash_type = M_HASHTYPE_OPAQUE_HASH; 3589 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 3590 NDIS_HASH_FUNCTION_TOEPLITZ) { 3591 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK & 3592 rxr->hn_mbuf_hash); 3593 3594 /* 3595 * NOTE: 3596 * do_lro is resetted, if the hash types are not TCP 3597 * related. See the comment in the above csum_flags 3598 * setup section. 3599 */ 3600 switch (type) { 3601 case NDIS_HASH_IPV4: 3602 hash_type = M_HASHTYPE_RSS_IPV4; 3603 do_lro = 0; 3604 break; 3605 3606 case NDIS_HASH_TCP_IPV4: 3607 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 3608 if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) { 3609 int def_htype = M_HASHTYPE_OPAQUE_HASH; 3610 3611 if (is_vf) 3612 def_htype = M_HASHTYPE_NONE; 3613 3614 /* 3615 * UDP 4-tuple hash is delivered as 3616 * TCP 4-tuple hash. 3617 */ 3618 if (l3proto == ETHERTYPE_MAX) { 3619 hn_rxpkt_proto(m_new, 3620 &l3proto, &l4proto); 3621 } 3622 if (l3proto == ETHERTYPE_IP) { 3623 if (l4proto == IPPROTO_UDP && 3624 (rxr->hn_mbuf_hash & 3625 NDIS_HASH_UDP_IPV4_X)) { 3626 hash_type = 3627 M_HASHTYPE_RSS_UDP_IPV4; 3628 do_lro = 0; 3629 } else if (l4proto != 3630 IPPROTO_TCP) { 3631 hash_type = def_htype; 3632 do_lro = 0; 3633 } 3634 } else { 3635 hash_type = def_htype; 3636 do_lro = 0; 3637 } 3638 } 3639 break; 3640 3641 case NDIS_HASH_IPV6: 3642 hash_type = M_HASHTYPE_RSS_IPV6; 3643 do_lro = 0; 3644 break; 3645 3646 case NDIS_HASH_IPV6_EX: 3647 hash_type = M_HASHTYPE_RSS_IPV6_EX; 3648 do_lro = 0; 3649 break; 3650 3651 case NDIS_HASH_TCP_IPV6: 3652 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 3653 break; 3654 3655 case NDIS_HASH_TCP_IPV6_EX: 3656 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 3657 break; 3658 } 3659 } 3660 } else if (!is_vf) { 3661 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 3662 hash_type = M_HASHTYPE_OPAQUE; 3663 } 3664 M_HASHTYPE_SET(m_new, hash_type); 3665 3666 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 3667 if (hn_ifp != ifp) { 3668 const struct ether_header *eh; 3669 3670 /* 3671 * Non-transparent mode VF is activated. 3672 */ 3673 3674 /* 3675 * Allow tapping on hn(4). 3676 */ 3677 ETHER_BPF_MTAP(hn_ifp, m_new); 3678 3679 /* 3680 * Update hn(4)'s stats. 3681 */ 3682 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 3683 if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len); 3684 /* Checked at the beginning of this function. */ 3685 KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame")); 3686 eh = mtod(m_new, struct ether_header *); 3687 if (ETHER_IS_MULTICAST(eh->ether_dhost)) 3688 if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1); 3689 } 3690 rxr->hn_pkts++; 3691 3692 if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) { 3693 #if defined(INET) || defined(INET6) 3694 struct lro_ctrl *lro = &rxr->hn_lro; 3695 3696 if (lro->lro_cnt) { 3697 rxr->hn_lro_tried++; 3698 if (hn_lro_rx(lro, m_new) == 0) { 3699 /* DONE! 
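 *
 * tcp_lro_rx()/tcp_lro_queue_mbuf() now owns the mbuf; it will be
 * passed up (or freed) by the LRO code, so it must not be touched
 * again here.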
*/ 3700 return 0; 3701 } 3702 } 3703 #endif 3704 } 3705 ifp->if_input(ifp, m_new); 3706 3707 return (0); 3708 } 3709 3710 static int 3711 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 3712 { 3713 struct hn_softc *sc = ifp->if_softc; 3714 struct ifreq *ifr = (struct ifreq *)data, ifr_vf; 3715 struct ifnet *vf_ifp; 3716 int mask, error = 0; 3717 struct ifrsskey *ifrk; 3718 struct ifrsshash *ifrh; 3719 uint32_t mtu; 3720 3721 switch (cmd) { 3722 case SIOCSIFMTU: 3723 if (ifr->ifr_mtu > HN_MTU_MAX) { 3724 error = EINVAL; 3725 break; 3726 } 3727 3728 HN_LOCK(sc); 3729 3730 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3731 HN_UNLOCK(sc); 3732 break; 3733 } 3734 3735 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 3736 /* Can't change MTU */ 3737 HN_UNLOCK(sc); 3738 error = EOPNOTSUPP; 3739 break; 3740 } 3741 3742 if (ifp->if_mtu == ifr->ifr_mtu) { 3743 HN_UNLOCK(sc); 3744 break; 3745 } 3746 3747 if (hn_xpnt_vf_isready(sc)) { 3748 vf_ifp = sc->hn_vf_ifp; 3749 ifr_vf = *ifr; 3750 strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname, 3751 sizeof(ifr_vf.ifr_name)); 3752 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, 3753 (caddr_t)&ifr_vf); 3754 if (error) { 3755 HN_UNLOCK(sc); 3756 if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n", 3757 vf_ifp->if_xname, ifr->ifr_mtu, error); 3758 break; 3759 } 3760 } 3761 3762 /* 3763 * Suspend this interface before the synthetic parts 3764 * are ripped. 3765 */ 3766 hn_suspend(sc); 3767 3768 /* 3769 * Detach the synthetics parts, i.e. NVS and RNDIS. 3770 */ 3771 hn_synth_detach(sc); 3772 3773 /* 3774 * Reattach the synthetic parts, i.e. NVS and RNDIS, 3775 * with the new MTU setting. 3776 */ 3777 error = hn_synth_attach(sc, ifr->ifr_mtu); 3778 if (error) { 3779 HN_UNLOCK(sc); 3780 break; 3781 } 3782 3783 error = hn_rndis_get_mtu(sc, &mtu); 3784 if (error) 3785 mtu = ifr->ifr_mtu; 3786 else if (bootverbose) 3787 if_printf(ifp, "RNDIS mtu %u\n", mtu); 3788 3789 /* 3790 * Commit the requested MTU, after the synthetic parts 3791 * have been successfully attached. 3792 */ 3793 if (mtu >= ifr->ifr_mtu) { 3794 mtu = ifr->ifr_mtu; 3795 } else { 3796 if_printf(ifp, "fixup mtu %d -> %u\n", 3797 ifr->ifr_mtu, mtu); 3798 } 3799 ifp->if_mtu = mtu; 3800 3801 /* 3802 * Synthetic parts' reattach may change the chimney 3803 * sending size; update it. 3804 */ 3805 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 3806 hn_set_chim_size(sc, sc->hn_chim_szmax); 3807 3808 /* 3809 * Make sure that various parameters based on MTU are 3810 * still valid, after the MTU change. 3811 */ 3812 hn_mtu_change_fixup(sc); 3813 3814 /* 3815 * All done! Resume the interface now. 3816 */ 3817 hn_resume(sc); 3818 3819 if ((sc->hn_flags & HN_FLAG_RXVF) || 3820 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 3821 /* 3822 * Since we have reattached the NVS part, 3823 * change the datapath to VF again; in case 3824 * that it is lost, after the NVS was detached. 3825 */ 3826 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 3827 } 3828 3829 HN_UNLOCK(sc); 3830 break; 3831 3832 case SIOCSIFFLAGS: 3833 HN_LOCK(sc); 3834 3835 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3836 HN_UNLOCK(sc); 3837 break; 3838 } 3839 3840 if (hn_xpnt_vf_isready(sc)) 3841 hn_xpnt_vf_saveifflags(sc); 3842 3843 if (ifp->if_flags & IFF_UP) { 3844 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3845 /* 3846 * Caller meight hold mutex, e.g. 3847 * bpf; use busy-wait for the RNDIS 3848 * reply. 
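 *
 * HN_NO_SLEEPING() marks the softc so that the RNDIS request path
 * polls for the completion instead of sleeping on it, and
 * HN_SLEEPING_OK() below restores normal behaviour; sleeping with a
 * caller-held non-sleepable lock would otherwise be fatal.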
3849 */ 3850 HN_NO_SLEEPING(sc); 3851 hn_rxfilter_config(sc); 3852 HN_SLEEPING_OK(sc); 3853 3854 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 3855 error = hn_xpnt_vf_iocsetflags(sc); 3856 } else { 3857 hn_init_locked(sc); 3858 } 3859 } else { 3860 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3861 hn_stop(sc, false); 3862 } 3863 sc->hn_if_flags = ifp->if_flags; 3864 3865 HN_UNLOCK(sc); 3866 break; 3867 3868 case SIOCSIFCAP: 3869 HN_LOCK(sc); 3870 3871 if (hn_xpnt_vf_isready(sc)) { 3872 ifr_vf = *ifr; 3873 strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname, 3874 sizeof(ifr_vf.ifr_name)); 3875 error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf); 3876 HN_UNLOCK(sc); 3877 break; 3878 } 3879 3880 /* 3881 * Fix up requested capabilities w/ supported capabilities, 3882 * since the supported capabilities could have been changed. 3883 */ 3884 mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ 3885 ifp->if_capenable; 3886 3887 if (mask & IFCAP_TXCSUM) { 3888 ifp->if_capenable ^= IFCAP_TXCSUM; 3889 if (ifp->if_capenable & IFCAP_TXCSUM) 3890 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 3891 else 3892 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 3893 } 3894 if (mask & IFCAP_TXCSUM_IPV6) { 3895 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 3896 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 3897 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 3898 else 3899 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 3900 } 3901 3902 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 3903 if (mask & IFCAP_RXCSUM) 3904 ifp->if_capenable ^= IFCAP_RXCSUM; 3905 #ifdef foo 3906 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 3907 if (mask & IFCAP_RXCSUM_IPV6) 3908 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 3909 #endif 3910 3911 if (mask & IFCAP_LRO) 3912 ifp->if_capenable ^= IFCAP_LRO; 3913 3914 if (mask & IFCAP_TSO4) { 3915 ifp->if_capenable ^= IFCAP_TSO4; 3916 if (ifp->if_capenable & IFCAP_TSO4) 3917 ifp->if_hwassist |= CSUM_IP_TSO; 3918 else 3919 ifp->if_hwassist &= ~CSUM_IP_TSO; 3920 } 3921 if (mask & IFCAP_TSO6) { 3922 ifp->if_capenable ^= IFCAP_TSO6; 3923 if (ifp->if_capenable & IFCAP_TSO6) 3924 ifp->if_hwassist |= CSUM_IP6_TSO; 3925 else 3926 ifp->if_hwassist &= ~CSUM_IP6_TSO; 3927 } 3928 3929 HN_UNLOCK(sc); 3930 break; 3931 3932 case SIOCADDMULTI: 3933 case SIOCDELMULTI: 3934 HN_LOCK(sc); 3935 3936 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3937 HN_UNLOCK(sc); 3938 break; 3939 } 3940 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3941 /* 3942 * Multicast uses mutex; use busy-wait for 3943 * the RNDIS reply. 3944 */ 3945 HN_NO_SLEEPING(sc); 3946 hn_rxfilter_config(sc); 3947 HN_SLEEPING_OK(sc); 3948 } 3949 3950 /* XXX vlan(4) style mcast addr maintenance */ 3951 if (hn_xpnt_vf_isready(sc)) { 3952 int old_if_flags; 3953 3954 old_if_flags = sc->hn_vf_ifp->if_flags; 3955 hn_xpnt_vf_saveifflags(sc); 3956 3957 if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && 3958 ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & 3959 IFF_ALLMULTI)) 3960 error = hn_xpnt_vf_iocsetflags(sc); 3961 } 3962 3963 HN_UNLOCK(sc); 3964 break; 3965 3966 case SIOCSIFMEDIA: 3967 case SIOCGIFMEDIA: 3968 HN_LOCK(sc); 3969 if (hn_xpnt_vf_isready(sc)) { 3970 /* 3971 * SIOCGIFMEDIA expects ifmediareq, so don't 3972 * create and pass ifr_vf to the VF here; just 3973 * replace the ifr_name. 3974 */ 3975 vf_ifp = sc->hn_vf_ifp; 3976 strlcpy(ifr->ifr_name, vf_ifp->if_xname, 3977 sizeof(ifr->ifr_name)); 3978 error = vf_ifp->if_ioctl(vf_ifp, cmd, data); 3979 /* Restore the ifr_name. 
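 * (it was temporarily replaced with the VF's name before the ioctl was forwarded).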
*/ 3980 strlcpy(ifr->ifr_name, ifp->if_xname, 3981 sizeof(ifr->ifr_name)); 3982 HN_UNLOCK(sc); 3983 break; 3984 } 3985 HN_UNLOCK(sc); 3986 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 3987 break; 3988 3989 case SIOCGIFRSSHASH: 3990 ifrh = (struct ifrsshash *)data; 3991 HN_LOCK(sc); 3992 if (sc->hn_rx_ring_inuse == 1) { 3993 HN_UNLOCK(sc); 3994 ifrh->ifrh_func = RSS_FUNC_NONE; 3995 ifrh->ifrh_types = 0; 3996 break; 3997 } 3998 3999 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 4000 ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; 4001 else 4002 ifrh->ifrh_func = RSS_FUNC_PRIVATE; 4003 ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash); 4004 HN_UNLOCK(sc); 4005 break; 4006 4007 case SIOCGIFRSSKEY: 4008 ifrk = (struct ifrsskey *)data; 4009 HN_LOCK(sc); 4010 if (sc->hn_rx_ring_inuse == 1) { 4011 HN_UNLOCK(sc); 4012 ifrk->ifrk_func = RSS_FUNC_NONE; 4013 ifrk->ifrk_keylen = 0; 4014 break; 4015 } 4016 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 4017 ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; 4018 else 4019 ifrk->ifrk_func = RSS_FUNC_PRIVATE; 4020 ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ; 4021 memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key, 4022 NDIS_HASH_KEYSIZE_TOEPLITZ); 4023 HN_UNLOCK(sc); 4024 break; 4025 4026 default: 4027 error = ether_ioctl(ifp, cmd, data); 4028 break; 4029 } 4030 return (error); 4031 } 4032 4033 static void 4034 hn_stop(struct hn_softc *sc, bool detaching) 4035 { 4036 struct ifnet *ifp = sc->hn_ifp; 4037 int i; 4038 4039 HN_LOCK_ASSERT(sc); 4040 4041 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 4042 ("synthetic parts were not attached")); 4043 4044 /* Clear RUNNING bit ASAP. */ 4045 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4046 4047 /* Disable polling. */ 4048 hn_polling(sc, 0); 4049 4050 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 4051 KASSERT(sc->hn_vf_ifp != NULL, 4052 ("%s: VF is not attached", ifp->if_xname)); 4053 4054 /* Mark transparent mode VF as disabled. */ 4055 hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */); 4056 4057 /* 4058 * NOTE: 4059 * Datapath setting must happen _before_ bringing 4060 * the VF down. 4061 */ 4062 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 4063 4064 /* 4065 * Bring the VF down. 4066 */ 4067 hn_xpnt_vf_saveifflags(sc); 4068 sc->hn_vf_ifp->if_flags &= ~IFF_UP; 4069 hn_xpnt_vf_iocsetflags(sc); 4070 } 4071 4072 /* Suspend data transfers. */ 4073 hn_suspend_data(sc); 4074 4075 /* Clear OACTIVE bit. */ 4076 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4077 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4078 sc->hn_tx_ring[i].hn_oactive = 0; 4079 4080 /* 4081 * If the non-transparent mode VF is active, make sure 4082 * that the RX filter still allows packet reception. 4083 */ 4084 if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) 4085 hn_rxfilter_config(sc); 4086 } 4087 4088 static void 4089 hn_init_locked(struct hn_softc *sc) 4090 { 4091 struct ifnet *ifp = sc->hn_ifp; 4092 int i; 4093 4094 HN_LOCK_ASSERT(sc); 4095 4096 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 4097 return; 4098 4099 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 4100 return; 4101 4102 /* Configure RX filter */ 4103 hn_rxfilter_config(sc); 4104 4105 /* Clear OACTIVE bit. */ 4106 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4107 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4108 sc->hn_tx_ring[i].hn_oactive = 0; 4109 4110 /* Clear TX 'suspended' bit. */ 4111 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 4112 4113 if (hn_xpnt_vf_isready(sc)) { 4114 /* Initialize transparent VF. 
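 * (presumably brings the VF up and switches the data path over to it).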
*/ 4115 hn_xpnt_vf_init(sc); 4116 } 4117 4118 /* Everything is ready; unleash! */ 4119 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4120 4121 /* Re-enable polling if requested. */ 4122 if (sc->hn_pollhz > 0) 4123 hn_polling(sc, sc->hn_pollhz); 4124 } 4125 4126 static void 4127 hn_init(void *xsc) 4128 { 4129 struct hn_softc *sc = xsc; 4130 4131 HN_LOCK(sc); 4132 hn_init_locked(sc); 4133 HN_UNLOCK(sc); 4134 } 4135 4136 #if __FreeBSD_version >= 1100099 4137 4138 static int 4139 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 4140 { 4141 struct hn_softc *sc = arg1; 4142 unsigned int lenlim; 4143 int error; 4144 4145 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 4146 error = sysctl_handle_int(oidp, &lenlim, 0, req); 4147 if (error || req->newptr == NULL) 4148 return error; 4149 4150 HN_LOCK(sc); 4151 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 4152 lenlim > TCP_LRO_LENGTH_MAX) { 4153 HN_UNLOCK(sc); 4154 return EINVAL; 4155 } 4156 hn_set_lro_lenlim(sc, lenlim); 4157 HN_UNLOCK(sc); 4158 4159 return 0; 4160 } 4161 4162 static int 4163 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 4164 { 4165 struct hn_softc *sc = arg1; 4166 int ackcnt, error, i; 4167 4168 /* 4169 * lro_ackcnt_lim is append count limit, 4170 * +1 to turn it into aggregation limit. 4171 */ 4172 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 4173 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 4174 if (error || req->newptr == NULL) 4175 return error; 4176 4177 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 4178 return EINVAL; 4179 4180 /* 4181 * Convert aggregation limit back to append 4182 * count limit. 4183 */ 4184 --ackcnt; 4185 HN_LOCK(sc); 4186 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 4187 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 4188 HN_UNLOCK(sc); 4189 return 0; 4190 } 4191 4192 #endif 4193 4194 static int 4195 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 4196 { 4197 struct hn_softc *sc = arg1; 4198 int hcsum = arg2; 4199 int on, error, i; 4200 4201 on = 0; 4202 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 4203 on = 1; 4204 4205 error = sysctl_handle_int(oidp, &on, 0, req); 4206 if (error || req->newptr == NULL) 4207 return error; 4208 4209 HN_LOCK(sc); 4210 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4211 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4212 4213 if (on) 4214 rxr->hn_trust_hcsum |= hcsum; 4215 else 4216 rxr->hn_trust_hcsum &= ~hcsum; 4217 } 4218 HN_UNLOCK(sc); 4219 return 0; 4220 } 4221 4222 static int 4223 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 4224 { 4225 struct hn_softc *sc = arg1; 4226 int chim_size, error; 4227 4228 chim_size = sc->hn_tx_ring[0].hn_chim_size; 4229 error = sysctl_handle_int(oidp, &chim_size, 0, req); 4230 if (error || req->newptr == NULL) 4231 return error; 4232 4233 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 4234 return EINVAL; 4235 4236 HN_LOCK(sc); 4237 hn_set_chim_size(sc, chim_size); 4238 HN_UNLOCK(sc); 4239 return 0; 4240 } 4241 4242 #if __FreeBSD_version < 1100095 4243 static int 4244 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 4245 { 4246 struct hn_softc *sc = arg1; 4247 int ofs = arg2, i, error; 4248 struct hn_rx_ring *rxr; 4249 uint64_t stat; 4250 4251 stat = 0; 4252 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4253 rxr = &sc->hn_rx_ring[i]; 4254 stat += *((int *)((uint8_t *)rxr + ofs)); 4255 } 4256 4257 error = sysctl_handle_64(oidp, &stat, 0, req); 4258 if (error || req->newptr == NULL) 4259 return error; 4260 4261 /* Zero out this stat. 
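 * A write of any value to the sysctl node (e.g. dev.hn.0.lro_queued) resets the counter on every RX ring.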
*/ 4262 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4263 rxr = &sc->hn_rx_ring[i]; 4264 *((int *)((uint8_t *)rxr + ofs)) = 0; 4265 } 4266 return 0; 4267 } 4268 #else 4269 static int 4270 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 4271 { 4272 struct hn_softc *sc = arg1; 4273 int ofs = arg2, i, error; 4274 struct hn_rx_ring *rxr; 4275 uint64_t stat; 4276 4277 stat = 0; 4278 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4279 rxr = &sc->hn_rx_ring[i]; 4280 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 4281 } 4282 4283 error = sysctl_handle_64(oidp, &stat, 0, req); 4284 if (error || req->newptr == NULL) 4285 return error; 4286 4287 /* Zero out this stat. */ 4288 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4289 rxr = &sc->hn_rx_ring[i]; 4290 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 4291 } 4292 return 0; 4293 } 4294 4295 #endif 4296 4297 static int 4298 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4299 { 4300 struct hn_softc *sc = arg1; 4301 int ofs = arg2, i, error; 4302 struct hn_rx_ring *rxr; 4303 u_long stat; 4304 4305 stat = 0; 4306 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4307 rxr = &sc->hn_rx_ring[i]; 4308 stat += *((u_long *)((uint8_t *)rxr + ofs)); 4309 } 4310 4311 error = sysctl_handle_long(oidp, &stat, 0, req); 4312 if (error || req->newptr == NULL) 4313 return error; 4314 4315 /* Zero out this stat. */ 4316 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4317 rxr = &sc->hn_rx_ring[i]; 4318 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 4319 } 4320 return 0; 4321 } 4322 4323 static int 4324 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4325 { 4326 struct hn_softc *sc = arg1; 4327 int ofs = arg2, i, error; 4328 struct hn_tx_ring *txr; 4329 u_long stat; 4330 4331 stat = 0; 4332 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4333 txr = &sc->hn_tx_ring[i]; 4334 stat += *((u_long *)((uint8_t *)txr + ofs)); 4335 } 4336 4337 error = sysctl_handle_long(oidp, &stat, 0, req); 4338 if (error || req->newptr == NULL) 4339 return error; 4340 4341 /* Zero out this stat. 
*/ 4342 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4343 txr = &sc->hn_tx_ring[i]; 4344 *((u_long *)((uint8_t *)txr + ofs)) = 0; 4345 } 4346 return 0; 4347 } 4348 4349 static int 4350 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 4351 { 4352 struct hn_softc *sc = arg1; 4353 int ofs = arg2, i, error, conf; 4354 struct hn_tx_ring *txr; 4355 4356 txr = &sc->hn_tx_ring[0]; 4357 conf = *((int *)((uint8_t *)txr + ofs)); 4358 4359 error = sysctl_handle_int(oidp, &conf, 0, req); 4360 if (error || req->newptr == NULL) 4361 return error; 4362 4363 HN_LOCK(sc); 4364 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4365 txr = &sc->hn_tx_ring[i]; 4366 *((int *)((uint8_t *)txr + ofs)) = conf; 4367 } 4368 HN_UNLOCK(sc); 4369 4370 return 0; 4371 } 4372 4373 static int 4374 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 4375 { 4376 struct hn_softc *sc = arg1; 4377 int error, size; 4378 4379 size = sc->hn_agg_size; 4380 error = sysctl_handle_int(oidp, &size, 0, req); 4381 if (error || req->newptr == NULL) 4382 return (error); 4383 4384 HN_LOCK(sc); 4385 sc->hn_agg_size = size; 4386 hn_set_txagg(sc); 4387 HN_UNLOCK(sc); 4388 4389 return (0); 4390 } 4391 4392 static int 4393 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 4394 { 4395 struct hn_softc *sc = arg1; 4396 int error, pkts; 4397 4398 pkts = sc->hn_agg_pkts; 4399 error = sysctl_handle_int(oidp, &pkts, 0, req); 4400 if (error || req->newptr == NULL) 4401 return (error); 4402 4403 HN_LOCK(sc); 4404 sc->hn_agg_pkts = pkts; 4405 hn_set_txagg(sc); 4406 HN_UNLOCK(sc); 4407 4408 return (0); 4409 } 4410 4411 static int 4412 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 4413 { 4414 struct hn_softc *sc = arg1; 4415 int pkts; 4416 4417 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 4418 return (sysctl_handle_int(oidp, &pkts, 0, req)); 4419 } 4420 4421 static int 4422 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 4423 { 4424 struct hn_softc *sc = arg1; 4425 int align; 4426 4427 align = sc->hn_tx_ring[0].hn_agg_align; 4428 return (sysctl_handle_int(oidp, &align, 0, req)); 4429 } 4430 4431 static void 4432 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) 4433 { 4434 if (pollhz == 0) 4435 vmbus_chan_poll_disable(chan); 4436 else 4437 vmbus_chan_poll_enable(chan, pollhz); 4438 } 4439 4440 static void 4441 hn_polling(struct hn_softc *sc, u_int pollhz) 4442 { 4443 int nsubch = sc->hn_rx_ring_inuse - 1; 4444 4445 HN_LOCK_ASSERT(sc); 4446 4447 if (nsubch > 0) { 4448 struct vmbus_channel **subch; 4449 int i; 4450 4451 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 4452 for (i = 0; i < nsubch; ++i) 4453 hn_chan_polling(subch[i], pollhz); 4454 vmbus_subchan_rel(subch, nsubch); 4455 } 4456 hn_chan_polling(sc->hn_prichan, pollhz); 4457 } 4458 4459 static int 4460 hn_polling_sysctl(SYSCTL_HANDLER_ARGS) 4461 { 4462 struct hn_softc *sc = arg1; 4463 int pollhz, error; 4464 4465 pollhz = sc->hn_pollhz; 4466 error = sysctl_handle_int(oidp, &pollhz, 0, req); 4467 if (error || req->newptr == NULL) 4468 return (error); 4469 4470 if (pollhz != 0 && 4471 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) 4472 return (EINVAL); 4473 4474 HN_LOCK(sc); 4475 if (sc->hn_pollhz != pollhz) { 4476 sc->hn_pollhz = pollhz; 4477 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && 4478 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 4479 hn_polling(sc, sc->hn_pollhz); 4480 } 4481 HN_UNLOCK(sc); 4482 4483 return (0); 4484 } 4485 4486 static int 4487 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 4488 { 4489 struct hn_softc *sc = arg1; 4490 char verstr[16]; 4491 4492 snprintf(verstr, sizeof(verstr), "%u.%u", 4493 
HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 4494 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 4495 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 4496 } 4497 4498 static int 4499 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 4500 { 4501 struct hn_softc *sc = arg1; 4502 char caps_str[128]; 4503 uint32_t caps; 4504 4505 HN_LOCK(sc); 4506 caps = sc->hn_caps; 4507 HN_UNLOCK(sc); 4508 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 4509 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 4510 } 4511 4512 static int 4513 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 4514 { 4515 struct hn_softc *sc = arg1; 4516 char assist_str[128]; 4517 uint32_t hwassist; 4518 4519 HN_LOCK(sc); 4520 hwassist = sc->hn_ifp->if_hwassist; 4521 HN_UNLOCK(sc); 4522 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 4523 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 4524 } 4525 4526 static int 4527 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 4528 { 4529 struct hn_softc *sc = arg1; 4530 char filter_str[128]; 4531 uint32_t filter; 4532 4533 HN_LOCK(sc); 4534 filter = sc->hn_rx_filter; 4535 HN_UNLOCK(sc); 4536 snprintf(filter_str, sizeof(filter_str), "%b", filter, 4537 NDIS_PACKET_TYPES); 4538 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 4539 } 4540 4541 #ifndef RSS 4542 4543 static int 4544 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 4545 { 4546 struct hn_softc *sc = arg1; 4547 int error; 4548 4549 HN_LOCK(sc); 4550 4551 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4552 if (error || req->newptr == NULL) 4553 goto back; 4554 4555 if ((sc->hn_flags & HN_FLAG_RXVF) || 4556 (hn_xpnt_vf && sc->hn_vf_ifp != NULL)) { 4557 /* 4558 * RSS key is synchronized w/ VF's, don't allow users 4559 * to change it. 4560 */ 4561 error = EBUSY; 4562 goto back; 4563 } 4564 4565 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4566 if (error) 4567 goto back; 4568 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 4569 4570 if (sc->hn_rx_ring_inuse > 1) { 4571 error = hn_rss_reconfig(sc); 4572 } else { 4573 /* Not RSS capable, at least for now; just save the RSS key. */ 4574 error = 0; 4575 } 4576 back: 4577 HN_UNLOCK(sc); 4578 return (error); 4579 } 4580 4581 static int 4582 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 4583 { 4584 struct hn_softc *sc = arg1; 4585 int error; 4586 4587 HN_LOCK(sc); 4588 4589 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4590 if (error || req->newptr == NULL) 4591 goto back; 4592 4593 /* 4594 * Don't allow RSS indirect table change, if this interface is not 4595 * RSS capable currently. 
4596 */ 4597 if (sc->hn_rx_ring_inuse == 1) { 4598 error = EOPNOTSUPP; 4599 goto back; 4600 } 4601 4602 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4603 if (error) 4604 goto back; 4605 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 4606 4607 hn_rss_ind_fixup(sc); 4608 error = hn_rss_reconfig(sc); 4609 back: 4610 HN_UNLOCK(sc); 4611 return (error); 4612 } 4613 4614 #endif /* !RSS */ 4615 4616 static int 4617 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 4618 { 4619 struct hn_softc *sc = arg1; 4620 char hash_str[128]; 4621 uint32_t hash; 4622 4623 HN_LOCK(sc); 4624 hash = sc->hn_rss_hash; 4625 HN_UNLOCK(sc); 4626 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4627 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4628 } 4629 4630 static int 4631 hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS) 4632 { 4633 struct hn_softc *sc = arg1; 4634 char hash_str[128]; 4635 uint32_t hash; 4636 4637 HN_LOCK(sc); 4638 hash = sc->hn_rss_hcap; 4639 HN_UNLOCK(sc); 4640 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4641 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4642 } 4643 4644 static int 4645 hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS) 4646 { 4647 struct hn_softc *sc = arg1; 4648 char hash_str[128]; 4649 uint32_t hash; 4650 4651 HN_LOCK(sc); 4652 hash = sc->hn_rx_ring[0].hn_mbuf_hash; 4653 HN_UNLOCK(sc); 4654 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4655 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4656 } 4657 4658 static int 4659 hn_vf_sysctl(SYSCTL_HANDLER_ARGS) 4660 { 4661 struct hn_softc *sc = arg1; 4662 char vf_name[IFNAMSIZ + 1]; 4663 struct ifnet *vf_ifp; 4664 4665 HN_LOCK(sc); 4666 vf_name[0] = '\0'; 4667 vf_ifp = sc->hn_vf_ifp; 4668 if (vf_ifp != NULL) 4669 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4670 HN_UNLOCK(sc); 4671 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4672 } 4673 4674 static int 4675 hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) 4676 { 4677 struct hn_softc *sc = arg1; 4678 char vf_name[IFNAMSIZ + 1]; 4679 struct ifnet *vf_ifp; 4680 4681 HN_LOCK(sc); 4682 vf_name[0] = '\0'; 4683 vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; 4684 if (vf_ifp != NULL) 4685 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4686 HN_UNLOCK(sc); 4687 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4688 } 4689 4690 static int 4691 hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) 4692 { 4693 struct rm_priotracker pt; 4694 struct sbuf *sb; 4695 int error, i; 4696 bool first; 4697 4698 error = sysctl_wire_old_buffer(req, 0); 4699 if (error != 0) 4700 return (error); 4701 4702 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4703 if (sb == NULL) 4704 return (ENOMEM); 4705 4706 rm_rlock(&hn_vfmap_lock, &pt); 4707 4708 first = true; 4709 for (i = 0; i < hn_vfmap_size; ++i) { 4710 struct ifnet *ifp; 4711 4712 if (hn_vfmap[i] == NULL) 4713 continue; 4714 4715 ifp = ifnet_byindex(i); 4716 if (ifp != NULL) { 4717 if (first) 4718 sbuf_printf(sb, "%s", ifp->if_xname); 4719 else 4720 sbuf_printf(sb, " %s", ifp->if_xname); 4721 first = false; 4722 } 4723 } 4724 4725 rm_runlock(&hn_vfmap_lock, &pt); 4726 4727 error = sbuf_finish(sb); 4728 sbuf_delete(sb); 4729 return (error); 4730 } 4731 4732 static int 4733 hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) 4734 { 4735 struct rm_priotracker pt; 4736 struct sbuf *sb; 4737 int error, i; 4738 bool first; 4739 4740 error = sysctl_wire_old_buffer(req, 0); 4741 if (error != 0) 4742 return (error); 4743 4744 sb = 
sbuf_new_for_sysctl(NULL, NULL, 128, req); 4745 if (sb == NULL) 4746 return (ENOMEM); 4747 4748 rm_rlock(&hn_vfmap_lock, &pt); 4749 4750 first = true; 4751 for (i = 0; i < hn_vfmap_size; ++i) { 4752 struct ifnet *ifp, *hn_ifp; 4753 4754 hn_ifp = hn_vfmap[i]; 4755 if (hn_ifp == NULL) 4756 continue; 4757 4758 ifp = ifnet_byindex(i); 4759 if (ifp != NULL) { 4760 if (first) { 4761 sbuf_printf(sb, "%s:%s", ifp->if_xname, 4762 hn_ifp->if_xname); 4763 } else { 4764 sbuf_printf(sb, " %s:%s", ifp->if_xname, 4765 hn_ifp->if_xname); 4766 } 4767 first = false; 4768 } 4769 } 4770 4771 rm_runlock(&hn_vfmap_lock, &pt); 4772 4773 error = sbuf_finish(sb); 4774 sbuf_delete(sb); 4775 return (error); 4776 } 4777 4778 static int 4779 hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS) 4780 { 4781 struct hn_softc *sc = arg1; 4782 int error, onoff = 0; 4783 4784 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) 4785 onoff = 1; 4786 error = sysctl_handle_int(oidp, &onoff, 0, req); 4787 if (error || req->newptr == NULL) 4788 return (error); 4789 4790 HN_LOCK(sc); 4791 /* NOTE: hn_vf_lock for hn_transmit() */ 4792 rm_wlock(&sc->hn_vf_lock); 4793 if (onoff) 4794 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 4795 else 4796 sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF; 4797 rm_wunlock(&sc->hn_vf_lock); 4798 HN_UNLOCK(sc); 4799 4800 return (0); 4801 } 4802 4803 static int 4804 hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS) 4805 { 4806 struct hn_softc *sc = arg1; 4807 int enabled = 0; 4808 4809 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 4810 enabled = 1; 4811 return (sysctl_handle_int(oidp, &enabled, 0, req)); 4812 } 4813 4814 static int 4815 hn_check_iplen(const struct mbuf *m, int hoff) 4816 { 4817 const struct ip *ip; 4818 int len, iphlen, iplen; 4819 const struct tcphdr *th; 4820 int thoff; /* TCP data offset */ 4821 4822 len = hoff + sizeof(struct ip); 4823 4824 /* The packet must be at least the size of an IP header. */ 4825 if (m->m_pkthdr.len < len) 4826 return IPPROTO_DONE; 4827 4828 /* The fixed IP header must reside completely in the first mbuf. */ 4829 if (m->m_len < len) 4830 return IPPROTO_DONE; 4831 4832 ip = mtodo(m, hoff); 4833 4834 /* Bound check the packet's stated IP header length. */ 4835 iphlen = ip->ip_hl << 2; 4836 if (iphlen < sizeof(struct ip)) /* minimum header length */ 4837 return IPPROTO_DONE; 4838 4839 /* The full IP header must reside completely in the one mbuf. */ 4840 if (m->m_len < hoff + iphlen) 4841 return IPPROTO_DONE; 4842 4843 iplen = ntohs(ip->ip_len); 4844 4845 /* 4846 * Check that the amount of data in the buffers is at 4847 * least as much as the IP header would have us expect. 4848 */ 4849 if (m->m_pkthdr.len < hoff + iplen) 4850 return IPPROTO_DONE; 4851 4852 /* 4853 * Ignore IP fragments. 4854 */ 4855 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 4856 return IPPROTO_DONE; 4857 4858 /* 4859 * The TCP/IP or UDP/IP header must be entirely contained within 4860 * the first fragment of a packet.
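 * Otherwise the protocol header fields examined below could not be read directly from the first mbuf.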
4861 */ 4862 switch (ip->ip_p) { 4863 case IPPROTO_TCP: 4864 if (iplen < iphlen + sizeof(struct tcphdr)) 4865 return IPPROTO_DONE; 4866 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 4867 return IPPROTO_DONE; 4868 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 4869 thoff = th->th_off << 2; 4870 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 4871 return IPPROTO_DONE; 4872 if (m->m_len < hoff + iphlen + thoff) 4873 return IPPROTO_DONE; 4874 break; 4875 case IPPROTO_UDP: 4876 if (iplen < iphlen + sizeof(struct udphdr)) 4877 return IPPROTO_DONE; 4878 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 4879 return IPPROTO_DONE; 4880 break; 4881 default: 4882 if (iplen < iphlen) 4883 return IPPROTO_DONE; 4884 break; 4885 } 4886 return ip->ip_p; 4887 } 4888 4889 static void 4890 hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto) 4891 { 4892 const struct ether_header *eh; 4893 uint16_t etype; 4894 int hoff; 4895 4896 hoff = sizeof(*eh); 4897 /* Checked by the caller of this function. */ 4898 KASSERT(m_new->m_len >= hoff, ("not ethernet frame")); 4899 4900 eh = mtod(m_new, const struct ether_header *); 4901 etype = ntohs(eh->ether_type); 4902 if (etype == ETHERTYPE_VLAN) { 4903 const struct ether_vlan_header *evl; 4904 4905 hoff = sizeof(*evl); 4906 if (m_new->m_len < hoff) 4907 return; 4908 evl = mtod(m_new, const struct ether_vlan_header *); 4909 etype = ntohs(evl->evl_proto); 4910 } 4911 *l3proto = etype; 4912 4913 if (etype == ETHERTYPE_IP) 4914 *l4proto = hn_check_iplen(m_new, hoff); 4915 else 4916 *l4proto = IPPROTO_DONE; 4917 } 4918 4919 static int 4920 hn_create_rx_data(struct hn_softc *sc, int ring_cnt) 4921 { 4922 struct sysctl_oid_list *child; 4923 struct sysctl_ctx_list *ctx; 4924 device_t dev = sc->hn_dev; 4925 #if defined(INET) || defined(INET6) 4926 #if __FreeBSD_version >= 1100095 4927 int lroent_cnt; 4928 #endif 4929 #endif 4930 int i; 4931 4932 /* 4933 * Create RXBUF for reception. 4934 * 4935 * NOTE: 4936 * - It is shared by all channels. 4937 * - A large enough buffer is allocated; certain versions of NVS 4938 * may further limit the usable space.
4939 */ 4940 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4941 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 4942 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4943 if (sc->hn_rxbuf == NULL) { 4944 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 4945 return (ENOMEM); 4946 } 4947 4948 sc->hn_rx_ring_cnt = ring_cnt; 4949 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 4950 4951 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 4952 M_DEVBUF, M_WAITOK | M_ZERO); 4953 4954 #if defined(INET) || defined(INET6) 4955 #if __FreeBSD_version >= 1100095 4956 lroent_cnt = hn_lro_entry_count; 4957 if (lroent_cnt < TCP_LRO_ENTRIES) 4958 lroent_cnt = TCP_LRO_ENTRIES; 4959 if (bootverbose) 4960 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 4961 #endif 4962 #endif /* INET || INET6 */ 4963 4964 ctx = device_get_sysctl_ctx(dev); 4965 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 4966 4967 /* Create dev.hn.UNIT.rx sysctl tree */ 4968 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 4969 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4970 4971 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4972 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4973 4974 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4975 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 4976 &rxr->hn_br_dma, BUS_DMA_WAITOK); 4977 if (rxr->hn_br == NULL) { 4978 device_printf(dev, "allocate bufring failed\n"); 4979 return (ENOMEM); 4980 } 4981 4982 if (hn_trust_hosttcp) 4983 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 4984 if (hn_trust_hostudp) 4985 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 4986 if (hn_trust_hostip) 4987 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 4988 rxr->hn_mbuf_hash = NDIS_HASH_ALL; 4989 rxr->hn_ifp = sc->hn_ifp; 4990 if (i < sc->hn_tx_ring_cnt) 4991 rxr->hn_txr = &sc->hn_tx_ring[i]; 4992 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 4993 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 4994 rxr->hn_rx_idx = i; 4995 rxr->hn_rxbuf = sc->hn_rxbuf; 4996 4997 /* 4998 * Initialize LRO. 
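 * Per-RX-ring LRO state; the length and ACK-count limits are applied below where supported.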
4999 */ 5000 #if defined(INET) || defined(INET6) 5001 #if __FreeBSD_version >= 1100095 5002 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 5003 hn_lro_mbufq_depth); 5004 #else 5005 tcp_lro_init(&rxr->hn_lro); 5006 rxr->hn_lro.ifp = sc->hn_ifp; 5007 #endif 5008 #if __FreeBSD_version >= 1100099 5009 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 5010 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 5011 #endif 5012 #endif /* INET || INET6 */ 5013 5014 if (sc->hn_rx_sysctl_tree != NULL) { 5015 char name[16]; 5016 5017 /* 5018 * Create per RX ring sysctl tree: 5019 * dev.hn.UNIT.rx.RINGID 5020 */ 5021 snprintf(name, sizeof(name), "%d", i); 5022 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 5023 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 5024 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5025 5026 if (rxr->hn_rx_sysctl_tree != NULL) { 5027 SYSCTL_ADD_ULONG(ctx, 5028 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5029 OID_AUTO, "packets", CTLFLAG_RW, 5030 &rxr->hn_pkts, "# of packets received"); 5031 SYSCTL_ADD_ULONG(ctx, 5032 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5033 OID_AUTO, "rss_pkts", CTLFLAG_RW, 5034 &rxr->hn_rss_pkts, 5035 "# of packets w/ RSS info received"); 5036 SYSCTL_ADD_INT(ctx, 5037 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5038 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 5039 &rxr->hn_pktbuf_len, 0, 5040 "Temporary channel packet buffer length"); 5041 } 5042 } 5043 } 5044 5045 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 5046 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5047 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 5048 #if __FreeBSD_version < 1100095 5049 hn_rx_stat_int_sysctl, 5050 #else 5051 hn_rx_stat_u64_sysctl, 5052 #endif 5053 "LU", "LRO queued"); 5054 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 5055 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5056 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 5057 #if __FreeBSD_version < 1100095 5058 hn_rx_stat_int_sysctl, 5059 #else 5060 hn_rx_stat_u64_sysctl, 5061 #endif 5062 "LU", "LRO flushed"); 5063 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 5064 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5065 __offsetof(struct hn_rx_ring, hn_lro_tried), 5066 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 5067 #if __FreeBSD_version >= 1100099 5068 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 5069 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5070 hn_lro_lenlim_sysctl, "IU", 5071 "Max # of data bytes to be aggregated by LRO"); 5072 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 5073 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5074 hn_lro_ackcnt_sysctl, "I", 5075 "Max # of ACKs to be aggregated by LRO"); 5076 #endif 5077 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 5078 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 5079 hn_trust_hcsum_sysctl, "I", 5080 "Trust tcp segment verification on host side, " 5081 "when csum info is missing"); 5082 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 5083 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 5084 hn_trust_hcsum_sysctl, "I", 5085 "Trust udp datagram verification on host side, " 5086 "when csum info is missing"); 5087 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 5088 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 5089 hn_trust_hcsum_sysctl, "I", 5090 "Trust ip packet verification on host side, " 5091 "when csum info is missing"); 5092 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 5093 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5094
__offsetof(struct hn_rx_ring, hn_csum_ip), 5095 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 5096 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 5097 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5098 __offsetof(struct hn_rx_ring, hn_csum_tcp), 5099 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 5100 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 5101 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5102 __offsetof(struct hn_rx_ring, hn_csum_udp), 5103 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 5104 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 5105 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5106 __offsetof(struct hn_rx_ring, hn_csum_trusted), 5107 hn_rx_stat_ulong_sysctl, "LU", 5108 "# of packets that we trust host's csum verification"); 5109 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 5110 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5111 __offsetof(struct hn_rx_ring, hn_small_pkts), 5112 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 5113 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 5114 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5115 __offsetof(struct hn_rx_ring, hn_ack_failed), 5116 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 5117 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 5118 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 5119 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 5120 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 5121 5122 return (0); 5123 } 5124 5125 static void 5126 hn_destroy_rx_data(struct hn_softc *sc) 5127 { 5128 int i; 5129 5130 if (sc->hn_rxbuf != NULL) { 5131 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) 5132 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 5133 else 5134 device_printf(sc->hn_dev, "RXBUF is referenced\n"); 5135 sc->hn_rxbuf = NULL; 5136 } 5137 5138 if (sc->hn_rx_ring_cnt == 0) 5139 return; 5140 5141 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 5142 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 5143 5144 if (rxr->hn_br == NULL) 5145 continue; 5146 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { 5147 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 5148 } else { 5149 device_printf(sc->hn_dev, 5150 "%dth channel bufring is referenced", i); 5151 } 5152 rxr->hn_br = NULL; 5153 5154 #if defined(INET) || defined(INET6) 5155 tcp_lro_free(&rxr->hn_lro); 5156 #endif 5157 free(rxr->hn_pktbuf, M_DEVBUF); 5158 } 5159 free(sc->hn_rx_ring, M_DEVBUF); 5160 sc->hn_rx_ring = NULL; 5161 5162 sc->hn_rx_ring_cnt = 0; 5163 sc->hn_rx_ring_inuse = 0; 5164 } 5165 5166 static int 5167 hn_tx_ring_create(struct hn_softc *sc, int id) 5168 { 5169 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 5170 device_t dev = sc->hn_dev; 5171 bus_dma_tag_t parent_dtag; 5172 int error, i; 5173 5174 txr->hn_sc = sc; 5175 txr->hn_tx_idx = id; 5176 5177 #ifndef HN_USE_TXDESC_BUFRING 5178 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 5179 #endif 5180 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 5181 5182 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 5183 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 5184 M_DEVBUF, M_WAITOK | M_ZERO); 5185 #ifndef HN_USE_TXDESC_BUFRING 5186 SLIST_INIT(&txr->hn_txlist); 5187 #else 5188 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 5189 M_WAITOK, &txr->hn_tx_lock); 5190 #endif 5191 5192 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { 5193 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( 5194 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); 5195 } else { 5196 txr->hn_tx_taskq = 
sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; 5197 } 5198 5199 #ifdef HN_IFSTART_SUPPORT 5200 if (hn_use_if_start) { 5201 txr->hn_txeof = hn_start_txeof; 5202 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 5203 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 5204 } else 5205 #endif 5206 { 5207 int br_depth; 5208 5209 txr->hn_txeof = hn_xmit_txeof; 5210 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 5211 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 5212 5213 br_depth = hn_get_txswq_depth(txr); 5214 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 5215 M_WAITOK, &txr->hn_tx_lock); 5216 } 5217 5218 txr->hn_direct_tx_size = hn_direct_tx_size; 5219 5220 /* 5221 * Always schedule transmission instead of trying to do direct 5222 * transmission. This one gives the best performance so far. 5223 */ 5224 txr->hn_sched_tx = 1; 5225 5226 parent_dtag = bus_get_dma_tag(dev); 5227 5228 /* DMA tag for RNDIS packet messages. */ 5229 error = bus_dma_tag_create(parent_dtag, /* parent */ 5230 HN_RNDIS_PKT_ALIGN, /* alignment */ 5231 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 5232 BUS_SPACE_MAXADDR, /* lowaddr */ 5233 BUS_SPACE_MAXADDR, /* highaddr */ 5234 NULL, NULL, /* filter, filterarg */ 5235 HN_RNDIS_PKT_LEN, /* maxsize */ 5236 1, /* nsegments */ 5237 HN_RNDIS_PKT_LEN, /* maxsegsize */ 5238 0, /* flags */ 5239 NULL, /* lockfunc */ 5240 NULL, /* lockfuncarg */ 5241 &txr->hn_tx_rndis_dtag); 5242 if (error) { 5243 device_printf(dev, "failed to create rndis dmatag\n"); 5244 return error; 5245 } 5246 5247 /* DMA tag for data. */ 5248 error = bus_dma_tag_create(parent_dtag, /* parent */ 5249 1, /* alignment */ 5250 HN_TX_DATA_BOUNDARY, /* boundary */ 5251 BUS_SPACE_MAXADDR, /* lowaddr */ 5252 BUS_SPACE_MAXADDR, /* highaddr */ 5253 NULL, NULL, /* filter, filterarg */ 5254 HN_TX_DATA_MAXSIZE, /* maxsize */ 5255 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 5256 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 5257 0, /* flags */ 5258 NULL, /* lockfunc */ 5259 NULL, /* lockfuncarg */ 5260 &txr->hn_tx_data_dtag); 5261 if (error) { 5262 device_printf(dev, "failed to create data dmatag\n"); 5263 return error; 5264 } 5265 5266 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 5267 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 5268 5269 txd->txr = txr; 5270 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 5271 STAILQ_INIT(&txd->agg_list); 5272 5273 /* 5274 * Allocate and load RNDIS packet message. 5275 */ 5276 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 5277 (void **)&txd->rndis_pkt, 5278 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 5279 &txd->rndis_pkt_dmap); 5280 if (error) { 5281 device_printf(dev, 5282 "failed to allocate rndis_packet_msg, %d\n", i); 5283 return error; 5284 } 5285 5286 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 5287 txd->rndis_pkt_dmap, 5288 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 5289 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 5290 BUS_DMA_NOWAIT); 5291 if (error) { 5292 device_printf(dev, 5293 "failed to load rndis_packet_msg, %d\n", i); 5294 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5295 txd->rndis_pkt, txd->rndis_pkt_dmap); 5296 return error; 5297 } 5298 5299 /* DMA map for TX data. 
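 * One map per TX descriptor, used to load the outgoing mbuf chain for scatter/gather transmission.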
*/ 5300 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 5301 &txd->data_dmap); 5302 if (error) { 5303 device_printf(dev, 5304 "failed to allocate tx data dmamap\n"); 5305 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 5306 txd->rndis_pkt_dmap); 5307 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5308 txd->rndis_pkt, txd->rndis_pkt_dmap); 5309 return error; 5310 } 5311 5312 /* All set, put it to list */ 5313 txd->flags |= HN_TXD_FLAG_ONLIST; 5314 #ifndef HN_USE_TXDESC_BUFRING 5315 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 5316 #else 5317 buf_ring_enqueue(txr->hn_txdesc_br, txd); 5318 #endif 5319 } 5320 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 5321 5322 if (sc->hn_tx_sysctl_tree != NULL) { 5323 struct sysctl_oid_list *child; 5324 struct sysctl_ctx_list *ctx; 5325 char name[16]; 5326 5327 /* 5328 * Create per TX ring sysctl tree: 5329 * dev.hn.UNIT.tx.RINGID 5330 */ 5331 ctx = device_get_sysctl_ctx(dev); 5332 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 5333 5334 snprintf(name, sizeof(name), "%d", id); 5335 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 5336 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5337 5338 if (txr->hn_tx_sysctl_tree != NULL) { 5339 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 5340 5341 #ifdef HN_DEBUG 5342 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 5343 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 5344 "# of available TX descs"); 5345 #endif 5346 #ifdef HN_IFSTART_SUPPORT 5347 if (!hn_use_if_start) 5348 #endif 5349 { 5350 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 5351 CTLFLAG_RD, &txr->hn_oactive, 0, 5352 "over active"); 5353 } 5354 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 5355 CTLFLAG_RW, &txr->hn_pkts, 5356 "# of packets transmitted"); 5357 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 5358 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 5359 } 5360 } 5361 5362 return 0; 5363 } 5364 5365 static void 5366 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 5367 { 5368 struct hn_tx_ring *txr = txd->txr; 5369 5370 KASSERT(txd->m == NULL, ("still has mbuf installed")); 5371 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 5372 5373 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 5374 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 5375 txd->rndis_pkt_dmap); 5376 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 5377 } 5378 5379 static void 5380 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) 5381 { 5382 5383 KASSERT(txd->refs == 0 || txd->refs == 1, 5384 ("invalid txd refs %d", txd->refs)); 5385 5386 /* Aggregated txds will be freed by their aggregating txd. */ 5387 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { 5388 int freed; 5389 5390 freed = hn_txdesc_put(txr, txd); 5391 KASSERT(freed, ("can't free txdesc")); 5392 } 5393 } 5394 5395 static void 5396 hn_tx_ring_destroy(struct hn_tx_ring *txr) 5397 { 5398 int i; 5399 5400 if (txr->hn_txdesc == NULL) 5401 return; 5402 5403 /* 5404 * NOTE: 5405 * Because the freeing of aggregated txds will be deferred 5406 * to the aggregating txd, two passes are used here: 5407 * - The first pass GCes any pending txds. This GC is necessary, 5408 * since if the channels are revoked, hypervisor will not 5409 * deliver send-done for all pending txds. 5410 * - The second pass frees the busdma stuffs, i.e. after all txds 5411 * were freed. 
5412 */ 5413 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5414 hn_txdesc_gc(txr, &txr->hn_txdesc[i]); 5415 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5416 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); 5417 5418 if (txr->hn_tx_data_dtag != NULL) 5419 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 5420 if (txr->hn_tx_rndis_dtag != NULL) 5421 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 5422 5423 #ifdef HN_USE_TXDESC_BUFRING 5424 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 5425 #endif 5426 5427 free(txr->hn_txdesc, M_DEVBUF); 5428 txr->hn_txdesc = NULL; 5429 5430 if (txr->hn_mbuf_br != NULL) 5431 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 5432 5433 #ifndef HN_USE_TXDESC_BUFRING 5434 mtx_destroy(&txr->hn_txlist_spin); 5435 #endif 5436 mtx_destroy(&txr->hn_tx_lock); 5437 } 5438 5439 static int 5440 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 5441 { 5442 struct sysctl_oid_list *child; 5443 struct sysctl_ctx_list *ctx; 5444 int i; 5445 5446 /* 5447 * Create TXBUF for chimney sending. 5448 * 5449 * NOTE: It is shared by all channels. 5450 */ 5451 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 5452 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 5453 BUS_DMA_WAITOK | BUS_DMA_ZERO); 5454 if (sc->hn_chim == NULL) { 5455 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 5456 return (ENOMEM); 5457 } 5458 5459 sc->hn_tx_ring_cnt = ring_cnt; 5460 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 5461 5462 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 5463 M_DEVBUF, M_WAITOK | M_ZERO); 5464 5465 ctx = device_get_sysctl_ctx(sc->hn_dev); 5466 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 5467 5468 /* Create dev.hn.UNIT.tx sysctl tree */ 5469 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 5470 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5471 5472 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 5473 int error; 5474 5475 error = hn_tx_ring_create(sc, i); 5476 if (error) 5477 return error; 5478 } 5479 5480 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 5481 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5482 __offsetof(struct hn_tx_ring, hn_no_txdescs), 5483 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 5484 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 5485 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5486 __offsetof(struct hn_tx_ring, hn_send_failed), 5487 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 5488 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 5489 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5490 __offsetof(struct hn_tx_ring, hn_txdma_failed), 5491 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 5492 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 5493 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5494 __offsetof(struct hn_tx_ring, hn_flush_failed), 5495 hn_tx_stat_ulong_sysctl, "LU", 5496 "# of packet transmission aggregation flush failure"); 5497 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 5498 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5499 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 5500 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 5501 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 5502 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5503 __offsetof(struct hn_tx_ring, hn_tx_chimney), 5504 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 5505 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 5506 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5507 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 5508 
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 5509 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 5510 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 5511 "# of total TX descs"); 5512 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 5513 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 5514 "Chimney send packet size upper boundary"); 5515 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 5516 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5517 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 5518 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 5519 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5520 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 5521 hn_tx_conf_int_sysctl, "I", 5522 "Size of the packet for direct transmission"); 5523 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 5524 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5525 __offsetof(struct hn_tx_ring, hn_sched_tx), 5526 hn_tx_conf_int_sysctl, "I", 5527 "Always schedule transmission " 5528 "instead of doing direct transmission"); 5529 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 5530 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 5531 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 5532 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 5533 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 5534 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 5535 "Applied packet transmission aggregation size"); 5536 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 5537 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5538 hn_txagg_pktmax_sysctl, "I", 5539 "Applied packet transmission aggregation packets"); 5540 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 5541 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5542 hn_txagg_align_sysctl, "I", 5543 "Applied packet transmission aggregation alignment"); 5544 5545 return 0; 5546 } 5547 5548 static void 5549 hn_set_chim_size(struct hn_softc *sc, int chim_size) 5550 { 5551 int i; 5552 5553 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5554 sc->hn_tx_ring[i].hn_chim_size = chim_size; 5555 } 5556 5557 static void 5558 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 5559 { 5560 struct ifnet *ifp = sc->hn_ifp; 5561 u_int hw_tsomax; 5562 int tso_minlen; 5563 5564 HN_LOCK_ASSERT(sc); 5565 5566 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 5567 return; 5568 5569 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 5570 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 5571 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 5572 5573 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 5574 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 5575 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 5576 5577 if (tso_maxlen < tso_minlen) 5578 tso_maxlen = tso_minlen; 5579 else if (tso_maxlen > IP_MAXPACKET) 5580 tso_maxlen = IP_MAXPACKET; 5581 if (tso_maxlen > sc->hn_ndis_tso_szmax) 5582 tso_maxlen = sc->hn_ndis_tso_szmax; 5583 hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 5584 5585 if (hn_xpnt_vf_isready(sc)) { 5586 if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) 5587 hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; 5588 } 5589 ifp->if_hw_tsomax = hw_tsomax; 5590 if (bootverbose) 5591 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 5592 } 5593 5594 static void 5595 hn_fixup_tx_data(struct hn_softc *sc) 5596 { 5597 uint64_t csum_assist; 5598 int i; 5599 5600 hn_set_chim_size(sc, sc->hn_chim_szmax); 5601 if (hn_tx_chimney_size > 0 && 5602 hn_tx_chimney_size < sc->hn_chim_szmax) 5603 hn_set_chim_size(sc, 
hn_tx_chimney_size); 5604 5605 csum_assist = 0; 5606 if (sc->hn_caps & HN_CAP_IPCS) 5607 csum_assist |= CSUM_IP; 5608 if (sc->hn_caps & HN_CAP_TCP4CS) 5609 csum_assist |= CSUM_IP_TCP; 5610 if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs) 5611 csum_assist |= CSUM_IP_UDP; 5612 if (sc->hn_caps & HN_CAP_TCP6CS) 5613 csum_assist |= CSUM_IP6_TCP; 5614 if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs) 5615 csum_assist |= CSUM_IP6_UDP; 5616 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5617 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 5618 5619 if (sc->hn_caps & HN_CAP_HASHVAL) { 5620 /* 5621 * Support HASHVAL pktinfo on TX path. 5622 */ 5623 if (bootverbose) 5624 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 5625 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5626 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 5627 } 5628 } 5629 5630 static void 5631 hn_fixup_rx_data(struct hn_softc *sc) 5632 { 5633 5634 if (sc->hn_caps & HN_CAP_UDPHASH) { 5635 int i; 5636 5637 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 5638 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH; 5639 } 5640 } 5641 5642 static void 5643 hn_destroy_tx_data(struct hn_softc *sc) 5644 { 5645 int i; 5646 5647 if (sc->hn_chim != NULL) { 5648 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { 5649 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 5650 } else { 5651 device_printf(sc->hn_dev, 5652 "chimney sending buffer is referenced"); 5653 } 5654 sc->hn_chim = NULL; 5655 } 5656 5657 if (sc->hn_tx_ring_cnt == 0) 5658 return; 5659 5660 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5661 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 5662 5663 free(sc->hn_tx_ring, M_DEVBUF); 5664 sc->hn_tx_ring = NULL; 5665 5666 sc->hn_tx_ring_cnt = 0; 5667 sc->hn_tx_ring_inuse = 0; 5668 } 5669 5670 #ifdef HN_IFSTART_SUPPORT 5671 5672 static void 5673 hn_start_taskfunc(void *xtxr, int pending __unused) 5674 { 5675 struct hn_tx_ring *txr = xtxr; 5676 5677 mtx_lock(&txr->hn_tx_lock); 5678 hn_start_locked(txr, 0); 5679 mtx_unlock(&txr->hn_tx_lock); 5680 } 5681 5682 static int 5683 hn_start_locked(struct hn_tx_ring *txr, int len) 5684 { 5685 struct hn_softc *sc = txr->hn_sc; 5686 struct ifnet *ifp = sc->hn_ifp; 5687 int sched = 0; 5688 5689 KASSERT(hn_use_if_start, 5690 ("hn_start_locked is called, when if_start is disabled")); 5691 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5692 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5693 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5694 5695 if (__predict_false(txr->hn_suspended)) 5696 return (0); 5697 5698 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 5699 IFF_DRV_RUNNING) 5700 return (0); 5701 5702 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 5703 struct hn_txdesc *txd; 5704 struct mbuf *m_head; 5705 int error; 5706 5707 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 5708 if (m_head == NULL) 5709 break; 5710 5711 if (len > 0 && m_head->m_pkthdr.len > len) { 5712 /* 5713 * This sending could be time consuming; let callers 5714 * dispatch this packet sending (and sending of any 5715 * following up packets) to tx taskqueue. 
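 * Returning with sched set makes the caller enqueue hn_tx_task, which re-runs hn_start_locked() with no length limit.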
5716 */ 5717 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5718 sched = 1; 5719 break; 5720 } 5721 5722 #if defined(INET6) || defined(INET) 5723 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 5724 m_head = hn_tso_fixup(m_head); 5725 if (__predict_false(m_head == NULL)) { 5726 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5727 continue; 5728 } 5729 } else if (m_head->m_pkthdr.csum_flags & 5730 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 5731 m_head = hn_set_hlen(m_head); 5732 if (__predict_false(m_head == NULL)) { 5733 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5734 continue; 5735 } 5736 } 5737 #endif 5738 5739 txd = hn_txdesc_get(txr); 5740 if (txd == NULL) { 5741 txr->hn_no_txdescs++; 5742 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5743 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5744 break; 5745 } 5746 5747 error = hn_encap(ifp, txr, txd, &m_head); 5748 if (error) { 5749 /* Both txd and m_head are freed */ 5750 KASSERT(txr->hn_agg_txd == NULL, 5751 ("encap failed w/ pending aggregating txdesc")); 5752 continue; 5753 } 5754 5755 if (txr->hn_agg_pktleft == 0) { 5756 if (txr->hn_agg_txd != NULL) { 5757 KASSERT(m_head == NULL, 5758 ("pending mbuf for aggregating txdesc")); 5759 error = hn_flush_txagg(ifp, txr); 5760 if (__predict_false(error)) { 5761 atomic_set_int(&ifp->if_drv_flags, 5762 IFF_DRV_OACTIVE); 5763 break; 5764 } 5765 } else { 5766 KASSERT(m_head != NULL, ("mbuf was freed")); 5767 error = hn_txpkt(ifp, txr, txd); 5768 if (__predict_false(error)) { 5769 /* txd is freed, but m_head is not */ 5770 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5771 atomic_set_int(&ifp->if_drv_flags, 5772 IFF_DRV_OACTIVE); 5773 break; 5774 } 5775 } 5776 } 5777 #ifdef INVARIANTS 5778 else { 5779 KASSERT(txr->hn_agg_txd != NULL, 5780 ("no aggregating txdesc")); 5781 KASSERT(m_head == NULL, 5782 ("pending mbuf for aggregating txdesc")); 5783 } 5784 #endif 5785 } 5786 5787 /* Flush pending aggregated transmission. */ 5788 if (txr->hn_agg_txd != NULL) 5789 hn_flush_txagg(ifp, txr); 5790 return (sched); 5791 } 5792 5793 static void 5794 hn_start(struct ifnet *ifp) 5795 { 5796 struct hn_softc *sc = ifp->if_softc; 5797 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 5798 5799 if (txr->hn_sched_tx) 5800 goto do_sched; 5801 5802 if (mtx_trylock(&txr->hn_tx_lock)) { 5803 int sched; 5804 5805 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5806 mtx_unlock(&txr->hn_tx_lock); 5807 if (!sched) 5808 return; 5809 } 5810 do_sched: 5811 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 5812 } 5813 5814 static void 5815 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 5816 { 5817 struct hn_tx_ring *txr = xtxr; 5818 5819 mtx_lock(&txr->hn_tx_lock); 5820 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 5821 hn_start_locked(txr, 0); 5822 mtx_unlock(&txr->hn_tx_lock); 5823 } 5824 5825 static void 5826 hn_start_txeof(struct hn_tx_ring *txr) 5827 { 5828 struct hn_softc *sc = txr->hn_sc; 5829 struct ifnet *ifp = sc->hn_ifp; 5830 5831 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5832 5833 if (txr->hn_sched_tx) 5834 goto do_sched; 5835 5836 if (mtx_trylock(&txr->hn_tx_lock)) { 5837 int sched; 5838 5839 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5840 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5841 mtx_unlock(&txr->hn_tx_lock); 5842 if (sched) { 5843 taskqueue_enqueue(txr->hn_tx_taskq, 5844 &txr->hn_tx_task); 5845 } 5846 } else { 5847 do_sched: 5848 /* 5849 * Release the OACTIVE earlier, in the hope that 5850 * others could catch up.
The task will clear the 5851 * flag again with the hn_tx_lock to avoid possible 5852 * races. 5853 */ 5854 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5855 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 5856 } 5857 } 5858 5859 #endif /* HN_IFSTART_SUPPORT */ 5860 5861 static int 5862 hn_xmit(struct hn_tx_ring *txr, int len) 5863 { 5864 struct hn_softc *sc = txr->hn_sc; 5865 struct ifnet *ifp = sc->hn_ifp; 5866 struct mbuf *m_head; 5867 int sched = 0; 5868 5869 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5870 #ifdef HN_IFSTART_SUPPORT 5871 KASSERT(hn_use_if_start == 0, 5872 ("hn_xmit is called, when if_start is enabled")); 5873 #endif 5874 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5875 5876 if (__predict_false(txr->hn_suspended)) 5877 return (0); 5878 5879 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 5880 return (0); 5881 5882 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 5883 struct hn_txdesc *txd; 5884 int error; 5885 5886 if (len > 0 && m_head->m_pkthdr.len > len) { 5887 /* 5888 * This sending could be time consuming; let callers 5889 * dispatch this packet sending (and sending of any 5890 * following up packets) to tx taskqueue. 5891 */ 5892 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5893 sched = 1; 5894 break; 5895 } 5896 5897 txd = hn_txdesc_get(txr); 5898 if (txd == NULL) { 5899 txr->hn_no_txdescs++; 5900 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5901 txr->hn_oactive = 1; 5902 break; 5903 } 5904 5905 error = hn_encap(ifp, txr, txd, &m_head); 5906 if (error) { 5907 /* Both txd and m_head are freed; discard */ 5908 KASSERT(txr->hn_agg_txd == NULL, 5909 ("encap failed w/ pending aggregating txdesc")); 5910 drbr_advance(ifp, txr->hn_mbuf_br); 5911 continue; 5912 } 5913 5914 if (txr->hn_agg_pktleft == 0) { 5915 if (txr->hn_agg_txd != NULL) { 5916 KASSERT(m_head == NULL, 5917 ("pending mbuf for aggregating txdesc")); 5918 error = hn_flush_txagg(ifp, txr); 5919 if (__predict_false(error)) { 5920 txr->hn_oactive = 1; 5921 break; 5922 } 5923 } else { 5924 KASSERT(m_head != NULL, ("mbuf was freed")); 5925 error = hn_txpkt(ifp, txr, txd); 5926 if (__predict_false(error)) { 5927 /* txd is freed, but m_head is not */ 5928 drbr_putback(ifp, txr->hn_mbuf_br, 5929 m_head); 5930 txr->hn_oactive = 1; 5931 break; 5932 } 5933 } 5934 } 5935 #ifdef INVARIANTS 5936 else { 5937 KASSERT(txr->hn_agg_txd != NULL, 5938 ("no aggregating txdesc")); 5939 KASSERT(m_head == NULL, 5940 ("pending mbuf for aggregating txdesc")); 5941 } 5942 #endif 5943 5944 /* Sent */ 5945 drbr_advance(ifp, txr->hn_mbuf_br); 5946 } 5947 5948 /* Flush pending aggregated transmission. */ 5949 if (txr->hn_agg_txd != NULL) 5950 hn_flush_txagg(ifp, txr); 5951 return (sched); 5952 } 5953 5954 static int 5955 hn_transmit(struct ifnet *ifp, struct mbuf *m) 5956 { 5957 struct hn_softc *sc = ifp->if_softc; 5958 struct hn_tx_ring *txr; 5959 int error, idx = 0; 5960 5961 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 5962 struct rm_priotracker pt; 5963 5964 rm_rlock(&sc->hn_vf_lock, &pt); 5965 if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 5966 struct mbuf *m_bpf = NULL; 5967 int obytes, omcast; 5968 5969 obytes = m->m_pkthdr.len; 5970 omcast = (m->m_flags & M_MCAST) != 0; 5971 5972 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) { 5973 if (bpf_peers_present(ifp->if_bpf)) { 5974 m_bpf = m_copypacket(m, M_NOWAIT); 5975 if (m_bpf == NULL) { 5976 /* 5977 * Failed to grab a shallow 5978 * copy; tap now.
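 * i.e. run BPF on the original mbuf before it is handed to the VF's if_transmit below.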
						 */
						ETHER_BPF_MTAP(ifp, m);
					}
				}
			} else {
				ETHER_BPF_MTAP(ifp, m);
			}

			error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m);
			rm_runlock(&sc->hn_vf_lock, &pt);

			if (m_bpf != NULL) {
				if (!error)
					ETHER_BPF_MTAP(ifp, m_bpf);
				m_freem(m_bpf);
			}

			if (error == ENOBUFS) {
				if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
			} else if (error) {
				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			} else {
				if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
				if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes);
				if (omcast) {
					if_inc_counter(ifp, IFCOUNTER_OMCASTS,
					    omcast);
				}
			}
			return (error);
		}
		rm_runlock(&sc->hn_vf_lock, &pt);
	}

#if defined(INET6) || defined(INET)
	/*
	 * Perform TSO packet header fixup or get l2/l3 header length now,
	 * since packet headers should be cache-hot.
	 */
	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
		m = hn_tso_fixup(m);
		if (__predict_false(m == NULL)) {
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (EIO);
		}
	} else if (m->m_pkthdr.csum_flags &
	    (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) {
		m = hn_set_hlen(m);
		if (__predict_false(m == NULL)) {
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (EIO);
		}
	}
#endif

	/*
	 * Select the TX ring based on the flowid.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		uint32_t bid;

		if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
		    &bid) == 0)
			idx = bid % sc->hn_tx_ring_inuse;
		else
#endif
		{
#if defined(INET6) || defined(INET)
			int tcpsyn = 0;

			if (m->m_pkthdr.len < 128 &&
			    (m->m_pkthdr.csum_flags &
			     (CSUM_IP_TCP | CSUM_IP6_TCP)) &&
			    (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
				m = hn_check_tcpsyn(m, &tcpsyn);
				if (__predict_false(m == NULL)) {
					if_inc_counter(ifp,
					    IFCOUNTER_OERRORS, 1);
					return (EIO);
				}
			}
#else
			const int tcpsyn = 0;
#endif
			if (tcpsyn)
				idx = 0;
			else
				idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
		}
	}
	txr = &sc->hn_tx_ring[idx];

	error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
	if (error) {
		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
		return (error);
	}

	if (txr->hn_oactive)
		return (0);

	if (txr->hn_sched_tx)
		goto do_sched;

	if (mtx_trylock(&txr->hn_tx_lock)) {
		int sched;

		sched = hn_xmit(txr, txr->hn_direct_tx_size);
		mtx_unlock(&txr->hn_tx_lock);
		if (!sched)
			return (0);
	}
do_sched:
	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
	return (0);
}

static void
hn_tx_ring_qflush(struct hn_tx_ring *txr)
{
	struct mbuf *m;

	mtx_lock(&txr->hn_tx_lock);
	while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
		m_freem(m);
	mtx_unlock(&txr->hn_tx_lock);
}

static void
hn_xmit_qflush(struct ifnet *ifp)
{
	struct hn_softc *sc = ifp->if_softc;
	struct rm_priotracker pt;
	int i;

	for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
		hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
	if_qflush(ifp);

	rm_rlock(&sc->hn_vf_lock, &pt);
	if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
		sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp);
	rm_runlock(&sc->hn_vf_lock, &pt);
}
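
/*
 * Transmission-done handling for the if_transmit path.  Both this
 * function and hn_transmit() follow the same pattern: try to grab
 * hn_tx_lock and drain the ring directly; if the lock is contended,
 * or if the direct transmission indicates that more work is pending,
 * defer the work to the per-ring taskqueue instead of spinning.
 */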

static void
hn_xmit_txeof(struct hn_tx_ring *txr)
{

	if (txr->hn_sched_tx)
		goto do_sched;

	if (mtx_trylock(&txr->hn_tx_lock)) {
		int sched;

		txr->hn_oactive = 0;
		sched = hn_xmit(txr, txr->hn_direct_tx_size);
		mtx_unlock(&txr->hn_tx_lock);
		if (sched) {
			taskqueue_enqueue(txr->hn_tx_taskq,
			    &txr->hn_tx_task);
		}
	} else {
do_sched:
		/*
		 * Clear oactive earlier, in the hope that others can
		 * catch up.  The task will clear oactive again while
		 * holding hn_tx_lock to avoid possible races.
		 */
		txr->hn_oactive = 0;
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_xmit_taskfunc(void *xtxr, int pending __unused)
{
	struct hn_tx_ring *txr = xtxr;

	mtx_lock(&txr->hn_tx_lock);
	hn_xmit(txr, 0);
	mtx_unlock(&txr->hn_tx_lock);
}

static void
hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
{
	struct hn_tx_ring *txr = xtxr;

	mtx_lock(&txr->hn_tx_lock);
	txr->hn_oactive = 0;
	hn_xmit(txr, 0);
	mtx_unlock(&txr->hn_tx_lock);
}

static int
hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
{
	struct vmbus_chan_br cbr;
	struct hn_rx_ring *rxr;
	struct hn_tx_ring *txr = NULL;
	int idx, error;

	idx = vmbus_chan_subidx(chan);

	/*
	 * Link this channel to the RX/TX ring.
	 */
	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
	    ("invalid channel index %d, should be >= 0 and < %d",
	     idx, sc->hn_rx_ring_inuse));
	rxr = &sc->hn_rx_ring[idx];
	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
	    ("RX ring %d already attached", idx));
	rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
	rxr->hn_chan = chan;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
		    idx, vmbus_chan_id(chan));
	}

	if (idx < sc->hn_tx_ring_inuse) {
		txr = &sc->hn_tx_ring[idx];
		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
		    ("TX ring %d already attached", idx));
		txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;

		txr->hn_chan = chan;
		if (bootverbose) {
			if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
			    idx, vmbus_chan_id(chan));
		}
	}

	/* Bind this channel to a proper CPU. */
	vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));

	/*
	 * Open this channel.
	 */
	cbr.cbr = rxr->hn_br;
	cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
	cbr.cbr_txsz = HN_TXBR_SIZE;
	cbr.cbr_rxsz = HN_RXBR_SIZE;
	error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
	if (error) {
		if (error == EISCONN) {
			if_printf(sc->hn_ifp, "bufring is connected after "
			    "chan%u open failure\n", vmbus_chan_id(chan));
			rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
		} else {
			if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
			    vmbus_chan_id(chan), error);
		}
	}
	return (error);
}

static void
hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
{
	struct hn_rx_ring *rxr;
	int idx, error;

	idx = vmbus_chan_subidx(chan);

	/*
	 * Unlink this channel from the RX/TX ring.
	 */
	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
	    ("invalid channel index %d, should be >= 0 and < %d",
	     idx, sc->hn_rx_ring_inuse));
	rxr = &sc->hn_rx_ring[idx];
	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
	    ("RX ring %d is not attached", idx));
	rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;

	if (idx < sc->hn_tx_ring_inuse) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];

		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
		    ("TX ring %d is not attached", idx));
		txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
	}

	/*
	 * Close this channel.
	 *
	 * NOTE:
	 * Channel closing does _not_ destroy the target channel.
	 */
	error = vmbus_chan_close_direct(chan);
	if (error == EISCONN) {
		if_printf(sc->hn_ifp, "chan%u bufring is connected "
		    "after being closed\n", vmbus_chan_id(chan));
		rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
	} else if (error) {
		if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
		    vmbus_chan_id(chan), error);
	}
}

static int
hn_attach_subchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i, error = 0;

	KASSERT(subchan_cnt > 0, ("no sub-channels"));

	/* Attach the sub-channels. */
	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
	for (i = 0; i < subchan_cnt; ++i) {
		int error1;

		error1 = hn_chan_attach(sc, subchans[i]);
		if (error1) {
			error = error1;
			/* Move on; all channels will be detached later. */
		}
	}
	vmbus_subchan_rel(subchans, subchan_cnt);

	if (error) {
		if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
	} else {
		if (bootverbose) {
			if_printf(sc->hn_ifp, "%d sub-channels attached\n",
			    subchan_cnt);
		}
	}
	return (error);
}

static void
hn_detach_allchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i;

	if (subchan_cnt == 0)
		goto back;

	/* Detach the sub-channels. */
	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
	for (i = 0; i < subchan_cnt; ++i)
		hn_chan_detach(sc, subchans[i]);
	vmbus_subchan_rel(subchans, subchan_cnt);

back:
	/*
	 * Detach the primary channel, _after_ all sub-channels
	 * are detached.
	 */
	hn_chan_detach(sc, sc->hn_prichan);

	/* Wait for sub-channels to be destroyed, if any. */
	vmbus_subchan_drain(sc->hn_prichan);

#ifdef INVARIANTS
	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
		    HN_RX_FLAG_ATTACHED) == 0,
		    ("%dth RX ring is still attached", i));
	}
	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
		KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
		    HN_TX_FLAG_ATTACHED) == 0,
		    ("%dth TX ring is still attached", i));
	}
#endif
}
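
/*
 * Negotiate the number of channels to use.  The requested count is
 * first clamped to the number of RX rings the host offers through the
 * RNDIS RSS capabilities query, then sub-channels are requested from
 * NVS, which may grant fewer than asked for; e.g. if 8 rings are
 * requested but the host offers only 4, at most 3 sub-channels are
 * requested, and the count actually granted is returned in *nsubch.
 */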

static int
hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
{
	struct vmbus_channel **subchans;
	int nchan, rxr_cnt, error;

	nchan = *nsubch + 1;
	if (nchan == 1) {
		/*
		 * Multiple RX/TX rings are not requested.
		 */
		*nsubch = 0;
		return (0);
	}

	/*
	 * Query RSS capabilities, e.g. # of RX rings, and # of indirect
	 * table entries.
	 */
	error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
	if (error) {
		/* No RSS; this is benign. */
		*nsubch = 0;
		return (0);
	}
	if (bootverbose) {
		if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
		    rxr_cnt, nchan);
	}

	if (nchan > rxr_cnt)
		nchan = rxr_cnt;
	if (nchan == 1) {
		if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
		*nsubch = 0;
		return (0);
	}

	/*
	 * Allocate sub-channels from NVS.
	 */
	*nsubch = nchan - 1;
	error = hn_nvs_alloc_subchans(sc, nsubch);
	if (error || *nsubch == 0) {
		/* Failed to allocate sub-channels. */
		*nsubch = 0;
		return (0);
	}

	/*
	 * Wait for all sub-channels to become ready before moving on.
	 */
	subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
	vmbus_subchan_rel(subchans, *nsubch);
	return (0);
}

static bool
hn_synth_attachable(const struct hn_softc *sc)
{
	int i;

	if (sc->hn_flags & HN_FLAG_ERRORS)
		return (false);

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];

		if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF)
			return (false);
	}
	return (true);
}

/*
 * Make sure that the RX filter is zero after the successful
 * RNDIS initialization.
 *
 * NOTE:
 * Under certain conditions on certain versions of Hyper-V,
 * the RNDIS rxfilter is _not_ zero on the hypervisor side
 * after the successful RNDIS initialization, which breaks
 * the assumption of any following code (well, it breaks the
 * RNDIS API contract actually).  Clear the RNDIS rxfilter
 * explicitly, drain packets sneaking through, and drain the
 * interrupt taskqueues scheduled due to the stealth packets.
 */
static void
hn_rndis_init_fixat(struct hn_softc *sc, int nchan)
{

	hn_disable_rx(sc);
	hn_drain_rxtx(sc, nchan);
}
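
/*
 * Attach the synthetic parts: the primary channel is attached first,
 * then NVS, then RNDIS; sub-channels are allocated and attached
 * afterwards, and finally the RSS key and indirect table are
 * configured once all channels are usable.  Any failure unwinds
 * whatever has been attached so far.
 */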

static int
hn_synth_attach(struct hn_softc *sc, int mtu)
{
#define ATTACHED_NVS		0x0002
#define ATTACHED_RNDIS		0x0004

	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int error, nsubch, nchan = 1, i, rndis_inited;
	uint32_t old_caps, attached = 0;

	KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
	    ("synthetic parts were attached"));

	if (!hn_synth_attachable(sc))
		return (ENXIO);

	/* Save capabilities for later verification. */
	old_caps = sc->hn_caps;
	sc->hn_caps = 0;

	/* Clear RSS related state. */
	sc->hn_rss_ind_size = 0;
	sc->hn_rss_hash = 0;
	sc->hn_rss_hcap = 0;

	/*
	 * Attach the primary channel _before_ attaching NVS and RNDIS.
	 */
	error = hn_chan_attach(sc, sc->hn_prichan);
	if (error)
		goto failed;

	/*
	 * Attach NVS.
	 */
	error = hn_nvs_attach(sc, mtu);
	if (error)
		goto failed;
	attached |= ATTACHED_NVS;

	/*
	 * Attach RNDIS _after_ NVS is attached.
	 */
	error = hn_rndis_attach(sc, mtu, &rndis_inited);
	if (rndis_inited)
		attached |= ATTACHED_RNDIS;
	if (error)
		goto failed;

	/*
	 * Make sure capabilities are not changed.
	 */
	if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
		if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
		    old_caps, sc->hn_caps);
		error = ENXIO;
		goto failed;
	}

	/*
	 * Allocate sub-channels for multi-TX/RX rings.
	 *
	 * NOTE:
	 * The # of RX rings that can be used is equivalent to the # of
	 * channels to be requested.
	 */
	nsubch = sc->hn_rx_ring_cnt - 1;
	error = hn_synth_alloc_subchans(sc, &nsubch);
	if (error)
		goto failed;
	/* NOTE: _Full_ synthetic parts detach is required now. */
	sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;

	/*
	 * Set the # of TX/RX rings that could be used according to
	 * the # of channels that NVS offered.
	 */
	nchan = nsubch + 1;
	hn_set_ring_inuse(sc, nchan);
	if (nchan == 1) {
		/* Only the primary channel can be used; done */
		goto back;
	}

	/*
	 * Attach the sub-channels.
	 *
	 * NOTE: hn_set_ring_inuse() _must_ have been called.
	 */
	error = hn_attach_subchans(sc);
	if (error)
		goto failed;

	/*
	 * Configure RSS key and indirect table _after_ all sub-channels
	 * are attached.
	 */
	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
		/*
		 * RSS key is not set yet; set it to the default RSS key.
		 */
		if (bootverbose)
			if_printf(sc->hn_ifp, "setup default RSS key\n");
#ifdef RSS
		rss_getkey(rss->rss_key);
#else
		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
#endif
		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
	}

	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
		/*
		 * RSS indirect table is not set yet; set it up in
		 * round-robin fashion (e.g. with 4 channels the table
		 * becomes 0, 1, 2, 3, 0, 1, ...).
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "setup default RSS indirect "
			    "table\n");
		}
		for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
			uint32_t subidx;

#ifdef RSS
			subidx = rss_get_indirection_to_bucket(i);
#else
			subidx = i;
#endif
			rss->rss_ind[i] = subidx % nchan;
		}
		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
	} else {
		/*
		 * # of usable channels may be changed, so we have to
		 * make sure that all entries in the RSS indirect table
		 * are valid.
		 *
		 * NOTE: hn_set_ring_inuse() _must_ have been called.
		 */
		hn_rss_ind_fixup(sc);
	}

	sc->hn_rss_hash = sc->hn_rss_hcap;
	if ((sc->hn_flags & HN_FLAG_RXVF) ||
	    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
		/* NOTE: Don't reconfigure RSS; will do immediately. */
		hn_vf_rss_fixup(sc, false);
	}
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error)
		goto failed;
back:
	/*
	 * Fixup transmission aggregation setup.
	 */
	hn_set_txagg(sc);
	hn_rndis_init_fixat(sc, nchan);
	return (0);

failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
		hn_rndis_init_fixat(sc, nchan);
		hn_synth_detach(sc);
	} else {
		if (attached & ATTACHED_RNDIS) {
			hn_rndis_init_fixat(sc, nchan);
			hn_rndis_detach(sc);
		}
		if (attached & ATTACHED_NVS)
			hn_nvs_detach(sc);
		hn_chan_detach(sc, sc->hn_prichan);
		/* Restore old capabilities. */
		sc->hn_caps = old_caps;
	}
	return (error);

#undef ATTACHED_RNDIS
#undef ATTACHED_NVS
}

/*
 * NOTE:
 * The interface must have been suspended through hn_suspend(), before
 * this function gets called.
 */
static void
hn_synth_detach(struct hn_softc *sc)
{

	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
	    ("synthetic parts were not attached"));

	/* Detach the RNDIS first. */
	hn_rndis_detach(sc);

	/* Detach NVS. */
	hn_nvs_detach(sc);

	/* Detach all of the channels. */
	hn_detach_allchans(sc);

	if (vmbus_current_version >= VMBUS_VERSION_WIN10 &&
	    sc->hn_rxbuf_gpadl != 0) {
		/*
		 * Host is post-Win2016; disconnect the RXBUF from the
		 * primary channel here.
		 */
		int error;

		error = vmbus_chan_gpadl_disconnect(sc->hn_prichan,
		    sc->hn_rxbuf_gpadl);
		if (error) {
			if_printf(sc->hn_ifp,
			    "rxbuf gpadl disconn failed: %d\n", error);
			sc->hn_flags |= HN_FLAG_RXBUF_REF;
		}
		sc->hn_rxbuf_gpadl = 0;
	}

	if (vmbus_current_version >= VMBUS_VERSION_WIN10 &&
	    sc->hn_chim_gpadl != 0) {
		/*
		 * Host is post-Win2016; disconnect the chimney sending
		 * buffer from the primary channel here.
		 */
		int error;

		error = vmbus_chan_gpadl_disconnect(sc->hn_prichan,
		    sc->hn_chim_gpadl);
		if (error) {
			if_printf(sc->hn_ifp,
			    "chim gpadl disconn failed: %d\n", error);
			sc->hn_flags |= HN_FLAG_CHIM_REF;
		}
		sc->hn_chim_gpadl = 0;
	}
	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}

static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{
	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

#ifdef RSS
	if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) {
		if_printf(sc->hn_ifp, "# of RX rings (%d) does not match "
		    "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse,
		    rss_getnumbuckets());
	}
#endif

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}

static void
hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
{

	/*
	 * NOTE:
	 * The TX bufring will not be drained by the hypervisor if the
	 * primary channel is revoked.
	 */
	while (!vmbus_chan_rx_empty(chan) ||
	    (!vmbus_chan_is_revoked(sc->hn_prichan) &&
	     !vmbus_chan_tx_empty(chan)))
		pause("waitch", 1);
	vmbus_chan_intr_drain(chan);
}

static void
hn_disable_rx(struct hn_softc *sc)
{

	/*
	 * Disable RX by clearing the RX filter forcefully.
	 */
	sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
	hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */

	/*
	 * Give RNDIS enough time to flush all pending data packets.
	 */
	pause("waitrx", (200 * hz) / 1000);
}

/*
 * NOTE:
 * RX/TX _must_ have been suspended/disabled, before this function
 * is called.
 */
static void
hn_drain_rxtx(struct hn_softc *sc, int nchan)
{
	struct vmbus_channel **subch = NULL;
	int nsubch;

	/*
	 * Drain RX/TX bufrings and interrupts.
	 */
	nsubch = nchan - 1;
	if (nsubch > 0)
		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);

	if (subch != NULL) {
		int i;

		for (i = 0; i < nsubch; ++i)
			hn_chan_drain(sc, subch[i]);
	}
	hn_chan_drain(sc, sc->hn_prichan);

	if (subch != NULL)
		vmbus_subchan_rel(subch, nsubch);
}

static void
hn_suspend_data(struct hn_softc *sc)
{
	struct hn_tx_ring *txr;
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Suspend TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 1;
		mtx_unlock(&txr->hn_tx_lock);
		/* No one is able to send more packets now. */

		/*
		 * Wait for all pending sends to finish.
		 *
		 * NOTE:
		 * We will _not_ receive all pending send-done notifications
		 * if the primary channel is revoked.
		 */
		while (hn_tx_ring_pending(txr) &&
		    !vmbus_chan_is_revoked(sc->hn_prichan))
			pause("hnwtx", 1 /* 1 tick */);
	}

	/*
	 * Disable RX.
	 */
	hn_disable_rx(sc);

	/*
	 * Drain RX/TX.
	 */
	hn_drain_rxtx(sc, sc->hn_rx_ring_inuse);

	/*
	 * Drain any pending TX tasks.
	 *
	 * NOTE:
	 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX
	 * tasks will have to be drained _after_ the above hn_drain_rxtx().
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{

	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}

static void
hn_suspend_mgmt(struct hn_softc *sc)
{
	struct task task;

	HN_LOCK_ASSERT(sc);

	/*
	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
	 * through hn_mgmt_taskq.
	 */
	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
	vmbus_chan_run_task(sc->hn_prichan, &task);

	/*
	 * Make sure that all pending management tasks are completed.
	 */
	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
	taskqueue_drain_all(sc->hn_mgmt_taskq0);
}

static void
hn_suspend(struct hn_softc *sc)
{

	/* Disable polling. */
	hn_polling(sc, 0);

	/*
	 * If the non-transparent mode VF is activated, the synthetic
	 * device is receiving packets, so the data path of the
	 * synthetic device must be suspended.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_RXVF))
		hn_suspend_data(sc);
	hn_suspend_mgmt(sc);
}

static void
hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
{
	int i;

	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
	    ("invalid TX ring count %d", tx_ring_cnt));

	for (i = 0; i < tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 0;
		mtx_unlock(&txr->hn_tx_lock);
	}
}
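
/*
 * Resume the data path: reprogram the RX filter, clear the suspend
 * flag on all TX rings, flush the drbrs of TX rings that are no
 * longer in use, and finally kick the txeof tasks so that any
 * lingering oactive state is cleared and queued packets go out.
 */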

static void
hn_resume_data(struct hn_softc *sc)
{
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Re-enable RX.
	 */
	hn_rxfilter_config(sc);

	/*
	 * Make sure to clear suspend status on "all" TX rings,
	 * since hn_tx_ring_inuse can be changed after
	 * hn_suspend_data().
	 */
	hn_resume_tx(sc, sc->hn_tx_ring_cnt);

#ifdef HN_IFSTART_SUPPORT
	if (!hn_use_if_start)
#endif
	{
		/*
		 * Flush unused drbrs, since hn_tx_ring_inuse may be
		 * reduced.
		 */
		for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
			hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
	}

	/*
	 * Kick-start TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		/*
		 * Use the txeof task, so that any pending oactive can be
		 * cleared properly.
		 */
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_resume_mgmt(struct hn_softc *sc)
{

	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;

	/*
	 * Kick off network change detection, if it was pending.
	 * If no network change was pending, start link status
	 * checks, which are more lightweight than network change
	 * detection.
	 */
	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		hn_change_network(sc);
	else
		hn_update_link_status(sc);
}

static void
hn_resume(struct hn_softc *sc)
{

	/*
	 * If the non-transparent mode VF is activated, the synthetic
	 * device has to receive packets, so the data path of the
	 * synthetic device must be resumed.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_RXVF))
		hn_resume_data(sc);

	/*
	 * Don't resume link status change if VF is attached/activated.
	 * - In the non-transparent VF mode, the synthetic device marks
	 *   link down until the VF is deactivated; i.e. VF is down.
	 * - In the transparent VF mode, the VF's media status is used
	 *   until the VF is detached.
	 */
	if ((sc->hn_flags & HN_FLAG_RXVF) == 0 &&
	    !(hn_xpnt_vf && sc->hn_vf_ifp != NULL))
		hn_resume_mgmt(sc);

	/*
	 * Re-enable polling if this interface is running and
	 * the polling is requested.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
		hn_polling(sc, sc->hn_pollhz);
}

static void
hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
{
	const struct rndis_status_msg *msg;
	int ofs;

	if (dlen < sizeof(*msg)) {
		if_printf(sc->hn_ifp, "invalid RNDIS status\n");
		return;
	}
	msg = data;

	switch (msg->rm_status) {
	case RNDIS_STATUS_MEDIA_CONNECT:
	case RNDIS_STATUS_MEDIA_DISCONNECT:
		hn_update_link_status(sc);
		break;

	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
	case RNDIS_STATUS_LINK_SPEED_CHANGE:
		/* Not really useful; ignore. */
		break;

	case RNDIS_STATUS_NETWORK_CHANGE:
		ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
		if (dlen < ofs + msg->rm_stbuflen ||
		    msg->rm_stbuflen < sizeof(uint32_t)) {
			if_printf(sc->hn_ifp, "network changed\n");
		} else {
			uint32_t change;

			memcpy(&change, ((const uint8_t *)msg) + ofs,
			    sizeof(change));
			if_printf(sc->hn_ifp, "network changed, change %u\n",
			    change);
		}
		hn_change_network(sc);
		break;

	default:
		if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
		    msg->rm_status);
		break;
	}
}

static int
hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (__predict_false(info_dlen < sizeof(*pi)))
			return (EINVAL);
		if (__predict_false(info_dlen < pi->rm_size))
			return (EINVAL);
		info_dlen -= pi->rm_size;

		if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return (EINVAL);
		if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
			return (EINVAL);
		dlen = pi->rm_size - pi->rm_pktinfooffset;
		data = pi->rm_data;

		switch (pi->rm_type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
				return (EINVAL);
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
				return (EINVAL);
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHVAL:
			if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
				return (EINVAL);
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHINF:
			if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
				return (EINVAL);
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL) {
			/* All found; done */
			break;
		}
next:
		pi = (const struct rndis_pktinfo *)
		    ((const uint8_t *)pi + pi->rm_size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if ((mask & HN_RXINFO_HASHVAL) == 0)
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return (0);
}

static __inline bool
hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
{

	if (off < check_off) {
		if (__predict_true(off + len <= check_off))
			return (false);
	} else if (off > check_off) {
		if (__predict_true(check_off + check_len <= off))
			return (false);
	}
	return (true);
}
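
/*
 * An RNDIS packet message carries up to three regions after its fixed
 * header: the data region (the Ethernet frame), an optional OOB data
 * region, and an optional per-packet-info region.  The offsets in the
 * message are converted to absolute offsets from the start of the
 * message with RNDIS_PACKET_MSG_OFFSET_ABS().  The validation below
 * only requires that each region fits within rm_len and that the
 * regions do not overlap one another; no particular ordering of the
 * regions inside the message is assumed.
 */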

static void
hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_packet_msg *pkt;
	struct hn_rxinfo info;
	int data_off, pktinfo_off, data_len, pktinfo_len;

	/*
	 * Check length.
	 */
	if (__predict_false(dlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
		return;
	}
	pkt = data;

	if (__predict_false(dlen < pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
		return;
	}
	if (__predict_false(pkt->rm_len <
	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
		    "msglen %u, data %u, oob %u, pktinfo %u\n",
		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
		    pkt->rm_pktinfolen);
		return;
	}
	if (__predict_false(pkt->rm_datalen == 0)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
		return;
	}

	/*
	 * Check offsets.
	 */
#define IS_OFFSET_INVALID(ofs)			\
	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))

	/* XXX Hyper-V does not meet data offset alignment requirement */
	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data offset %u\n", pkt->rm_dataoffset);
		return;
	}
	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "oob offset %u\n", pkt->rm_oobdataoffset);
		return;
	}
	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
		return;
	}

#undef IS_OFFSET_INVALID

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
	data_len = pkt->rm_datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
	pktinfo_len = pkt->rm_pktinfolen;

	/*
	 * Check OOB coverage.
	 */
	if (__predict_false(pkt->rm_oobdatalen != 0)) {
		int oob_off, oob_len;

		if_printf(rxr->hn_ifp, "got oobdata\n");
		oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
		oob_len = pkt->rm_oobdatalen;

		if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overflow, msglen %u, oob abs %d len %d\n",
			    pkt->rm_len, oob_off, oob_len);
			return;
		}

		/*
		 * Check against data.
		 */
		if (hn_rndis_check_overlap(oob_off, oob_len,
		    data_off, data_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps data, oob abs %d len %d, "
			    "data abs %d len %d\n",
			    oob_off, oob_len, data_off, data_len);
			return;
		}

		/*
		 * Check against pktinfo.
		 */
		if (pktinfo_len != 0 &&
		    hn_rndis_check_overlap(oob_off, oob_len,
		    pktinfo_off, pktinfo_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps pktinfo, oob abs %d len %d, "
			    "pktinfo abs %d len %d\n",
			    oob_off, oob_len, pktinfo_off, pktinfo_len);
			return;
		}
	}

	/*
	 * Check per-packet-info coverage and find useful per-packet-info.
	 */
	info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
	info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
	info.hash_info = HN_NDIS_HASH_INFO_INVALID;
	if (__predict_true(pktinfo_len != 0)) {
		bool overlap;
		int error;

		if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overflow, msglen %u, "
			    "pktinfo abs %d len %d\n",
			    pkt->rm_len, pktinfo_off, pktinfo_len);
			return;
		}

		/*
		 * Check packet info coverage.
		 */
		overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
		    data_off, data_len);
		if (__predict_false(overlap)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overlaps data, pktinfo abs %d len %d, "
			    "data abs %d len %d\n",
			    pktinfo_off, pktinfo_len, data_off, data_len);
			return;
		}

		/*
		 * Find useful per-packet-info.
		 */
		error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
		    pktinfo_len, &info);
		if (__predict_false(error)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
			    "pktinfo\n");
			return;
		}
	}

	if (__predict_false(data_off + data_len > pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data overflow, msglen %u, data abs %d len %d\n",
		    pkt->rm_len, data_off, data_len);
		return;
	}
	hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
}

static __inline void
hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_msghdr *hdr;

	if (__predict_false(dlen < sizeof(*hdr))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
		return;
	}
	hdr = data;

	if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
		/* Hot data path. */
		hn_rndis_rx_data(rxr, data, dlen);
		/* Done! */
		return;
	}

	if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
		hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
	else
		hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
}

static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{
	const struct hn_nvs_hdr *hdr;

	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
		if_printf(sc->hn_ifp, "invalid nvs notify\n");
		return;
	}
	hdr = VMBUS_CHANPKT_CONST_DATA(pkt);

	if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
		/* Useless; ignore */
		return;
	}
	if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
}

static void
hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkt)
{
	struct hn_nvs_sendctx *sndc;

	sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
	sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
	    VMBUS_CHANPKT_DATALEN(pkt));
	/*
	 * NOTE:
	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
	 * its callback.
	 */
}
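
/*
 * An RXBUF channel packet does not carry the frame data inline; it
 * describes one or more ranges inside the shared receive buffer
 * (hn_rxbuf), each holding a complete RNDIS message.  After all ranges
 * are processed, the RXBUF must be acked so the hypervisor can recycle
 * it.
 */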

static void
hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkthdr)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr;
	int count, i, hlen;

	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
		return;
	}
	nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);

	/* Make sure that this is a RNDIS message. */
	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
		    nvs_hdr->nvs_type);
		return;
	}

	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
	if (__predict_false(hlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
		return;
	}
	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;

	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
		    pkt->cp_rxbuf_id);
		return;
	}

	count = pkt->cp_rxbuf_cnt;
	if (__predict_false(hlen <
	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
		return;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		int ofs, len;

		ofs = pkt->cp_rxbuf[i].rb_ofs;
		len = pkt->cp_rxbuf[i].rb_len;
		if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
			    "ofs %d, len %d\n", i, ofs, len);
			continue;
		}
		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
	}

	/*
	 * Ack the consumed RXBUF associated w/ this channel packet,
	 * so that this RXBUF can be recycled by the hypervisor.
	 */
	hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
}

static void
hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    uint64_t tid)
{
	struct hn_nvs_rndis_ack ack;
	int retries, error;

	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
	ack.nvs_status = HN_NVS_STATUS_OK;

	retries = 0;
again:
	error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
	    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
	if (__predict_false(error == EAGAIN)) {
		/*
		 * NOTE:
		 * This should _not_ happen in the real world, since the
		 * consumption of the TX bufring from the TX path is
		 * controlled.
		 */
		if (rxr->hn_ack_failed == 0)
			if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
		rxr->hn_ack_failed++;
		retries++;
		if (retries < 10) {
			DELAY(100);
			goto again;
		}
		/* RXBUF leaks! */
		if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
	}
}
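
/*
 * Per-channel interrupt callback.  Channel packets are read from the
 * channel one at a time into the per-ring packet buffer and dispatched
 * by type: completion packets go to the NVS send context callback,
 * RXBUF packets carry inbound RNDIS messages, and inband packets are
 * NVS notifications.  The packet buffer is grown on demand when
 * vmbus_chan_recv_pkt() reports ENOBUFS.
 */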

static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
	struct hn_rx_ring *rxr = xrxr;
	struct hn_softc *sc = rxr->hn_ifp->if_softc;

	for (;;) {
		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
		int error, pktlen;

		pktlen = rxr->hn_pktbuf_len;
		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
		if (__predict_false(error == ENOBUFS)) {
			void *nbuf;
			int nlen;

			/*
			 * Expand channel packet buffer.
			 *
			 * XXX
			 * Use M_WAITOK here, since allocation failure
			 * is fatal.
			 */
			nlen = rxr->hn_pktbuf_len * 2;
			while (nlen < pktlen)
				nlen *= 2;
			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);

			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
			    rxr->hn_pktbuf_len, nlen);

			free(rxr->hn_pktbuf, M_DEVBUF);
			rxr->hn_pktbuf = nbuf;
			rxr->hn_pktbuf_len = nlen;
			/* Retry! */
			continue;
		} else if (__predict_false(error == EAGAIN)) {
			/* No more channel packets; done! */
			break;
		}
		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			hn_nvs_handle_comp(sc, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(rxr, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(sc, pkt);
			break;

		default:
			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
			    pkt->cph_type);
			break;
		}
	}
	hn_chan_rollup(rxr, rxr->hn_txr);
}

static void
hn_sysinit(void *arg __unused)
{
	int i;

	hn_udpcs_fixup = counter_u64_alloc(M_WAITOK);

#ifdef HN_IFSTART_SUPPORT
	/*
	 * Don't use ifnet.if_start if transparent VF mode is requested;
	 * mainly due to the IFF_DRV_OACTIVE flag.
	 */
	if (hn_xpnt_vf && hn_use_if_start) {
		hn_use_if_start = 0;
		printf("hn: transparent VF mode, if_transmit will be used "
		    "instead of if_start\n");
	}
#endif
	if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) {
		printf("hn: invalid transparent VF attach routing "
		    "wait timeout %d, reset to %d\n",
		    hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN);
		hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
	}

	/*
	 * Initialize the VF map.
	 */
	rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE);
	hn_vfmap_size = HN_VFMAP_SIZE_DEF;
	hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF,
	    M_WAITOK | M_ZERO);

	/*
	 * Fix the # of TX taskqueues.
	 */
	if (hn_tx_taskq_cnt <= 0)
		hn_tx_taskq_cnt = 1;
	else if (hn_tx_taskq_cnt > mp_ncpus)
		hn_tx_taskq_cnt = mp_ncpus;

	/*
	 * Fix the TX taskqueue mode.
	 */
	switch (hn_tx_taskq_mode) {
	case HN_TX_TASKQ_M_INDEP:
	case HN_TX_TASKQ_M_GLOBAL:
	case HN_TX_TASKQ_M_EVTTQ:
		break;
	default:
		hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
		break;
	}

	if (vm_guest != VM_GUEST_HV)
		return;

	if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
		return;

	hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
	    M_DEVBUF, M_WAITOK);
	for (i = 0; i < hn_tx_taskq_cnt; ++i) {
		hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &hn_tx_taskque[i]);
		taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
		    "hn tx%d", i);
	}
}
SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL);

static void
hn_sysuninit(void *arg __unused)
{

	if (hn_tx_taskque != NULL) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(hn_tx_taskque[i]);
		free(hn_tx_taskque, M_DEVBUF);
	}

	if (hn_vfmap != NULL)
		free(hn_vfmap, M_DEVBUF);
	rm_destroy(&hn_vfmap_lock);

	counter_u64_free(hn_udpcs_fixup);
}
SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL);