1 /*- 2 * Copyright (c) 2010-2012 Citrix Inc. 3 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. 4 * Copyright (c) 2012 NetApp Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 2004-2006 Kip Macy 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 53 */ 54 55 #include <sys/cdefs.h> 56 __FBSDID("$FreeBSD$"); 57 58 #include "opt_hn.h" 59 #include "opt_inet6.h" 60 #include "opt_inet.h" 61 #include "opt_rss.h" 62 63 #include <sys/param.h> 64 #include <sys/systm.h> 65 #include <sys/bus.h> 66 #include <sys/counter.h> 67 #include <sys/kernel.h> 68 #include <sys/limits.h> 69 #include <sys/malloc.h> 70 #include <sys/mbuf.h> 71 #include <sys/module.h> 72 #include <sys/queue.h> 73 #include <sys/lock.h> 74 #include <sys/proc.h> 75 #include <sys/rmlock.h> 76 #include <sys/sbuf.h> 77 #include <sys/sched.h> 78 #include <sys/smp.h> 79 #include <sys/socket.h> 80 #include <sys/sockio.h> 81 #include <sys/sx.h> 82 #include <sys/sysctl.h> 83 #include <sys/taskqueue.h> 84 #include <sys/buf_ring.h> 85 #include <sys/eventhandler.h> 86 87 #include <machine/atomic.h> 88 #include <machine/in_cksum.h> 89 90 #include <net/bpf.h> 91 #include <net/ethernet.h> 92 #include <net/if.h> 93 #include <net/if_dl.h> 94 #include <net/if_media.h> 95 #include <net/if_types.h> 96 #include <net/if_var.h> 97 #include <net/rndis.h> 98 #ifdef RSS 99 #include <net/rss_config.h> 100 #endif 101 102 #include <netinet/in_systm.h> 103 #include <netinet/in.h> 104 #include <netinet/ip.h> 105 #include <netinet/ip6.h> 106 #include <netinet/tcp.h> 107 #include <netinet/tcp_lro.h> 108 #include <netinet/udp.h> 109 110 #include <dev/hyperv/include/hyperv.h> 111 #include <dev/hyperv/include/hyperv_busdma.h> 112 #include <dev/hyperv/include/vmbus.h> 113 #include <dev/hyperv/include/vmbus_xact.h> 114 115 #include <dev/hyperv/netvsc/ndis.h> 116 #include <dev/hyperv/netvsc/if_hnreg.h> 117 #include <dev/hyperv/netvsc/if_hnvar.h> 118 #include <dev/hyperv/netvsc/hn_nvs.h> 119 #include <dev/hyperv/netvsc/hn_rndis.h> 120 121 #include "vmbus_if.h" 122 123 #define HN_IFSTART_SUPPORT 124 125 #define HN_RING_CNT_DEF_MAX 8 126 127 #define HN_VFMAP_SIZE_DEF 8 128 129 #define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */ 130 131 /* YYY should get it from the underlying channel */ 132 #define HN_TX_DESC_CNT 512 133 134 #define HN_RNDIS_PKT_LEN \ 135 (sizeof(struct rndis_packet_msg) + \ 136 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ 137 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ 138 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ 139 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) 140 #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE 141 #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE 142 143 #define HN_TX_DATA_BOUNDARY PAGE_SIZE 144 #define HN_TX_DATA_MAXSIZE IP_MAXPACKET 145 #define HN_TX_DATA_SEGSIZE PAGE_SIZE 146 /* -1 for RNDIS packet message */ 147 #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) 148 149 #define HN_DIRECT_TX_SIZE_DEF 128 150 151 #define HN_EARLY_TXEOF_THRESH 8 152 153 #define HN_PKTBUF_LEN_DEF (16 * 1024) 154 155 #define HN_LROENT_CNT_DEF 128 156 157 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) 158 #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) 159 /* YYY 2*MTU is a bit rough, but should be good enough. */ 160 #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) 161 162 #define HN_LRO_ACKCNT_DEF 1 163 164 #define HN_LOCK_INIT(sc) \ 165 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) 166 #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) 167 #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) 168 #define HN_LOCK(sc) \ 169 do { \ 170 while (sx_try_xlock(&(sc)->hn_lock) == 0) { \ 171 /* Relinquish cpu to avoid deadlock */ \ 172 sched_relinquish(curthread); \ 173 DELAY(1000); \ 174 } \ 175 } while (0) 176 #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) 177 178 #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) 179 #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) 180 #define HN_CSUM_IP_HWASSIST(sc) \ 181 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) 182 #define HN_CSUM_IP6_HWASSIST(sc) \ 183 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) 184 185 #define HN_PKTSIZE_MIN(align) \ 186 roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ 187 HN_RNDIS_PKT_LEN, (align)) 188 #define HN_PKTSIZE(m, align) \ 189 roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) 190 191 #ifdef RSS 192 #define HN_RING_IDX2CPU(sc, idx) rss_getcpu((idx) % rss_getnumbuckets()) 193 #else 194 #define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus) 195 #endif 196 197 struct hn_txdesc { 198 #ifndef HN_USE_TXDESC_BUFRING 199 SLIST_ENTRY(hn_txdesc) link; 200 #endif 201 STAILQ_ENTRY(hn_txdesc) agg_link; 202 203 /* Aggregated txdescs, in sending order. */ 204 STAILQ_HEAD(, hn_txdesc) agg_list; 205 206 /* The oldest packet, if transmission aggregation happens. */ 207 struct mbuf *m; 208 struct hn_tx_ring *txr; 209 int refs; 210 uint32_t flags; /* HN_TXD_FLAG_ */ 211 struct hn_nvs_sendctx send_ctx; 212 uint32_t chim_index; 213 int chim_size; 214 215 bus_dmamap_t data_dmap; 216 217 bus_addr_t rndis_pkt_paddr; 218 struct rndis_packet_msg *rndis_pkt; 219 bus_dmamap_t rndis_pkt_dmap; 220 }; 221 222 #define HN_TXD_FLAG_ONLIST 0x0001 223 #define HN_TXD_FLAG_DMAMAP 0x0002 224 #define HN_TXD_FLAG_ONAGG 0x0004 225 226 struct hn_rxinfo { 227 uint32_t vlan_info; 228 uint32_t csum_info; 229 uint32_t hash_info; 230 uint32_t hash_value; 231 }; 232 233 struct hn_rxvf_setarg { 234 struct hn_rx_ring *rxr; 235 struct ifnet *vf_ifp; 236 }; 237 238 #define HN_RXINFO_VLAN 0x0001 239 #define HN_RXINFO_CSUM 0x0002 240 #define HN_RXINFO_HASHINF 0x0004 241 #define HN_RXINFO_HASHVAL 0x0008 242 #define HN_RXINFO_ALL \ 243 (HN_RXINFO_VLAN | \ 244 HN_RXINFO_CSUM | \ 245 HN_RXINFO_HASHINF | \ 246 HN_RXINFO_HASHVAL) 247 248 #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff 249 #define HN_NDIS_RXCSUM_INFO_INVALID 0 250 #define HN_NDIS_HASH_INFO_INVALID 0 251 252 static int hn_probe(device_t); 253 static int hn_attach(device_t); 254 static int hn_detach(device_t); 255 static int hn_shutdown(device_t); 256 static void hn_chan_callback(struct vmbus_channel *, 257 void *); 258 259 static void hn_init(void *); 260 static int hn_ioctl(struct ifnet *, u_long, caddr_t); 261 #ifdef HN_IFSTART_SUPPORT 262 static void hn_start(struct ifnet *); 263 #endif 264 static int hn_transmit(struct ifnet *, struct mbuf *); 265 static void hn_xmit_qflush(struct ifnet *); 266 static int hn_ifmedia_upd(struct ifnet *); 267 static void hn_ifmedia_sts(struct ifnet *, 268 struct ifmediareq *); 269 270 static void hn_ifnet_event(void *, struct ifnet *, int); 271 static void hn_ifaddr_event(void *, struct ifnet *); 272 static void hn_ifnet_attevent(void *, struct ifnet *); 273 static void hn_ifnet_detevent(void *, struct ifnet *); 274 static void hn_ifnet_lnkevent(void *, struct ifnet *, int); 275 276 static bool hn_ismyvf(const struct hn_softc *, 277 const struct ifnet *); 278 static void hn_rxvf_change(struct hn_softc *, 279 struct ifnet *, bool); 280 static void hn_rxvf_set(struct hn_softc *, struct ifnet *); 281 static void hn_rxvf_set_task(void *, int); 282 static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *); 283 static int hn_xpnt_vf_iocsetflags(struct hn_softc *); 284 static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, 285 struct ifreq *); 286 static void hn_xpnt_vf_saveifflags(struct hn_softc *); 287 static bool hn_xpnt_vf_isready(struct hn_softc *); 288 static void hn_xpnt_vf_setready(struct hn_softc *); 289 static void hn_xpnt_vf_init_taskfunc(void *, int); 290 static void hn_xpnt_vf_init(struct hn_softc *); 291 static void hn_xpnt_vf_setenable(struct hn_softc *); 292 static void hn_xpnt_vf_setdisable(struct hn_softc *, bool); 293 static void hn_vf_rss_fixup(struct hn_softc *, bool); 294 static void hn_vf_rss_restore(struct hn_softc *); 295 296 static int hn_rndis_rxinfo(const void *, int, 297 struct hn_rxinfo *); 298 static void hn_rndis_rx_data(struct hn_rx_ring *, 299 const void *, int); 300 static void hn_rndis_rx_status(struct hn_softc *, 301 const void *, int); 302 static void hn_rndis_init_fixat(struct hn_softc *, int); 303 304 static void hn_nvs_handle_notify(struct hn_softc *, 305 const struct vmbus_chanpkt_hdr *); 306 static void hn_nvs_handle_comp(struct hn_softc *, 307 struct vmbus_channel *, 308 const struct vmbus_chanpkt_hdr *); 309 static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, 310 struct vmbus_channel *, 311 const struct vmbus_chanpkt_hdr *); 312 static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, 313 struct vmbus_channel *, uint64_t); 314 315 #if __FreeBSD_version >= 1100099 316 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); 317 static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); 318 #endif 319 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); 320 static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); 321 #if __FreeBSD_version < 1100095 322 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); 323 #else 324 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); 325 #endif 326 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 327 static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 328 static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); 329 static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); 330 static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); 331 static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); 332 static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); 333 #ifndef RSS 334 static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); 335 static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); 336 #endif 337 static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); 338 static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS); 339 static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS); 340 static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); 341 static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); 342 static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); 343 static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); 344 static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS); 345 static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS); 346 static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS); 347 static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS); 348 static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS); 349 static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS); 350 static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS); 351 352 static void hn_stop(struct hn_softc *, bool); 353 static void hn_init_locked(struct hn_softc *); 354 static int hn_chan_attach(struct hn_softc *, 355 struct vmbus_channel *); 356 static void hn_chan_detach(struct hn_softc *, 357 struct vmbus_channel *); 358 static int hn_attach_subchans(struct hn_softc *); 359 static void hn_detach_allchans(struct hn_softc *); 360 static void hn_chan_rollup(struct hn_rx_ring *, 361 struct hn_tx_ring *); 362 static void hn_set_ring_inuse(struct hn_softc *, int); 363 static int hn_synth_attach(struct hn_softc *, int); 364 static void hn_synth_detach(struct hn_softc *); 365 static int hn_synth_alloc_subchans(struct hn_softc *, 366 int *); 367 static bool hn_synth_attachable(const struct hn_softc *); 368 static void hn_suspend(struct hn_softc *); 369 static void hn_suspend_data(struct hn_softc *); 370 static void hn_suspend_mgmt(struct hn_softc *); 371 static void hn_resume(struct hn_softc *); 372 static void hn_resume_data(struct hn_softc *); 373 static void hn_resume_mgmt(struct hn_softc *); 374 static void hn_suspend_mgmt_taskfunc(void *, int); 375 static void hn_chan_drain(struct hn_softc *, 376 struct vmbus_channel *); 377 static void hn_disable_rx(struct hn_softc *); 378 static void hn_drain_rxtx(struct hn_softc *, int); 379 static void hn_polling(struct hn_softc *, u_int); 380 static void hn_chan_polling(struct vmbus_channel *, u_int); 381 static void hn_mtu_change_fixup(struct hn_softc *); 382 383 static void hn_update_link_status(struct hn_softc *); 384 static void hn_change_network(struct hn_softc *); 385 static void hn_link_taskfunc(void *, int); 386 static void hn_netchg_init_taskfunc(void *, int); 387 static void hn_netchg_status_taskfunc(void *, int); 388 static void hn_link_status(struct hn_softc *); 389 390 static int hn_create_rx_data(struct hn_softc *, int); 391 static void hn_destroy_rx_data(struct hn_softc *); 392 static int hn_check_iplen(const struct mbuf *, int); 393 static void hn_rxpkt_proto(const struct mbuf *, int *, int *); 394 static int hn_set_rxfilter(struct hn_softc *, uint32_t); 395 static int hn_rxfilter_config(struct hn_softc *); 396 static int hn_rss_reconfig(struct hn_softc *); 397 static void hn_rss_ind_fixup(struct hn_softc *); 398 static void hn_rss_mbuf_hash(struct hn_softc *, uint32_t); 399 static int hn_rxpkt(struct hn_rx_ring *, const void *, 400 int, const struct hn_rxinfo *); 401 static uint32_t hn_rss_type_fromndis(uint32_t); 402 static uint32_t hn_rss_type_tondis(uint32_t); 403 404 static int hn_tx_ring_create(struct hn_softc *, int); 405 static void hn_tx_ring_destroy(struct hn_tx_ring *); 406 static int hn_create_tx_data(struct hn_softc *, int); 407 static void hn_fixup_tx_data(struct hn_softc *); 408 static void hn_fixup_rx_data(struct hn_softc *); 409 static void hn_destroy_tx_data(struct hn_softc *); 410 static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); 411 static void hn_txdesc_gc(struct hn_tx_ring *, 412 struct hn_txdesc *); 413 static int hn_encap(struct ifnet *, struct hn_tx_ring *, 414 struct hn_txdesc *, struct mbuf **); 415 static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, 416 struct hn_txdesc *); 417 static void hn_set_chim_size(struct hn_softc *, int); 418 static void hn_set_tso_maxsize(struct hn_softc *, int, int); 419 static bool hn_tx_ring_pending(struct hn_tx_ring *); 420 static void hn_tx_ring_qflush(struct hn_tx_ring *); 421 static void hn_resume_tx(struct hn_softc *, int); 422 static void hn_set_txagg(struct hn_softc *); 423 static void *hn_try_txagg(struct ifnet *, 424 struct hn_tx_ring *, struct hn_txdesc *, 425 int); 426 static int hn_get_txswq_depth(const struct hn_tx_ring *); 427 static void hn_txpkt_done(struct hn_nvs_sendctx *, 428 struct hn_softc *, struct vmbus_channel *, 429 const void *, int); 430 static int hn_txpkt_sglist(struct hn_tx_ring *, 431 struct hn_txdesc *); 432 static int hn_txpkt_chim(struct hn_tx_ring *, 433 struct hn_txdesc *); 434 static int hn_xmit(struct hn_tx_ring *, int); 435 static void hn_xmit_taskfunc(void *, int); 436 static void hn_xmit_txeof(struct hn_tx_ring *); 437 static void hn_xmit_txeof_taskfunc(void *, int); 438 #ifdef HN_IFSTART_SUPPORT 439 static int hn_start_locked(struct hn_tx_ring *, int); 440 static void hn_start_taskfunc(void *, int); 441 static void hn_start_txeof(struct hn_tx_ring *); 442 static void hn_start_txeof_taskfunc(void *, int); 443 #endif 444 445 SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 446 "Hyper-V network interface"); 447 448 /* Trust tcp segements verification on host side. */ 449 static int hn_trust_hosttcp = 1; 450 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, 451 &hn_trust_hosttcp, 0, 452 "Trust tcp segement verification on host side, " 453 "when csum info is missing (global setting)"); 454 455 /* Trust udp datagrams verification on host side. */ 456 static int hn_trust_hostudp = 1; 457 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, 458 &hn_trust_hostudp, 0, 459 "Trust udp datagram verification on host side, " 460 "when csum info is missing (global setting)"); 461 462 /* Trust ip packets verification on host side. */ 463 static int hn_trust_hostip = 1; 464 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, 465 &hn_trust_hostip, 0, 466 "Trust ip packet verification on host side, " 467 "when csum info is missing (global setting)"); 468 469 /* 470 * Offload UDP/IPv4 checksum. 471 */ 472 static int hn_enable_udp4cs = 1; 473 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN, 474 &hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum"); 475 476 /* 477 * Offload UDP/IPv6 checksum. 478 */ 479 static int hn_enable_udp6cs = 1; 480 SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN, 481 &hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum"); 482 483 /* Stats. */ 484 static counter_u64_t hn_udpcs_fixup; 485 SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW, 486 &hn_udpcs_fixup, "# of UDP checksum fixup"); 487 488 /* 489 * See hn_set_hlen(). 490 * 491 * This value is for Azure. For Hyper-V, set this above 492 * 65536 to disable UDP datagram checksum fixup. 493 */ 494 static int hn_udpcs_fixup_mtu = 1420; 495 SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN, 496 &hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold"); 497 498 /* Limit TSO burst size */ 499 static int hn_tso_maxlen = IP_MAXPACKET; 500 SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, 501 &hn_tso_maxlen, 0, "TSO burst limit"); 502 503 /* Limit chimney send size */ 504 static int hn_tx_chimney_size = 0; 505 SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, 506 &hn_tx_chimney_size, 0, "Chimney send packet size limit"); 507 508 /* Limit the size of packet for direct transmission */ 509 static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; 510 SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, 511 &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); 512 513 /* # of LRO entries per RX ring */ 514 #if defined(INET) || defined(INET6) 515 #if __FreeBSD_version >= 1100095 516 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; 517 SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, 518 &hn_lro_entry_count, 0, "LRO entry count"); 519 #endif 520 #endif 521 522 static int hn_tx_taskq_cnt = 1; 523 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN, 524 &hn_tx_taskq_cnt, 0, "# of TX taskqueues"); 525 526 #define HN_TX_TASKQ_M_INDEP 0 527 #define HN_TX_TASKQ_M_GLOBAL 1 528 #define HN_TX_TASKQ_M_EVTTQ 2 529 530 static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; 531 SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN, 532 &hn_tx_taskq_mode, 0, "TX taskqueue modes: " 533 "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs"); 534 535 #ifndef HN_USE_TXDESC_BUFRING 536 static int hn_use_txdesc_bufring = 0; 537 #else 538 static int hn_use_txdesc_bufring = 1; 539 #endif 540 SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, 541 &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); 542 543 #ifdef HN_IFSTART_SUPPORT 544 /* Use ifnet.if_start instead of ifnet.if_transmit */ 545 static int hn_use_if_start = 0; 546 SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, 547 &hn_use_if_start, 0, "Use if_start TX method"); 548 #endif 549 550 /* # of channels to use */ 551 static int hn_chan_cnt = 0; 552 SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, 553 &hn_chan_cnt, 0, 554 "# of channels to use; each channel has one RX ring and one TX ring"); 555 556 /* # of transmit rings to use */ 557 static int hn_tx_ring_cnt = 0; 558 SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, 559 &hn_tx_ring_cnt, 0, "# of TX rings to use"); 560 561 /* Software TX ring deptch */ 562 static int hn_tx_swq_depth = 0; 563 SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, 564 &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); 565 566 /* Enable sorted LRO, and the depth of the per-channel mbuf queue */ 567 #if __FreeBSD_version >= 1100095 568 static u_int hn_lro_mbufq_depth = 0; 569 SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, 570 &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); 571 #endif 572 573 /* Packet transmission aggregation size limit */ 574 static int hn_tx_agg_size = -1; 575 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN, 576 &hn_tx_agg_size, 0, "Packet transmission aggregation size limit"); 577 578 /* Packet transmission aggregation count limit */ 579 static int hn_tx_agg_pkts = -1; 580 SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN, 581 &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit"); 582 583 /* VF list */ 584 SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, 585 CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, 0, 0, 586 hn_vflist_sysctl, "A", 587 "VF list"); 588 589 /* VF mapping */ 590 SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, 591 CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_NEEDGIANT, 0, 0, 592 hn_vfmap_sysctl, "A", 593 "VF mapping"); 594 595 /* Transparent VF */ 596 static int hn_xpnt_vf = 1; 597 SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN, 598 &hn_xpnt_vf, 0, "Transparent VF mod"); 599 600 /* Accurate BPF support for Transparent VF */ 601 static int hn_xpnt_vf_accbpf = 0; 602 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN, 603 &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF"); 604 605 /* Extra wait for transparent VF attach routing; unit seconds. */ 606 static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; 607 SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN, 608 &hn_xpnt_vf_attwait, 0, 609 "Extra wait for transparent VF attach routing; unit: seconds"); 610 611 static u_int hn_cpu_index; /* next CPU for channel */ 612 static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */ 613 614 static struct rmlock hn_vfmap_lock; 615 static int hn_vfmap_size; 616 static struct ifnet **hn_vfmap; 617 618 #ifndef RSS 619 static const uint8_t 620 hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 621 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 622 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 623 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 624 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 625 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 626 }; 627 #endif /* !RSS */ 628 629 static const struct hyperv_guid hn_guid = { 630 .hv_guid = { 631 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, 632 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e } 633 }; 634 635 static device_method_t hn_methods[] = { 636 /* Device interface */ 637 DEVMETHOD(device_probe, hn_probe), 638 DEVMETHOD(device_attach, hn_attach), 639 DEVMETHOD(device_detach, hn_detach), 640 DEVMETHOD(device_shutdown, hn_shutdown), 641 DEVMETHOD_END 642 }; 643 644 static driver_t hn_driver = { 645 "hn", 646 hn_methods, 647 sizeof(struct hn_softc) 648 }; 649 650 static devclass_t hn_devclass; 651 652 DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); 653 MODULE_VERSION(hn, 1); 654 MODULE_DEPEND(hn, vmbus, 1, 1, 1); 655 656 #if __FreeBSD_version >= 1100099 657 static void 658 hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) 659 { 660 int i; 661 662 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 663 sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; 664 } 665 #endif 666 667 static int 668 hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) 669 { 670 671 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 672 txd->chim_size == 0, ("invalid rndis sglist txd")); 673 return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, 674 &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); 675 } 676 677 static int 678 hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) 679 { 680 struct hn_nvs_rndis rndis; 681 682 KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && 683 txd->chim_size > 0, ("invalid rndis chim txd")); 684 685 rndis.nvs_type = HN_NVS_TYPE_RNDIS; 686 rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; 687 rndis.nvs_chim_idx = txd->chim_index; 688 rndis.nvs_chim_sz = txd->chim_size; 689 690 return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, 691 &rndis, sizeof(rndis), &txd->send_ctx)); 692 } 693 694 static __inline uint32_t 695 hn_chim_alloc(struct hn_softc *sc) 696 { 697 int i, bmap_cnt = sc->hn_chim_bmap_cnt; 698 u_long *bmap = sc->hn_chim_bmap; 699 uint32_t ret = HN_NVS_CHIM_IDX_INVALID; 700 701 for (i = 0; i < bmap_cnt; ++i) { 702 int idx; 703 704 idx = ffsl(~bmap[i]); 705 if (idx == 0) 706 continue; 707 708 --idx; /* ffsl is 1-based */ 709 KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, 710 ("invalid i %d and idx %d", i, idx)); 711 712 if (atomic_testandset_long(&bmap[i], idx)) 713 continue; 714 715 ret = i * LONG_BIT + idx; 716 break; 717 } 718 return (ret); 719 } 720 721 static __inline void 722 hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) 723 { 724 u_long mask; 725 uint32_t idx; 726 727 idx = chim_idx / LONG_BIT; 728 KASSERT(idx < sc->hn_chim_bmap_cnt, 729 ("invalid chimney index 0x%x", chim_idx)); 730 731 mask = 1UL << (chim_idx % LONG_BIT); 732 KASSERT(sc->hn_chim_bmap[idx] & mask, 733 ("index bitmap 0x%lx, chimney index %u, " 734 "bitmap idx %d, bitmask 0x%lx", 735 sc->hn_chim_bmap[idx], chim_idx, idx, mask)); 736 737 atomic_clear_long(&sc->hn_chim_bmap[idx], mask); 738 } 739 740 #if defined(INET6) || defined(INET) 741 742 #define PULLUP_HDR(m, len) \ 743 do { \ 744 if (__predict_false((m)->m_len < (len))) { \ 745 (m) = m_pullup((m), (len)); \ 746 if ((m) == NULL) \ 747 return (NULL); \ 748 } \ 749 } while (0) 750 751 /* 752 * NOTE: If this function failed, the m_head would be freed. 753 */ 754 static __inline struct mbuf * 755 hn_tso_fixup(struct mbuf *m_head) 756 { 757 struct ether_vlan_header *evl; 758 struct tcphdr *th; 759 int ehlen; 760 761 KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); 762 763 PULLUP_HDR(m_head, sizeof(*evl)); 764 evl = mtod(m_head, struct ether_vlan_header *); 765 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 766 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 767 else 768 ehlen = ETHER_HDR_LEN; 769 m_head->m_pkthdr.l2hlen = ehlen; 770 771 #ifdef INET 772 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 773 struct ip *ip; 774 int iphlen; 775 776 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 777 ip = mtodo(m_head, ehlen); 778 iphlen = ip->ip_hl << 2; 779 m_head->m_pkthdr.l3hlen = iphlen; 780 781 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 782 th = mtodo(m_head, ehlen + iphlen); 783 784 ip->ip_len = 0; 785 ip->ip_sum = 0; 786 th->th_sum = in_pseudo(ip->ip_src.s_addr, 787 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 788 } 789 #endif 790 #if defined(INET6) && defined(INET) 791 else 792 #endif 793 #ifdef INET6 794 { 795 struct ip6_hdr *ip6; 796 797 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 798 ip6 = mtodo(m_head, ehlen); 799 if (ip6->ip6_nxt != IPPROTO_TCP) { 800 m_freem(m_head); 801 return (NULL); 802 } 803 m_head->m_pkthdr.l3hlen = sizeof(*ip6); 804 805 PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); 806 th = mtodo(m_head, ehlen + sizeof(*ip6)); 807 808 ip6->ip6_plen = 0; 809 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 810 } 811 #endif 812 return (m_head); 813 } 814 815 /* 816 * NOTE: If this function failed, the m_head would be freed. 817 */ 818 static __inline struct mbuf * 819 hn_set_hlen(struct mbuf *m_head) 820 { 821 const struct ether_vlan_header *evl; 822 int ehlen; 823 824 PULLUP_HDR(m_head, sizeof(*evl)); 825 evl = mtod(m_head, const struct ether_vlan_header *); 826 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 827 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 828 else 829 ehlen = ETHER_HDR_LEN; 830 m_head->m_pkthdr.l2hlen = ehlen; 831 832 #ifdef INET 833 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) { 834 const struct ip *ip; 835 int iphlen; 836 837 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 838 ip = mtodo(m_head, ehlen); 839 iphlen = ip->ip_hl << 2; 840 m_head->m_pkthdr.l3hlen = iphlen; 841 842 /* 843 * UDP checksum offload does not work in Azure, if the 844 * following conditions meet: 845 * - sizeof(IP hdr + UDP hdr + payload) > 1420. 846 * - IP_DF is not set in the IP hdr. 847 * 848 * Fallback to software checksum for these UDP datagrams. 849 */ 850 if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) && 851 m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen && 852 (ntohs(ip->ip_off) & IP_DF) == 0) { 853 uint16_t off = ehlen + iphlen; 854 855 counter_u64_add(hn_udpcs_fixup, 1); 856 PULLUP_HDR(m_head, off + sizeof(struct udphdr)); 857 *(uint16_t *)(m_head->m_data + off + 858 m_head->m_pkthdr.csum_data) = in_cksum_skip( 859 m_head, m_head->m_pkthdr.len, off); 860 m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP; 861 } 862 } 863 #endif 864 #if defined(INET6) && defined(INET) 865 else 866 #endif 867 #ifdef INET6 868 { 869 const struct ip6_hdr *ip6; 870 871 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 872 ip6 = mtodo(m_head, ehlen); 873 if (ip6->ip6_nxt != IPPROTO_TCP && 874 ip6->ip6_nxt != IPPROTO_UDP) { 875 m_freem(m_head); 876 return (NULL); 877 } 878 m_head->m_pkthdr.l3hlen = sizeof(*ip6); 879 } 880 #endif 881 return (m_head); 882 } 883 884 /* 885 * NOTE: If this function failed, the m_head would be freed. 886 */ 887 static __inline struct mbuf * 888 hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn) 889 { 890 const struct tcphdr *th; 891 int ehlen, iphlen; 892 893 *tcpsyn = 0; 894 ehlen = m_head->m_pkthdr.l2hlen; 895 iphlen = m_head->m_pkthdr.l3hlen; 896 897 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 898 th = mtodo(m_head, ehlen + iphlen); 899 if (th->th_flags & TH_SYN) 900 *tcpsyn = 1; 901 return (m_head); 902 } 903 904 #undef PULLUP_HDR 905 906 #endif /* INET6 || INET */ 907 908 static int 909 hn_set_rxfilter(struct hn_softc *sc, uint32_t filter) 910 { 911 int error = 0; 912 913 HN_LOCK_ASSERT(sc); 914 915 if (sc->hn_rx_filter != filter) { 916 error = hn_rndis_set_rxfilter(sc, filter); 917 if (!error) 918 sc->hn_rx_filter = filter; 919 } 920 return (error); 921 } 922 923 static int 924 hn_rxfilter_config(struct hn_softc *sc) 925 { 926 struct ifnet *ifp = sc->hn_ifp; 927 uint32_t filter; 928 929 HN_LOCK_ASSERT(sc); 930 931 /* 932 * If the non-transparent mode VF is activated, we don't know how 933 * its RX filter is configured, so stick the synthetic device in 934 * the promiscous mode. 935 */ 936 if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) { 937 filter = NDIS_PACKET_TYPE_PROMISCUOUS; 938 } else { 939 filter = NDIS_PACKET_TYPE_DIRECTED; 940 if (ifp->if_flags & IFF_BROADCAST) 941 filter |= NDIS_PACKET_TYPE_BROADCAST; 942 /* TODO: support multicast list */ 943 if ((ifp->if_flags & IFF_ALLMULTI) || 944 !CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) 945 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; 946 } 947 return (hn_set_rxfilter(sc, filter)); 948 } 949 950 static void 951 hn_set_txagg(struct hn_softc *sc) 952 { 953 uint32_t size, pkts; 954 int i; 955 956 /* 957 * Setup aggregation size. 958 */ 959 if (sc->hn_agg_size < 0) 960 size = UINT32_MAX; 961 else 962 size = sc->hn_agg_size; 963 964 if (sc->hn_rndis_agg_size < size) 965 size = sc->hn_rndis_agg_size; 966 967 /* NOTE: We only aggregate packets using chimney sending buffers. */ 968 if (size > (uint32_t)sc->hn_chim_szmax) 969 size = sc->hn_chim_szmax; 970 971 if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) { 972 /* Disable */ 973 size = 0; 974 pkts = 0; 975 goto done; 976 } 977 978 /* NOTE: Type of the per TX ring setting is 'int'. */ 979 if (size > INT_MAX) 980 size = INT_MAX; 981 982 /* 983 * Setup aggregation packet count. 984 */ 985 if (sc->hn_agg_pkts < 0) 986 pkts = UINT32_MAX; 987 else 988 pkts = sc->hn_agg_pkts; 989 990 if (sc->hn_rndis_agg_pkts < pkts) 991 pkts = sc->hn_rndis_agg_pkts; 992 993 if (pkts <= 1) { 994 /* Disable */ 995 size = 0; 996 pkts = 0; 997 goto done; 998 } 999 1000 /* NOTE: Type of the per TX ring setting is 'short'. */ 1001 if (pkts > SHRT_MAX) 1002 pkts = SHRT_MAX; 1003 1004 done: 1005 /* NOTE: Type of the per TX ring setting is 'short'. */ 1006 if (sc->hn_rndis_agg_align > SHRT_MAX) { 1007 /* Disable */ 1008 size = 0; 1009 pkts = 0; 1010 } 1011 1012 if (bootverbose) { 1013 if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", 1014 size, pkts, sc->hn_rndis_agg_align); 1015 } 1016 1017 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 1018 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 1019 1020 mtx_lock(&txr->hn_tx_lock); 1021 txr->hn_agg_szmax = size; 1022 txr->hn_agg_pktmax = pkts; 1023 txr->hn_agg_align = sc->hn_rndis_agg_align; 1024 mtx_unlock(&txr->hn_tx_lock); 1025 } 1026 } 1027 1028 static int 1029 hn_get_txswq_depth(const struct hn_tx_ring *txr) 1030 { 1031 1032 KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); 1033 if (hn_tx_swq_depth < txr->hn_txdesc_cnt) 1034 return txr->hn_txdesc_cnt; 1035 return hn_tx_swq_depth; 1036 } 1037 1038 static int 1039 hn_rss_reconfig(struct hn_softc *sc) 1040 { 1041 int error; 1042 1043 HN_LOCK_ASSERT(sc); 1044 1045 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1046 return (ENXIO); 1047 1048 /* 1049 * Disable RSS first. 1050 * 1051 * NOTE: 1052 * Direct reconfiguration by setting the UNCHG flags does 1053 * _not_ work properly. 1054 */ 1055 if (bootverbose) 1056 if_printf(sc->hn_ifp, "disable RSS\n"); 1057 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); 1058 if (error) { 1059 if_printf(sc->hn_ifp, "RSS disable failed\n"); 1060 return (error); 1061 } 1062 1063 /* 1064 * Reenable the RSS w/ the updated RSS key or indirect 1065 * table. 1066 */ 1067 if (bootverbose) 1068 if_printf(sc->hn_ifp, "reconfig RSS\n"); 1069 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 1070 if (error) { 1071 if_printf(sc->hn_ifp, "RSS reconfig failed\n"); 1072 return (error); 1073 } 1074 return (0); 1075 } 1076 1077 static void 1078 hn_rss_ind_fixup(struct hn_softc *sc) 1079 { 1080 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 1081 int i, nchan; 1082 1083 nchan = sc->hn_rx_ring_inuse; 1084 KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); 1085 1086 /* 1087 * Check indirect table to make sure that all channels in it 1088 * can be used. 1089 */ 1090 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 1091 if (rss->rss_ind[i] >= nchan) { 1092 if_printf(sc->hn_ifp, 1093 "RSS indirect table %d fixup: %u -> %d\n", 1094 i, rss->rss_ind[i], nchan - 1); 1095 rss->rss_ind[i] = nchan - 1; 1096 } 1097 } 1098 } 1099 1100 static int 1101 hn_ifmedia_upd(struct ifnet *ifp __unused) 1102 { 1103 1104 return EOPNOTSUPP; 1105 } 1106 1107 static void 1108 hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 1109 { 1110 struct hn_softc *sc = ifp->if_softc; 1111 1112 ifmr->ifm_status = IFM_AVALID; 1113 ifmr->ifm_active = IFM_ETHER; 1114 1115 if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { 1116 ifmr->ifm_active |= IFM_NONE; 1117 return; 1118 } 1119 ifmr->ifm_status |= IFM_ACTIVE; 1120 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 1121 } 1122 1123 static void 1124 hn_rxvf_set_task(void *xarg, int pending __unused) 1125 { 1126 struct hn_rxvf_setarg *arg = xarg; 1127 1128 arg->rxr->hn_rxvf_ifp = arg->vf_ifp; 1129 } 1130 1131 static void 1132 hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp) 1133 { 1134 struct hn_rx_ring *rxr; 1135 struct hn_rxvf_setarg arg; 1136 struct task task; 1137 int i; 1138 1139 HN_LOCK_ASSERT(sc); 1140 1141 TASK_INIT(&task, 0, hn_rxvf_set_task, &arg); 1142 1143 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 1144 rxr = &sc->hn_rx_ring[i]; 1145 1146 if (i < sc->hn_rx_ring_inuse) { 1147 arg.rxr = rxr; 1148 arg.vf_ifp = vf_ifp; 1149 vmbus_chan_run_task(rxr->hn_chan, &task); 1150 } else { 1151 rxr->hn_rxvf_ifp = vf_ifp; 1152 } 1153 } 1154 } 1155 1156 static bool 1157 hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp) 1158 { 1159 const struct ifnet *hn_ifp; 1160 1161 hn_ifp = sc->hn_ifp; 1162 1163 if (ifp == hn_ifp) 1164 return (false); 1165 1166 if (ifp->if_alloctype != IFT_ETHER) 1167 return (false); 1168 1169 /* Ignore lagg/vlan interfaces */ 1170 if (strcmp(ifp->if_dname, "lagg") == 0 || 1171 strcmp(ifp->if_dname, "vlan") == 0) 1172 return (false); 1173 1174 /* 1175 * During detach events ifp->if_addr might be NULL. 1176 * Make sure the bcmp() below doesn't panic on that: 1177 */ 1178 if (ifp->if_addr == NULL || hn_ifp->if_addr == NULL) 1179 return (false); 1180 1181 if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0) 1182 return (false); 1183 1184 return (true); 1185 } 1186 1187 static void 1188 hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf) 1189 { 1190 struct ifnet *hn_ifp; 1191 1192 HN_LOCK(sc); 1193 1194 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1195 goto out; 1196 1197 if (!hn_ismyvf(sc, ifp)) 1198 goto out; 1199 hn_ifp = sc->hn_ifp; 1200 1201 if (rxvf) { 1202 if (sc->hn_flags & HN_FLAG_RXVF) 1203 goto out; 1204 1205 sc->hn_flags |= HN_FLAG_RXVF; 1206 hn_rxfilter_config(sc); 1207 } else { 1208 if (!(sc->hn_flags & HN_FLAG_RXVF)) 1209 goto out; 1210 1211 sc->hn_flags &= ~HN_FLAG_RXVF; 1212 if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 1213 hn_rxfilter_config(sc); 1214 else 1215 hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE); 1216 } 1217 1218 hn_nvs_set_datapath(sc, 1219 rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH); 1220 1221 hn_rxvf_set(sc, rxvf ? ifp : NULL); 1222 1223 if (rxvf) { 1224 hn_vf_rss_fixup(sc, true); 1225 hn_suspend_mgmt(sc); 1226 sc->hn_link_flags &= 1227 ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG); 1228 if_link_state_change(hn_ifp, LINK_STATE_DOWN); 1229 } else { 1230 hn_vf_rss_restore(sc); 1231 hn_resume_mgmt(sc); 1232 } 1233 1234 devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname, 1235 rxvf ? "VF_UP" : "VF_DOWN", NULL); 1236 1237 if (bootverbose) { 1238 if_printf(hn_ifp, "datapath is switched %s %s\n", 1239 rxvf ? "to" : "from", ifp->if_xname); 1240 } 1241 out: 1242 HN_UNLOCK(sc); 1243 } 1244 1245 static void 1246 hn_ifnet_event(void *arg, struct ifnet *ifp, int event) 1247 { 1248 1249 if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN) 1250 return; 1251 hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP); 1252 } 1253 1254 static void 1255 hn_ifaddr_event(void *arg, struct ifnet *ifp) 1256 { 1257 1258 hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP); 1259 } 1260 1261 static int 1262 hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr) 1263 { 1264 struct ifnet *ifp, *vf_ifp; 1265 uint64_t tmp; 1266 int error; 1267 1268 HN_LOCK_ASSERT(sc); 1269 ifp = sc->hn_ifp; 1270 vf_ifp = sc->hn_vf_ifp; 1271 1272 /* 1273 * Fix up requested capabilities w/ supported capabilities, 1274 * since the supported capabilities could have been changed. 1275 */ 1276 ifr->ifr_reqcap &= ifp->if_capabilities; 1277 /* Pass SIOCSIFCAP to VF. */ 1278 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr); 1279 1280 /* 1281 * NOTE: 1282 * The error will be propagated to the callers, however, it 1283 * is _not_ useful here. 1284 */ 1285 1286 /* 1287 * Merge VF's enabled capabilities. 1288 */ 1289 ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities; 1290 1291 tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc); 1292 if (ifp->if_capenable & IFCAP_TXCSUM) 1293 ifp->if_hwassist |= tmp; 1294 else 1295 ifp->if_hwassist &= ~tmp; 1296 1297 tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc); 1298 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 1299 ifp->if_hwassist |= tmp; 1300 else 1301 ifp->if_hwassist &= ~tmp; 1302 1303 tmp = vf_ifp->if_hwassist & CSUM_IP_TSO; 1304 if (ifp->if_capenable & IFCAP_TSO4) 1305 ifp->if_hwassist |= tmp; 1306 else 1307 ifp->if_hwassist &= ~tmp; 1308 1309 tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO; 1310 if (ifp->if_capenable & IFCAP_TSO6) 1311 ifp->if_hwassist |= tmp; 1312 else 1313 ifp->if_hwassist &= ~tmp; 1314 1315 return (error); 1316 } 1317 1318 static int 1319 hn_xpnt_vf_iocsetflags(struct hn_softc *sc) 1320 { 1321 struct ifnet *vf_ifp; 1322 struct ifreq ifr; 1323 1324 HN_LOCK_ASSERT(sc); 1325 vf_ifp = sc->hn_vf_ifp; 1326 1327 memset(&ifr, 0, sizeof(ifr)); 1328 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1329 ifr.ifr_flags = vf_ifp->if_flags & 0xffff; 1330 ifr.ifr_flagshigh = vf_ifp->if_flags >> 16; 1331 return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr)); 1332 } 1333 1334 static void 1335 hn_xpnt_vf_saveifflags(struct hn_softc *sc) 1336 { 1337 struct ifnet *ifp = sc->hn_ifp; 1338 int allmulti = 0; 1339 1340 HN_LOCK_ASSERT(sc); 1341 1342 /* XXX vlan(4) style mcast addr maintenance */ 1343 if (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) 1344 allmulti = IFF_ALLMULTI; 1345 1346 /* Always set the VF's if_flags */ 1347 sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti; 1348 } 1349 1350 static void 1351 hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m) 1352 { 1353 struct rm_priotracker pt; 1354 struct ifnet *hn_ifp = NULL; 1355 struct mbuf *mn; 1356 1357 /* 1358 * XXX racy, if hn(4) ever detached. 1359 */ 1360 rm_rlock(&hn_vfmap_lock, &pt); 1361 if (vf_ifp->if_index < hn_vfmap_size) 1362 hn_ifp = hn_vfmap[vf_ifp->if_index]; 1363 rm_runlock(&hn_vfmap_lock, &pt); 1364 1365 if (hn_ifp != NULL) { 1366 for (mn = m; mn != NULL; mn = mn->m_nextpkt) { 1367 /* 1368 * Allow tapping on the VF. 1369 */ 1370 ETHER_BPF_MTAP(vf_ifp, mn); 1371 1372 /* 1373 * Update VF stats. 1374 */ 1375 if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) { 1376 if_inc_counter(vf_ifp, IFCOUNTER_IBYTES, 1377 mn->m_pkthdr.len); 1378 } 1379 /* 1380 * XXX IFCOUNTER_IMCAST 1381 * This stat updating is kinda invasive, since it 1382 * requires two checks on the mbuf: the length check 1383 * and the ethernet header check. As of this write, 1384 * all multicast packets go directly to hn(4), which 1385 * makes imcast stat updating in the VF a try in vian. 1386 */ 1387 1388 /* 1389 * Fix up rcvif and increase hn(4)'s ipackets. 1390 */ 1391 mn->m_pkthdr.rcvif = hn_ifp; 1392 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 1393 } 1394 /* 1395 * Go through hn(4)'s if_input. 1396 */ 1397 hn_ifp->if_input(hn_ifp, m); 1398 } else { 1399 /* 1400 * In the middle of the transition; free this 1401 * mbuf chain. 1402 */ 1403 while (m != NULL) { 1404 mn = m->m_nextpkt; 1405 m->m_nextpkt = NULL; 1406 m_freem(m); 1407 m = mn; 1408 } 1409 } 1410 } 1411 1412 static void 1413 hn_mtu_change_fixup(struct hn_softc *sc) 1414 { 1415 struct ifnet *ifp; 1416 1417 HN_LOCK_ASSERT(sc); 1418 ifp = sc->hn_ifp; 1419 1420 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 1421 #if __FreeBSD_version >= 1100099 1422 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) 1423 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 1424 #endif 1425 } 1426 1427 static uint32_t 1428 hn_rss_type_fromndis(uint32_t rss_hash) 1429 { 1430 uint32_t types = 0; 1431 1432 if (rss_hash & NDIS_HASH_IPV4) 1433 types |= RSS_TYPE_IPV4; 1434 if (rss_hash & NDIS_HASH_TCP_IPV4) 1435 types |= RSS_TYPE_TCP_IPV4; 1436 if (rss_hash & NDIS_HASH_IPV6) 1437 types |= RSS_TYPE_IPV6; 1438 if (rss_hash & NDIS_HASH_IPV6_EX) 1439 types |= RSS_TYPE_IPV6_EX; 1440 if (rss_hash & NDIS_HASH_TCP_IPV6) 1441 types |= RSS_TYPE_TCP_IPV6; 1442 if (rss_hash & NDIS_HASH_TCP_IPV6_EX) 1443 types |= RSS_TYPE_TCP_IPV6_EX; 1444 if (rss_hash & NDIS_HASH_UDP_IPV4_X) 1445 types |= RSS_TYPE_UDP_IPV4; 1446 return (types); 1447 } 1448 1449 static uint32_t 1450 hn_rss_type_tondis(uint32_t types) 1451 { 1452 uint32_t rss_hash = 0; 1453 1454 KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0, 1455 ("UDP6 and UDP6EX are not supported")); 1456 1457 if (types & RSS_TYPE_IPV4) 1458 rss_hash |= NDIS_HASH_IPV4; 1459 if (types & RSS_TYPE_TCP_IPV4) 1460 rss_hash |= NDIS_HASH_TCP_IPV4; 1461 if (types & RSS_TYPE_IPV6) 1462 rss_hash |= NDIS_HASH_IPV6; 1463 if (types & RSS_TYPE_IPV6_EX) 1464 rss_hash |= NDIS_HASH_IPV6_EX; 1465 if (types & RSS_TYPE_TCP_IPV6) 1466 rss_hash |= NDIS_HASH_TCP_IPV6; 1467 if (types & RSS_TYPE_TCP_IPV6_EX) 1468 rss_hash |= NDIS_HASH_TCP_IPV6_EX; 1469 if (types & RSS_TYPE_UDP_IPV4) 1470 rss_hash |= NDIS_HASH_UDP_IPV4_X; 1471 return (rss_hash); 1472 } 1473 1474 static void 1475 hn_rss_mbuf_hash(struct hn_softc *sc, uint32_t mbuf_hash) 1476 { 1477 int i; 1478 1479 HN_LOCK_ASSERT(sc); 1480 1481 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1482 sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash; 1483 } 1484 1485 static void 1486 hn_vf_rss_fixup(struct hn_softc *sc, bool reconf) 1487 { 1488 struct ifnet *ifp, *vf_ifp; 1489 struct ifrsshash ifrh; 1490 struct ifrsskey ifrk; 1491 int error; 1492 uint32_t my_types, diff_types, mbuf_types = 0; 1493 1494 HN_LOCK_ASSERT(sc); 1495 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1496 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1497 1498 if (sc->hn_rx_ring_inuse == 1) { 1499 /* No RSS on synthetic parts; done. */ 1500 return; 1501 } 1502 if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) { 1503 /* Synthetic parts do not support Toeplitz; done. */ 1504 return; 1505 } 1506 1507 ifp = sc->hn_ifp; 1508 vf_ifp = sc->hn_vf_ifp; 1509 1510 /* 1511 * Extract VF's RSS key. Only 40 bytes key for Toeplitz is 1512 * supported. 1513 */ 1514 memset(&ifrk, 0, sizeof(ifrk)); 1515 strlcpy(ifrk.ifrk_name, vf_ifp->if_xname, sizeof(ifrk.ifrk_name)); 1516 error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSKEY, (caddr_t)&ifrk); 1517 if (error) { 1518 if_printf(ifp, "%s SIOCGIFRSSKEY failed: %d\n", 1519 vf_ifp->if_xname, error); 1520 goto done; 1521 } 1522 if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) { 1523 if_printf(ifp, "%s RSS function %u is not Toeplitz\n", 1524 vf_ifp->if_xname, ifrk.ifrk_func); 1525 goto done; 1526 } 1527 if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) { 1528 if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n", 1529 vf_ifp->if_xname, ifrk.ifrk_keylen); 1530 goto done; 1531 } 1532 1533 /* 1534 * Extract VF's RSS hash. Only Toeplitz is supported. 1535 */ 1536 memset(&ifrh, 0, sizeof(ifrh)); 1537 strlcpy(ifrh.ifrh_name, vf_ifp->if_xname, sizeof(ifrh.ifrh_name)); 1538 error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSHASH, (caddr_t)&ifrh); 1539 if (error) { 1540 if_printf(ifp, "%s SIOCGRSSHASH failed: %d\n", 1541 vf_ifp->if_xname, error); 1542 goto done; 1543 } 1544 if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) { 1545 if_printf(ifp, "%s RSS function %u is not Toeplitz\n", 1546 vf_ifp->if_xname, ifrh.ifrh_func); 1547 goto done; 1548 } 1549 1550 my_types = hn_rss_type_fromndis(sc->hn_rss_hcap); 1551 if ((ifrh.ifrh_types & my_types) == 0) { 1552 /* This disables RSS; ignore it then */ 1553 if_printf(ifp, "%s intersection of RSS types failed. " 1554 "VF %#x, mine %#x\n", vf_ifp->if_xname, 1555 ifrh.ifrh_types, my_types); 1556 goto done; 1557 } 1558 1559 diff_types = my_types ^ ifrh.ifrh_types; 1560 my_types &= ifrh.ifrh_types; 1561 mbuf_types = my_types; 1562 1563 /* 1564 * Detect RSS hash value/type confliction. 1565 * 1566 * NOTE: 1567 * We don't disable the hash type, but stop delivery the hash 1568 * value/type through mbufs on RX path. 1569 * 1570 * XXX If HN_CAP_UDPHASH is set in hn_caps, then UDP 4-tuple 1571 * hash is delivered with type of TCP_IPV4. This means if 1572 * UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at 1573 * least to hn_mbuf_hash. However, given that _all_ of the 1574 * NICs implement TCP_IPV4, this will _not_ impose any issues 1575 * here. 1576 */ 1577 if ((my_types & RSS_TYPE_IPV4) && 1578 (diff_types & ifrh.ifrh_types & 1579 (RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) { 1580 /* Conflict; disable IPV4 hash type/value delivery. */ 1581 if_printf(ifp, "disable IPV4 mbuf hash delivery\n"); 1582 mbuf_types &= ~RSS_TYPE_IPV4; 1583 } 1584 if ((my_types & RSS_TYPE_IPV6) && 1585 (diff_types & ifrh.ifrh_types & 1586 (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | 1587 RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | 1588 RSS_TYPE_IPV6_EX))) { 1589 /* Conflict; disable IPV6 hash type/value delivery. */ 1590 if_printf(ifp, "disable IPV6 mbuf hash delivery\n"); 1591 mbuf_types &= ~RSS_TYPE_IPV6; 1592 } 1593 if ((my_types & RSS_TYPE_IPV6_EX) && 1594 (diff_types & ifrh.ifrh_types & 1595 (RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 | 1596 RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX | 1597 RSS_TYPE_IPV6))) { 1598 /* Conflict; disable IPV6_EX hash type/value delivery. */ 1599 if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n"); 1600 mbuf_types &= ~RSS_TYPE_IPV6_EX; 1601 } 1602 if ((my_types & RSS_TYPE_TCP_IPV6) && 1603 (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) { 1604 /* Conflict; disable TCP_IPV6 hash type/value delivery. */ 1605 if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n"); 1606 mbuf_types &= ~RSS_TYPE_TCP_IPV6; 1607 } 1608 if ((my_types & RSS_TYPE_TCP_IPV6_EX) && 1609 (diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) { 1610 /* Conflict; disable TCP_IPV6_EX hash type/value delivery. */ 1611 if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n"); 1612 mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX; 1613 } 1614 if ((my_types & RSS_TYPE_UDP_IPV6) && 1615 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) { 1616 /* Conflict; disable UDP_IPV6 hash type/value delivery. */ 1617 if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n"); 1618 mbuf_types &= ~RSS_TYPE_UDP_IPV6; 1619 } 1620 if ((my_types & RSS_TYPE_UDP_IPV6_EX) && 1621 (diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) { 1622 /* Conflict; disable UDP_IPV6_EX hash type/value delivery. */ 1623 if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n"); 1624 mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX; 1625 } 1626 1627 /* 1628 * Indirect table does not matter. 1629 */ 1630 1631 sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) | 1632 hn_rss_type_tondis(my_types); 1633 memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key)); 1634 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 1635 1636 if (reconf) { 1637 error = hn_rss_reconfig(sc); 1638 if (error) { 1639 /* XXX roll-back? */ 1640 if_printf(ifp, "hn_rss_reconfig failed: %d\n", error); 1641 /* XXX keep going. */ 1642 } 1643 } 1644 done: 1645 /* Hash deliverability for mbufs. */ 1646 hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types)); 1647 } 1648 1649 static void 1650 hn_vf_rss_restore(struct hn_softc *sc) 1651 { 1652 1653 HN_LOCK_ASSERT(sc); 1654 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 1655 ("%s: synthetic parts are not attached", sc->hn_ifp->if_xname)); 1656 1657 if (sc->hn_rx_ring_inuse == 1) 1658 goto done; 1659 1660 /* 1661 * Restore hash types. Key does _not_ matter. 1662 */ 1663 if (sc->hn_rss_hash != sc->hn_rss_hcap) { 1664 int error; 1665 1666 sc->hn_rss_hash = sc->hn_rss_hcap; 1667 error = hn_rss_reconfig(sc); 1668 if (error) { 1669 if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n", 1670 error); 1671 /* XXX keep going. */ 1672 } 1673 } 1674 done: 1675 /* Hash deliverability for mbufs. */ 1676 hn_rss_mbuf_hash(sc, NDIS_HASH_ALL); 1677 } 1678 1679 static void 1680 hn_xpnt_vf_setready(struct hn_softc *sc) 1681 { 1682 struct ifnet *ifp, *vf_ifp; 1683 struct ifreq ifr; 1684 1685 HN_LOCK_ASSERT(sc); 1686 ifp = sc->hn_ifp; 1687 vf_ifp = sc->hn_vf_ifp; 1688 1689 /* 1690 * Mark the VF ready. 1691 */ 1692 sc->hn_vf_rdytick = 0; 1693 1694 /* 1695 * Save information for restoration. 1696 */ 1697 sc->hn_saved_caps = ifp->if_capabilities; 1698 sc->hn_saved_tsomax = ifp->if_hw_tsomax; 1699 sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount; 1700 sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize; 1701 1702 /* 1703 * Intersect supported/enabled capabilities. 1704 * 1705 * NOTE: 1706 * if_hwassist is not changed here. 1707 */ 1708 ifp->if_capabilities &= vf_ifp->if_capabilities; 1709 ifp->if_capenable &= ifp->if_capabilities; 1710 1711 /* 1712 * Fix TSO settings. 1713 */ 1714 if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax) 1715 ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax; 1716 if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount) 1717 ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount; 1718 if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize) 1719 ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize; 1720 1721 /* 1722 * Change VF's enabled capabilities. 1723 */ 1724 memset(&ifr, 0, sizeof(ifr)); 1725 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1726 ifr.ifr_reqcap = ifp->if_capenable; 1727 hn_xpnt_vf_iocsetcaps(sc, &ifr); 1728 1729 if (ifp->if_mtu != ETHERMTU) { 1730 int error; 1731 1732 /* 1733 * Change VF's MTU. 1734 */ 1735 memset(&ifr, 0, sizeof(ifr)); 1736 strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); 1737 ifr.ifr_mtu = ifp->if_mtu; 1738 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr); 1739 if (error) { 1740 if_printf(ifp, "%s SIOCSIFMTU %u failed\n", 1741 vf_ifp->if_xname, ifp->if_mtu); 1742 if (ifp->if_mtu > ETHERMTU) { 1743 if_printf(ifp, "change MTU to %d\n", ETHERMTU); 1744 1745 /* 1746 * XXX 1747 * No need to adjust the synthetic parts' MTU; 1748 * failure of the adjustment will cause us 1749 * infinite headache. 1750 */ 1751 ifp->if_mtu = ETHERMTU; 1752 hn_mtu_change_fixup(sc); 1753 } 1754 } 1755 } 1756 } 1757 1758 static bool 1759 hn_xpnt_vf_isready(struct hn_softc *sc) 1760 { 1761 1762 HN_LOCK_ASSERT(sc); 1763 1764 if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL) 1765 return (false); 1766 1767 if (sc->hn_vf_rdytick == 0) 1768 return (true); 1769 1770 if (sc->hn_vf_rdytick > ticks) 1771 return (false); 1772 1773 /* Mark VF as ready. */ 1774 hn_xpnt_vf_setready(sc); 1775 return (true); 1776 } 1777 1778 static void 1779 hn_xpnt_vf_setenable(struct hn_softc *sc) 1780 { 1781 int i; 1782 1783 HN_LOCK_ASSERT(sc); 1784 1785 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1786 rm_wlock(&sc->hn_vf_lock); 1787 sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED; 1788 rm_wunlock(&sc->hn_vf_lock); 1789 1790 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1791 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF; 1792 } 1793 1794 static void 1795 hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf) 1796 { 1797 int i; 1798 1799 HN_LOCK_ASSERT(sc); 1800 1801 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1802 rm_wlock(&sc->hn_vf_lock); 1803 sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED; 1804 if (clear_vf) 1805 sc->hn_vf_ifp = NULL; 1806 rm_wunlock(&sc->hn_vf_lock); 1807 1808 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 1809 sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF; 1810 } 1811 1812 static void 1813 hn_xpnt_vf_init(struct hn_softc *sc) 1814 { 1815 int error; 1816 1817 HN_LOCK_ASSERT(sc); 1818 1819 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1820 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1821 1822 if (bootverbose) { 1823 if_printf(sc->hn_ifp, "try bringing up %s\n", 1824 sc->hn_vf_ifp->if_xname); 1825 } 1826 1827 /* 1828 * Bring the VF up. 1829 */ 1830 hn_xpnt_vf_saveifflags(sc); 1831 sc->hn_vf_ifp->if_flags |= IFF_UP; 1832 error = hn_xpnt_vf_iocsetflags(sc); 1833 if (error) { 1834 if_printf(sc->hn_ifp, "bringing up %s failed: %d\n", 1835 sc->hn_vf_ifp->if_xname, error); 1836 return; 1837 } 1838 1839 /* 1840 * NOTE: 1841 * Datapath setting must happen _after_ bringing the VF up. 1842 */ 1843 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 1844 1845 /* 1846 * NOTE: 1847 * Fixup RSS related bits _after_ the VF is brought up, since 1848 * many VFs generate RSS key during it's initialization. 1849 */ 1850 hn_vf_rss_fixup(sc, true); 1851 1852 /* Mark transparent mode VF as enabled. */ 1853 hn_xpnt_vf_setenable(sc); 1854 } 1855 1856 static void 1857 hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused) 1858 { 1859 struct hn_softc *sc = xsc; 1860 1861 HN_LOCK(sc); 1862 1863 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 1864 goto done; 1865 if (sc->hn_vf_ifp == NULL) 1866 goto done; 1867 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 1868 goto done; 1869 1870 if (sc->hn_vf_rdytick != 0) { 1871 /* Mark VF as ready. */ 1872 hn_xpnt_vf_setready(sc); 1873 } 1874 1875 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) { 1876 /* 1877 * Delayed VF initialization. 1878 */ 1879 if (bootverbose) { 1880 if_printf(sc->hn_ifp, "delayed initialize %s\n", 1881 sc->hn_vf_ifp->if_xname); 1882 } 1883 hn_xpnt_vf_init(sc); 1884 } 1885 done: 1886 HN_UNLOCK(sc); 1887 } 1888 1889 static void 1890 hn_ifnet_attevent(void *xsc, struct ifnet *ifp) 1891 { 1892 struct hn_softc *sc = xsc; 1893 1894 HN_LOCK(sc); 1895 1896 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1897 goto done; 1898 1899 if (!hn_ismyvf(sc, ifp)) 1900 goto done; 1901 1902 if (sc->hn_vf_ifp != NULL) { 1903 if_printf(sc->hn_ifp, "%s was attached as VF\n", 1904 sc->hn_vf_ifp->if_xname); 1905 goto done; 1906 } 1907 1908 if (hn_xpnt_vf && ifp->if_start != NULL) { 1909 /* 1910 * ifnet.if_start is _not_ supported by transparent 1911 * mode VF; mainly due to the IFF_DRV_OACTIVE flag. 1912 */ 1913 if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported " 1914 "in transparent VF mode.\n", ifp->if_xname); 1915 goto done; 1916 } 1917 1918 rm_wlock(&hn_vfmap_lock); 1919 1920 if (ifp->if_index >= hn_vfmap_size) { 1921 struct ifnet **newmap; 1922 int newsize; 1923 1924 newsize = ifp->if_index + HN_VFMAP_SIZE_DEF; 1925 newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF, 1926 M_WAITOK | M_ZERO); 1927 1928 memcpy(newmap, hn_vfmap, 1929 sizeof(struct ifnet *) * hn_vfmap_size); 1930 free(hn_vfmap, M_DEVBUF); 1931 hn_vfmap = newmap; 1932 hn_vfmap_size = newsize; 1933 } 1934 KASSERT(hn_vfmap[ifp->if_index] == NULL, 1935 ("%s: ifindex %d was mapped to %s", 1936 ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname)); 1937 hn_vfmap[ifp->if_index] = sc->hn_ifp; 1938 1939 rm_wunlock(&hn_vfmap_lock); 1940 1941 /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ 1942 rm_wlock(&sc->hn_vf_lock); 1943 KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, 1944 ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); 1945 sc->hn_vf_ifp = ifp; 1946 rm_wunlock(&sc->hn_vf_lock); 1947 1948 if (hn_xpnt_vf) { 1949 int wait_ticks; 1950 1951 /* 1952 * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp. 1953 * Save vf_ifp's current if_input for later restoration. 1954 */ 1955 sc->hn_vf_input = ifp->if_input; 1956 ifp->if_input = hn_xpnt_vf_input; 1957 1958 /* 1959 * Stop link status management; use the VF's. 1960 */ 1961 hn_suspend_mgmt(sc); 1962 1963 /* 1964 * Give VF sometime to complete its attach routing. 1965 */ 1966 wait_ticks = hn_xpnt_vf_attwait * hz; 1967 sc->hn_vf_rdytick = ticks + wait_ticks; 1968 1969 taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init, 1970 wait_ticks); 1971 } 1972 done: 1973 HN_UNLOCK(sc); 1974 } 1975 1976 static void 1977 hn_ifnet_detevent(void *xsc, struct ifnet *ifp) 1978 { 1979 struct hn_softc *sc = xsc; 1980 1981 HN_LOCK(sc); 1982 1983 if (sc->hn_vf_ifp == NULL) 1984 goto done; 1985 1986 if (!hn_ismyvf(sc, ifp)) 1987 goto done; 1988 1989 if (hn_xpnt_vf) { 1990 /* 1991 * Make sure that the delayed initialization is not running. 1992 * 1993 * NOTE: 1994 * - This lock _must_ be released, since the hn_vf_init task 1995 * will try holding this lock. 1996 * - It is safe to release this lock here, since the 1997 * hn_ifnet_attevent() is interlocked by the hn_vf_ifp. 1998 * 1999 * XXX racy, if hn(4) ever detached. 2000 */ 2001 HN_UNLOCK(sc); 2002 taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init); 2003 HN_LOCK(sc); 2004 2005 KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved", 2006 sc->hn_ifp->if_xname)); 2007 ifp->if_input = sc->hn_vf_input; 2008 sc->hn_vf_input = NULL; 2009 2010 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) && 2011 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) 2012 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 2013 2014 if (sc->hn_vf_rdytick == 0) { 2015 /* 2016 * The VF was ready; restore some settings. 2017 */ 2018 sc->hn_ifp->if_capabilities = sc->hn_saved_caps; 2019 /* 2020 * NOTE: 2021 * There is _no_ need to fixup if_capenable and 2022 * if_hwassist, since the if_capabilities before 2023 * restoration was an intersection of the VF's 2024 * if_capabilites and the synthetic device's 2025 * if_capabilites. 2026 */ 2027 sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax; 2028 sc->hn_ifp->if_hw_tsomaxsegcount = 2029 sc->hn_saved_tsosegcnt; 2030 sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz; 2031 } 2032 2033 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2034 /* 2035 * Restore RSS settings. 2036 */ 2037 hn_vf_rss_restore(sc); 2038 2039 /* 2040 * Resume link status management, which was suspended 2041 * by hn_ifnet_attevent(). 2042 */ 2043 hn_resume_mgmt(sc); 2044 } 2045 } 2046 2047 /* Mark transparent mode VF as disabled. */ 2048 hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */); 2049 2050 rm_wlock(&hn_vfmap_lock); 2051 2052 KASSERT(ifp->if_index < hn_vfmap_size, 2053 ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size)); 2054 if (hn_vfmap[ifp->if_index] != NULL) { 2055 KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp, 2056 ("%s: ifindex %d was mapped to %s", 2057 ifp->if_xname, ifp->if_index, 2058 hn_vfmap[ifp->if_index]->if_xname)); 2059 hn_vfmap[ifp->if_index] = NULL; 2060 } 2061 2062 rm_wunlock(&hn_vfmap_lock); 2063 done: 2064 HN_UNLOCK(sc); 2065 } 2066 2067 static void 2068 hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state) 2069 { 2070 struct hn_softc *sc = xsc; 2071 2072 if (sc->hn_vf_ifp == ifp) 2073 if_link_state_change(sc->hn_ifp, link_state); 2074 } 2075 2076 static int 2077 hn_probe(device_t dev) 2078 { 2079 2080 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) { 2081 device_set_desc(dev, "Hyper-V Network Interface"); 2082 return BUS_PROBE_DEFAULT; 2083 } 2084 return ENXIO; 2085 } 2086 2087 static int 2088 hn_attach(device_t dev) 2089 { 2090 struct hn_softc *sc = device_get_softc(dev); 2091 struct sysctl_oid_list *child; 2092 struct sysctl_ctx_list *ctx; 2093 uint8_t eaddr[ETHER_ADDR_LEN]; 2094 struct ifnet *ifp = NULL; 2095 int error, ring_cnt, tx_ring_cnt; 2096 uint32_t mtu; 2097 2098 sc->hn_dev = dev; 2099 sc->hn_prichan = vmbus_get_channel(dev); 2100 HN_LOCK_INIT(sc); 2101 rm_init(&sc->hn_vf_lock, "hnvf"); 2102 if (hn_xpnt_vf && hn_xpnt_vf_accbpf) 2103 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 2104 2105 /* 2106 * Initialize these tunables once. 2107 */ 2108 sc->hn_agg_size = hn_tx_agg_size; 2109 sc->hn_agg_pkts = hn_tx_agg_pkts; 2110 2111 /* 2112 * Setup taskqueue for transmission. 2113 */ 2114 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) { 2115 int i; 2116 2117 sc->hn_tx_taskqs = 2118 malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 2119 M_DEVBUF, M_WAITOK); 2120 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 2121 sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx", 2122 M_WAITOK, taskqueue_thread_enqueue, 2123 &sc->hn_tx_taskqs[i]); 2124 taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET, 2125 "%s tx%d", device_get_nameunit(dev), i); 2126 } 2127 } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) { 2128 sc->hn_tx_taskqs = hn_tx_taskque; 2129 } 2130 2131 /* 2132 * Setup taskqueue for mangement tasks, e.g. link status. 2133 */ 2134 sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, 2135 taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); 2136 taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", 2137 device_get_nameunit(dev)); 2138 TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); 2139 TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); 2140 TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, 2141 hn_netchg_status_taskfunc, sc); 2142 2143 if (hn_xpnt_vf) { 2144 /* 2145 * Setup taskqueue for VF tasks, e.g. delayed VF bringing up. 2146 */ 2147 sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK, 2148 taskqueue_thread_enqueue, &sc->hn_vf_taskq); 2149 taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf", 2150 device_get_nameunit(dev)); 2151 TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0, 2152 hn_xpnt_vf_init_taskfunc, sc); 2153 } 2154 2155 /* 2156 * Allocate ifnet and setup its name earlier, so that if_printf 2157 * can be used by functions, which will be called after 2158 * ether_ifattach(). 2159 */ 2160 ifp = sc->hn_ifp = if_alloc(IFT_ETHER); 2161 ifp->if_softc = sc; 2162 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 2163 2164 /* 2165 * Initialize ifmedia earlier so that it can be unconditionally 2166 * destroyed, if error happened later on. 2167 */ 2168 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 2169 2170 /* 2171 * Figure out the # of RX rings (ring_cnt) and the # of TX rings 2172 * to use (tx_ring_cnt). 2173 * 2174 * NOTE: 2175 * The # of RX rings to use is same as the # of channels to use. 2176 */ 2177 ring_cnt = hn_chan_cnt; 2178 if (ring_cnt <= 0) { 2179 /* Default */ 2180 ring_cnt = mp_ncpus; 2181 if (ring_cnt > HN_RING_CNT_DEF_MAX) 2182 ring_cnt = HN_RING_CNT_DEF_MAX; 2183 } else if (ring_cnt > mp_ncpus) { 2184 ring_cnt = mp_ncpus; 2185 } 2186 #ifdef RSS 2187 if (ring_cnt > rss_getnumbuckets()) 2188 ring_cnt = rss_getnumbuckets(); 2189 #endif 2190 2191 tx_ring_cnt = hn_tx_ring_cnt; 2192 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) 2193 tx_ring_cnt = ring_cnt; 2194 #ifdef HN_IFSTART_SUPPORT 2195 if (hn_use_if_start) { 2196 /* ifnet.if_start only needs one TX ring. */ 2197 tx_ring_cnt = 1; 2198 } 2199 #endif 2200 2201 /* 2202 * Set the leader CPU for channels. 2203 */ 2204 sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; 2205 2206 /* 2207 * Create enough TX/RX rings, even if only limited number of 2208 * channels can be allocated. 2209 */ 2210 error = hn_create_tx_data(sc, tx_ring_cnt); 2211 if (error) 2212 goto failed; 2213 error = hn_create_rx_data(sc, ring_cnt); 2214 if (error) 2215 goto failed; 2216 2217 /* 2218 * Create transaction context for NVS and RNDIS transactions. 2219 */ 2220 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), 2221 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); 2222 if (sc->hn_xact == NULL) { 2223 error = ENXIO; 2224 goto failed; 2225 } 2226 2227 /* 2228 * Install orphan handler for the revocation of this device's 2229 * primary channel. 2230 * 2231 * NOTE: 2232 * The processing order is critical here: 2233 * Install the orphan handler, _before_ testing whether this 2234 * device's primary channel has been revoked or not. 2235 */ 2236 vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact); 2237 if (vmbus_chan_is_revoked(sc->hn_prichan)) { 2238 error = ENXIO; 2239 goto failed; 2240 } 2241 2242 /* 2243 * Attach the synthetic parts, i.e. NVS and RNDIS. 2244 */ 2245 error = hn_synth_attach(sc, ETHERMTU); 2246 if (error) 2247 goto failed; 2248 2249 error = hn_rndis_get_eaddr(sc, eaddr); 2250 if (error) 2251 goto failed; 2252 2253 error = hn_rndis_get_mtu(sc, &mtu); 2254 if (error) 2255 mtu = ETHERMTU; 2256 else if (bootverbose) 2257 device_printf(dev, "RNDIS mtu %u\n", mtu); 2258 2259 #if __FreeBSD_version >= 1100099 2260 if (sc->hn_rx_ring_inuse > 1) { 2261 /* 2262 * Reduce TCP segment aggregation limit for multiple 2263 * RX rings to increase ACK timeliness. 2264 */ 2265 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); 2266 } 2267 #endif 2268 2269 /* 2270 * Fixup TX/RX stuffs after synthetic parts are attached. 2271 */ 2272 hn_fixup_tx_data(sc); 2273 hn_fixup_rx_data(sc); 2274 2275 ctx = device_get_sysctl_ctx(dev); 2276 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 2277 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, 2278 &sc->hn_nvs_ver, 0, "NVS version"); 2279 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", 2280 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2281 hn_ndis_version_sysctl, "A", "NDIS version"); 2282 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", 2283 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2284 hn_caps_sysctl, "A", "capabilities"); 2285 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", 2286 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2287 hn_hwassist_sysctl, "A", "hwassist"); 2288 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", 2289 CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); 2290 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", 2291 CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, 2292 "max # of TSO segments"); 2293 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", 2294 CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0, 2295 "max size of TSO segment"); 2296 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", 2297 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2298 hn_rxfilter_sysctl, "A", "rxfilter"); 2299 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", 2300 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2301 hn_rss_hash_sysctl, "A", "RSS hash"); 2302 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap", 2303 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2304 hn_rss_hcap_sysctl, "A", "RSS hash capabilities"); 2305 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash", 2306 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2307 hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs"); 2308 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", 2309 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); 2310 #ifndef RSS 2311 /* 2312 * Don't allow RSS key/indirect table changes, if RSS is defined. 2313 */ 2314 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", 2315 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2316 hn_rss_key_sysctl, "IU", "RSS key"); 2317 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", 2318 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2319 hn_rss_ind_sysctl, "IU", "RSS indirect table"); 2320 #endif 2321 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", 2322 CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, 2323 "RNDIS offered packet transmission aggregation size limit"); 2324 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", 2325 CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, 2326 "RNDIS offered packet transmission aggregation count limit"); 2327 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", 2328 CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, 2329 "RNDIS packet transmission aggregation alignment"); 2330 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", 2331 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2332 hn_txagg_size_sysctl, "I", 2333 "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); 2334 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", 2335 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2336 hn_txagg_pkts_sysctl, "I", 2337 "Packet transmission aggregation packets, " 2338 "0 -- disable, -1 -- auto"); 2339 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling", 2340 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2341 hn_polling_sysctl, "I", 2342 "Polling frequency: [100,1000000], 0 disable polling"); 2343 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", 2344 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2345 hn_vf_sysctl, "A", "Virtual Function's name"); 2346 if (!hn_xpnt_vf) { 2347 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", 2348 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2349 hn_rxvf_sysctl, "A", "activated Virtual Function's name"); 2350 } else { 2351 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", 2352 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 2353 hn_xpnt_vf_enabled_sysctl, "I", 2354 "Transparent VF enabled"); 2355 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", 2356 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2357 hn_xpnt_vf_accbpf_sysctl, "I", 2358 "Accurate BPF for transparent VF"); 2359 } 2360 2361 /* 2362 * Setup the ifmedia, which has been initialized earlier. 2363 */ 2364 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 2365 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 2366 /* XXX ifmedia_set really should do this for us */ 2367 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 2368 2369 /* 2370 * Setup the ifnet for this interface. 2371 */ 2372 2373 ifp->if_baudrate = IF_Gbps(10); 2374 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2375 ifp->if_ioctl = hn_ioctl; 2376 ifp->if_init = hn_init; 2377 #ifdef HN_IFSTART_SUPPORT 2378 if (hn_use_if_start) { 2379 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); 2380 2381 ifp->if_start = hn_start; 2382 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); 2383 ifp->if_snd.ifq_drv_maxlen = qdepth - 1; 2384 IFQ_SET_READY(&ifp->if_snd); 2385 } else 2386 #endif 2387 { 2388 ifp->if_transmit = hn_transmit; 2389 ifp->if_qflush = hn_xmit_qflush; 2390 } 2391 2392 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; 2393 #ifdef foo 2394 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2395 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; 2396 #endif 2397 if (sc->hn_caps & HN_CAP_VLAN) { 2398 /* XXX not sure about VLAN_MTU. */ 2399 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; 2400 } 2401 2402 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; 2403 if (ifp->if_hwassist & HN_CSUM_IP_MASK) 2404 ifp->if_capabilities |= IFCAP_TXCSUM; 2405 if (ifp->if_hwassist & HN_CSUM_IP6_MASK) 2406 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; 2407 if (sc->hn_caps & HN_CAP_TSO4) { 2408 ifp->if_capabilities |= IFCAP_TSO4; 2409 ifp->if_hwassist |= CSUM_IP_TSO; 2410 } 2411 if (sc->hn_caps & HN_CAP_TSO6) { 2412 ifp->if_capabilities |= IFCAP_TSO6; 2413 ifp->if_hwassist |= CSUM_IP6_TSO; 2414 } 2415 2416 /* Enable all available capabilities by default. */ 2417 ifp->if_capenable = ifp->if_capabilities; 2418 2419 /* 2420 * Disable IPv6 TSO and TXCSUM by default, they still can 2421 * be enabled through SIOCSIFCAP. 2422 */ 2423 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); 2424 ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); 2425 2426 if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { 2427 /* 2428 * Lock hn_set_tso_maxsize() to simplify its 2429 * internal logic. 2430 */ 2431 HN_LOCK(sc); 2432 hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); 2433 HN_UNLOCK(sc); 2434 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; 2435 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 2436 } 2437 2438 ether_ifattach(ifp, eaddr); 2439 2440 if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { 2441 if_printf(ifp, "TSO segcnt %u segsz %u\n", 2442 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); 2443 } 2444 if (mtu < ETHERMTU) { 2445 if_printf(ifp, "fixup mtu %u -> %u\n", ifp->if_mtu, mtu); 2446 ifp->if_mtu = mtu; 2447 } 2448 2449 /* Inform the upper layer about the long frame support. */ 2450 ifp->if_hdrlen = sizeof(struct ether_vlan_header); 2451 2452 /* 2453 * Kick off link status check. 2454 */ 2455 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 2456 hn_update_link_status(sc); 2457 2458 if (!hn_xpnt_vf) { 2459 sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, 2460 hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); 2461 sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, 2462 hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); 2463 } else { 2464 sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, 2465 hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); 2466 } 2467 2468 /* 2469 * NOTE: 2470 * Subscribe ether_ifattach event, instead of ifnet_arrival event, 2471 * since interface's LLADDR is needed; interface LLADDR is not 2472 * available when ifnet_arrival event is triggered. 2473 */ 2474 sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event, 2475 hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY); 2476 sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event, 2477 hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY); 2478 2479 return (0); 2480 failed: 2481 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) 2482 hn_synth_detach(sc); 2483 hn_detach(dev); 2484 return (error); 2485 } 2486 2487 static int 2488 hn_detach(device_t dev) 2489 { 2490 struct hn_softc *sc = device_get_softc(dev); 2491 struct ifnet *ifp = sc->hn_ifp, *vf_ifp; 2492 2493 if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { 2494 /* 2495 * In case that the vmbus missed the orphan handler 2496 * installation. 2497 */ 2498 vmbus_xact_ctx_orphan(sc->hn_xact); 2499 } 2500 2501 if (sc->hn_ifaddr_evthand != NULL) 2502 EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand); 2503 if (sc->hn_ifnet_evthand != NULL) 2504 EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand); 2505 if (sc->hn_ifnet_atthand != NULL) { 2506 EVENTHANDLER_DEREGISTER(ether_ifattach_event, 2507 sc->hn_ifnet_atthand); 2508 } 2509 if (sc->hn_ifnet_dethand != NULL) { 2510 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 2511 sc->hn_ifnet_dethand); 2512 } 2513 if (sc->hn_ifnet_lnkhand != NULL) 2514 EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand); 2515 2516 vf_ifp = sc->hn_vf_ifp; 2517 __compiler_membar(); 2518 if (vf_ifp != NULL) 2519 hn_ifnet_detevent(sc, vf_ifp); 2520 2521 if (device_is_attached(dev)) { 2522 HN_LOCK(sc); 2523 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 2524 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2525 hn_stop(sc, true); 2526 /* 2527 * NOTE: 2528 * hn_stop() only suspends data, so managment 2529 * stuffs have to be suspended manually here. 2530 */ 2531 hn_suspend_mgmt(sc); 2532 hn_synth_detach(sc); 2533 } 2534 HN_UNLOCK(sc); 2535 ether_ifdetach(ifp); 2536 } 2537 2538 ifmedia_removeall(&sc->hn_media); 2539 hn_destroy_rx_data(sc); 2540 hn_destroy_tx_data(sc); 2541 2542 if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) { 2543 int i; 2544 2545 for (i = 0; i < hn_tx_taskq_cnt; ++i) 2546 taskqueue_free(sc->hn_tx_taskqs[i]); 2547 free(sc->hn_tx_taskqs, M_DEVBUF); 2548 } 2549 taskqueue_free(sc->hn_mgmt_taskq0); 2550 if (sc->hn_vf_taskq != NULL) 2551 taskqueue_free(sc->hn_vf_taskq); 2552 2553 if (sc->hn_xact != NULL) { 2554 /* 2555 * Uninstall the orphan handler _before_ the xact is 2556 * destructed. 2557 */ 2558 vmbus_chan_unset_orphan(sc->hn_prichan); 2559 vmbus_xact_ctx_destroy(sc->hn_xact); 2560 } 2561 2562 if_free(ifp); 2563 2564 HN_LOCK_DESTROY(sc); 2565 rm_destroy(&sc->hn_vf_lock); 2566 return (0); 2567 } 2568 2569 static int 2570 hn_shutdown(device_t dev) 2571 { 2572 2573 return (0); 2574 } 2575 2576 static void 2577 hn_link_status(struct hn_softc *sc) 2578 { 2579 uint32_t link_status; 2580 int error; 2581 2582 error = hn_rndis_get_linkstatus(sc, &link_status); 2583 if (error) { 2584 /* XXX what to do? */ 2585 return; 2586 } 2587 2588 if (link_status == NDIS_MEDIA_STATE_CONNECTED) 2589 sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; 2590 else 2591 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2592 if_link_state_change(sc->hn_ifp, 2593 (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? 2594 LINK_STATE_UP : LINK_STATE_DOWN); 2595 } 2596 2597 static void 2598 hn_link_taskfunc(void *xsc, int pending __unused) 2599 { 2600 struct hn_softc *sc = xsc; 2601 2602 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 2603 return; 2604 hn_link_status(sc); 2605 } 2606 2607 static void 2608 hn_netchg_init_taskfunc(void *xsc, int pending __unused) 2609 { 2610 struct hn_softc *sc = xsc; 2611 2612 /* Prevent any link status checks from running. */ 2613 sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; 2614 2615 /* 2616 * Fake up a [link down --> link up] state change; 5 seconds 2617 * delay is used, which closely simulates miibus reaction 2618 * upon link down event. 2619 */ 2620 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 2621 if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); 2622 taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, 2623 &sc->hn_netchg_status, 5 * hz); 2624 } 2625 2626 static void 2627 hn_netchg_status_taskfunc(void *xsc, int pending __unused) 2628 { 2629 struct hn_softc *sc = xsc; 2630 2631 /* Re-allow link status checks. */ 2632 sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; 2633 hn_link_status(sc); 2634 } 2635 2636 static void 2637 hn_update_link_status(struct hn_softc *sc) 2638 { 2639 2640 if (sc->hn_mgmt_taskq != NULL) 2641 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); 2642 } 2643 2644 static void 2645 hn_change_network(struct hn_softc *sc) 2646 { 2647 2648 if (sc->hn_mgmt_taskq != NULL) 2649 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); 2650 } 2651 2652 static __inline int 2653 hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, 2654 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) 2655 { 2656 struct mbuf *m = *m_head; 2657 int error; 2658 2659 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); 2660 2661 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, 2662 m, segs, nsegs, BUS_DMA_NOWAIT); 2663 if (error == EFBIG) { 2664 struct mbuf *m_new; 2665 2666 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); 2667 if (m_new == NULL) 2668 return ENOBUFS; 2669 else 2670 *m_head = m = m_new; 2671 txr->hn_tx_collapsed++; 2672 2673 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, 2674 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); 2675 } 2676 if (!error) { 2677 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, 2678 BUS_DMASYNC_PREWRITE); 2679 txd->flags |= HN_TXD_FLAG_DMAMAP; 2680 } 2681 return error; 2682 } 2683 2684 static __inline int 2685 hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) 2686 { 2687 2688 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, 2689 ("put an onlist txd %#x", txd->flags)); 2690 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2691 ("put an onagg txd %#x", txd->flags)); 2692 2693 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2694 if (atomic_fetchadd_int(&txd->refs, -1) != 1) 2695 return 0; 2696 2697 if (!STAILQ_EMPTY(&txd->agg_list)) { 2698 struct hn_txdesc *tmp_txd; 2699 2700 while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) { 2701 int freed; 2702 2703 KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list), 2704 ("resursive aggregation on aggregated txdesc")); 2705 KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG), 2706 ("not aggregated txdesc")); 2707 KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2708 ("aggregated txdesc uses dmamap")); 2709 KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2710 ("aggregated txdesc consumes " 2711 "chimney sending buffer")); 2712 KASSERT(tmp_txd->chim_size == 0, 2713 ("aggregated txdesc has non-zero " 2714 "chimney sending size")); 2715 2716 STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); 2717 tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; 2718 freed = hn_txdesc_put(txr, tmp_txd); 2719 KASSERT(freed, ("failed to free aggregated txdesc")); 2720 } 2721 } 2722 2723 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 2724 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 2725 ("chim txd uses dmamap")); 2726 hn_chim_free(txr->hn_sc, txd->chim_index); 2727 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2728 txd->chim_size = 0; 2729 } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { 2730 bus_dmamap_sync(txr->hn_tx_data_dtag, 2731 txd->data_dmap, BUS_DMASYNC_POSTWRITE); 2732 bus_dmamap_unload(txr->hn_tx_data_dtag, 2733 txd->data_dmap); 2734 txd->flags &= ~HN_TXD_FLAG_DMAMAP; 2735 } 2736 2737 if (txd->m != NULL) { 2738 m_freem(txd->m); 2739 txd->m = NULL; 2740 } 2741 2742 txd->flags |= HN_TXD_FLAG_ONLIST; 2743 #ifndef HN_USE_TXDESC_BUFRING 2744 mtx_lock_spin(&txr->hn_txlist_spin); 2745 KASSERT(txr->hn_txdesc_avail >= 0 && 2746 txr->hn_txdesc_avail < txr->hn_txdesc_cnt, 2747 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); 2748 txr->hn_txdesc_avail++; 2749 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 2750 mtx_unlock_spin(&txr->hn_txlist_spin); 2751 #else /* HN_USE_TXDESC_BUFRING */ 2752 #ifdef HN_DEBUG 2753 atomic_add_int(&txr->hn_txdesc_avail, 1); 2754 #endif 2755 buf_ring_enqueue(txr->hn_txdesc_br, txd); 2756 #endif /* !HN_USE_TXDESC_BUFRING */ 2757 2758 return 1; 2759 } 2760 2761 static __inline struct hn_txdesc * 2762 hn_txdesc_get(struct hn_tx_ring *txr) 2763 { 2764 struct hn_txdesc *txd; 2765 2766 #ifndef HN_USE_TXDESC_BUFRING 2767 mtx_lock_spin(&txr->hn_txlist_spin); 2768 txd = SLIST_FIRST(&txr->hn_txlist); 2769 if (txd != NULL) { 2770 KASSERT(txr->hn_txdesc_avail > 0, 2771 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); 2772 txr->hn_txdesc_avail--; 2773 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 2774 } 2775 mtx_unlock_spin(&txr->hn_txlist_spin); 2776 #else 2777 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); 2778 #endif 2779 2780 if (txd != NULL) { 2781 #ifdef HN_USE_TXDESC_BUFRING 2782 #ifdef HN_DEBUG 2783 atomic_subtract_int(&txr->hn_txdesc_avail, 1); 2784 #endif 2785 #endif /* HN_USE_TXDESC_BUFRING */ 2786 KASSERT(txd->m == NULL && txd->refs == 0 && 2787 STAILQ_EMPTY(&txd->agg_list) && 2788 txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 2789 txd->chim_size == 0 && 2790 (txd->flags & HN_TXD_FLAG_ONLIST) && 2791 (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && 2792 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); 2793 txd->flags &= ~HN_TXD_FLAG_ONLIST; 2794 txd->refs = 1; 2795 } 2796 return txd; 2797 } 2798 2799 static __inline void 2800 hn_txdesc_hold(struct hn_txdesc *txd) 2801 { 2802 2803 /* 0->1 transition will never work */ 2804 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 2805 atomic_add_int(&txd->refs, 1); 2806 } 2807 2808 static __inline void 2809 hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) 2810 { 2811 2812 KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2813 ("recursive aggregation on aggregating txdesc")); 2814 2815 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 2816 ("already aggregated")); 2817 KASSERT(STAILQ_EMPTY(&txd->agg_list), 2818 ("recursive aggregation on to-be-aggregated txdesc")); 2819 2820 txd->flags |= HN_TXD_FLAG_ONAGG; 2821 STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); 2822 } 2823 2824 static bool 2825 hn_tx_ring_pending(struct hn_tx_ring *txr) 2826 { 2827 bool pending = false; 2828 2829 #ifndef HN_USE_TXDESC_BUFRING 2830 mtx_lock_spin(&txr->hn_txlist_spin); 2831 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) 2832 pending = true; 2833 mtx_unlock_spin(&txr->hn_txlist_spin); 2834 #else 2835 if (!buf_ring_full(txr->hn_txdesc_br)) 2836 pending = true; 2837 #endif 2838 return (pending); 2839 } 2840 2841 static __inline void 2842 hn_txeof(struct hn_tx_ring *txr) 2843 { 2844 txr->hn_has_txeof = 0; 2845 txr->hn_txeof(txr); 2846 } 2847 2848 static void 2849 hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, 2850 struct vmbus_channel *chan, const void *data __unused, int dlen __unused) 2851 { 2852 struct hn_txdesc *txd = sndc->hn_cbarg; 2853 struct hn_tx_ring *txr; 2854 2855 txr = txd->txr; 2856 KASSERT(txr->hn_chan == chan, 2857 ("channel mismatch, on chan%u, should be chan%u", 2858 vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan))); 2859 2860 txr->hn_has_txeof = 1; 2861 hn_txdesc_put(txr, txd); 2862 2863 ++txr->hn_txdone_cnt; 2864 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { 2865 txr->hn_txdone_cnt = 0; 2866 if (txr->hn_oactive) 2867 hn_txeof(txr); 2868 } 2869 } 2870 2871 static void 2872 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 2873 { 2874 #if defined(INET) || defined(INET6) 2875 tcp_lro_flush_all(&rxr->hn_lro); 2876 #endif 2877 2878 /* 2879 * NOTE: 2880 * 'txr' could be NULL, if multiple channels and 2881 * ifnet.if_start method are enabled. 2882 */ 2883 if (txr == NULL || !txr->hn_has_txeof) 2884 return; 2885 2886 txr->hn_txdone_cnt = 0; 2887 hn_txeof(txr); 2888 } 2889 2890 static __inline uint32_t 2891 hn_rndis_pktmsg_offset(uint32_t ofs) 2892 { 2893 2894 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 2895 ("invalid RNDIS packet msg offset %u", ofs)); 2896 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 2897 } 2898 2899 static __inline void * 2900 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 2901 size_t pi_dlen, uint32_t pi_type) 2902 { 2903 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 2904 struct rndis_pktinfo *pi; 2905 2906 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 2907 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 2908 2909 /* 2910 * Per-packet-info does not move; it only grows. 2911 * 2912 * NOTE: 2913 * rm_pktinfooffset in this phase counts from the beginning 2914 * of rndis_packet_msg. 2915 */ 2916 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 2917 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 2918 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 2919 pkt->rm_pktinfolen); 2920 pkt->rm_pktinfolen += pi_size; 2921 2922 pi->rm_size = pi_size; 2923 pi->rm_type = pi_type; 2924 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 2925 2926 return (pi->rm_data); 2927 } 2928 2929 static __inline int 2930 hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) 2931 { 2932 struct hn_txdesc *txd; 2933 struct mbuf *m; 2934 int error, pkts; 2935 2936 txd = txr->hn_agg_txd; 2937 KASSERT(txd != NULL, ("no aggregate txdesc")); 2938 2939 /* 2940 * Since hn_txpkt() will reset this temporary stat, save 2941 * it now, so that oerrors can be updated properly, if 2942 * hn_txpkt() ever fails. 2943 */ 2944 pkts = txr->hn_stat_pkts; 2945 2946 /* 2947 * Since txd's mbuf will _not_ be freed upon hn_txpkt() 2948 * failure, save it for later freeing, if hn_txpkt() ever 2949 * fails. 2950 */ 2951 m = txd->m; 2952 error = hn_txpkt(ifp, txr, txd); 2953 if (__predict_false(error)) { 2954 /* txd is freed, but m is not. */ 2955 m_freem(m); 2956 2957 txr->hn_flush_failed++; 2958 if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); 2959 } 2960 2961 /* Reset all aggregation states. */ 2962 txr->hn_agg_txd = NULL; 2963 txr->hn_agg_szleft = 0; 2964 txr->hn_agg_pktleft = 0; 2965 txr->hn_agg_prevpkt = NULL; 2966 2967 return (error); 2968 } 2969 2970 static void * 2971 hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2972 int pktsize) 2973 { 2974 void *chim; 2975 2976 if (txr->hn_agg_txd != NULL) { 2977 if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { 2978 struct hn_txdesc *agg_txd = txr->hn_agg_txd; 2979 struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; 2980 int olen; 2981 2982 /* 2983 * Update the previous RNDIS packet's total length, 2984 * it can be increased due to the mandatory alignment 2985 * padding for this RNDIS packet. And update the 2986 * aggregating txdesc's chimney sending buffer size 2987 * accordingly. 2988 * 2989 * XXX 2990 * Zero-out the padding, as required by the RNDIS spec. 2991 */ 2992 olen = pkt->rm_len; 2993 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 2994 agg_txd->chim_size += pkt->rm_len - olen; 2995 2996 /* Link this txdesc to the parent. */ 2997 hn_txdesc_agg(agg_txd, txd); 2998 2999 chim = (uint8_t *)pkt + pkt->rm_len; 3000 /* Save the current packet for later fixup. */ 3001 txr->hn_agg_prevpkt = chim; 3002 3003 txr->hn_agg_pktleft--; 3004 txr->hn_agg_szleft -= pktsize; 3005 if (txr->hn_agg_szleft <= 3006 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 3007 /* 3008 * Probably can't aggregate more packets, 3009 * flush this aggregating txdesc proactively. 3010 */ 3011 txr->hn_agg_pktleft = 0; 3012 } 3013 /* Done! */ 3014 return (chim); 3015 } 3016 hn_flush_txagg(ifp, txr); 3017 } 3018 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 3019 3020 txr->hn_tx_chimney_tried++; 3021 txd->chim_index = hn_chim_alloc(txr->hn_sc); 3022 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 3023 return (NULL); 3024 txr->hn_tx_chimney++; 3025 3026 chim = txr->hn_sc->hn_chim + 3027 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 3028 3029 if (txr->hn_agg_pktmax > 1 && 3030 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 3031 txr->hn_agg_txd = txd; 3032 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 3033 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 3034 txr->hn_agg_prevpkt = chim; 3035 } 3036 return (chim); 3037 } 3038 3039 /* 3040 * NOTE: 3041 * If this function fails, then both txd and m_head0 will be freed. 3042 */ 3043 static int 3044 hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 3045 struct mbuf **m_head0) 3046 { 3047 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 3048 int error, nsegs, i; 3049 struct mbuf *m_head = *m_head0; 3050 struct rndis_packet_msg *pkt; 3051 uint32_t *pi_data; 3052 void *chim = NULL; 3053 int pkt_hlen, pkt_size; 3054 3055 pkt = txd->rndis_pkt; 3056 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 3057 if (pkt_size < txr->hn_chim_size) { 3058 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 3059 if (chim != NULL) 3060 pkt = chim; 3061 } else { 3062 if (txr->hn_agg_txd != NULL) 3063 hn_flush_txagg(ifp, txr); 3064 } 3065 3066 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 3067 pkt->rm_len = m_head->m_pkthdr.len; 3068 pkt->rm_dataoffset = 0; 3069 pkt->rm_datalen = m_head->m_pkthdr.len; 3070 pkt->rm_oobdataoffset = 0; 3071 pkt->rm_oobdatalen = 0; 3072 pkt->rm_oobdataelements = 0; 3073 pkt->rm_pktinfooffset = sizeof(*pkt); 3074 pkt->rm_pktinfolen = 0; 3075 pkt->rm_vchandle = 0; 3076 pkt->rm_reserved = 0; 3077 3078 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 3079 /* 3080 * Set the hash value for this packet, so that the host could 3081 * dispatch the TX done event for this packet back to this TX 3082 * ring's channel. 3083 */ 3084 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3085 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 3086 *pi_data = txr->hn_tx_idx; 3087 } 3088 3089 if (m_head->m_flags & M_VLANTAG) { 3090 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3091 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 3092 *pi_data = NDIS_VLAN_INFO_MAKE( 3093 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 3094 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 3095 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 3096 } 3097 3098 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3099 #if defined(INET6) || defined(INET) 3100 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3101 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 3102 #ifdef INET 3103 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 3104 *pi_data = NDIS_LSO2_INFO_MAKEIPV4( 3105 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3106 m_head->m_pkthdr.tso_segsz); 3107 } 3108 #endif 3109 #if defined(INET6) && defined(INET) 3110 else 3111 #endif 3112 #ifdef INET6 3113 { 3114 *pi_data = NDIS_LSO2_INFO_MAKEIPV6( 3115 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen, 3116 m_head->m_pkthdr.tso_segsz); 3117 } 3118 #endif 3119 #endif /* INET6 || INET */ 3120 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 3121 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 3122 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 3123 if (m_head->m_pkthdr.csum_flags & 3124 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 3125 *pi_data = NDIS_TXCSUM_INFO_IPV6; 3126 } else { 3127 *pi_data = NDIS_TXCSUM_INFO_IPV4; 3128 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3129 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 3130 } 3131 3132 if (m_head->m_pkthdr.csum_flags & 3133 (CSUM_IP_TCP | CSUM_IP6_TCP)) { 3134 *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS( 3135 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3136 } else if (m_head->m_pkthdr.csum_flags & 3137 (CSUM_IP_UDP | CSUM_IP6_UDP)) { 3138 *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS( 3139 m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen); 3140 } 3141 } 3142 3143 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 3144 /* Fixup RNDIS packet message total length */ 3145 pkt->rm_len += pkt_hlen; 3146 /* Convert RNDIS packet message offsets */ 3147 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); 3148 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 3149 3150 /* 3151 * Fast path: Chimney sending. 3152 */ 3153 if (chim != NULL) { 3154 struct hn_txdesc *tgt_txd = txd; 3155 3156 if (txr->hn_agg_txd != NULL) { 3157 tgt_txd = txr->hn_agg_txd; 3158 #ifdef INVARIANTS 3159 *m_head0 = NULL; 3160 #endif 3161 } 3162 3163 KASSERT(pkt == chim, 3164 ("RNDIS pkt not in chimney sending buffer")); 3165 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 3166 ("chimney sending buffer is not used")); 3167 tgt_txd->chim_size += pkt->rm_len; 3168 3169 m_copydata(m_head, 0, m_head->m_pkthdr.len, 3170 ((uint8_t *)chim) + pkt_hlen); 3171 3172 txr->hn_gpa_cnt = 0; 3173 txr->hn_sendpkt = hn_txpkt_chim; 3174 goto done; 3175 } 3176 3177 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 3178 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 3179 ("chimney buffer is used")); 3180 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 3181 3182 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 3183 if (__predict_false(error)) { 3184 int freed; 3185 3186 /* 3187 * This mbuf is not linked w/ the txd yet, so free it now. 3188 */ 3189 m_freem(m_head); 3190 *m_head0 = NULL; 3191 3192 freed = hn_txdesc_put(txr, txd); 3193 KASSERT(freed != 0, 3194 ("fail to free txd upon txdma error")); 3195 3196 txr->hn_txdma_failed++; 3197 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 3198 return error; 3199 } 3200 *m_head0 = m_head; 3201 3202 /* +1 RNDIS packet message */ 3203 txr->hn_gpa_cnt = nsegs + 1; 3204 3205 /* send packet with page buffer */ 3206 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 3207 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 3208 txr->hn_gpa[0].gpa_len = pkt_hlen; 3209 3210 /* 3211 * Fill the page buffers with mbuf info after the page 3212 * buffer for RNDIS packet message. 3213 */ 3214 for (i = 0; i < nsegs; ++i) { 3215 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 3216 3217 gpa->gpa_page = atop(segs[i].ds_addr); 3218 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 3219 gpa->gpa_len = segs[i].ds_len; 3220 } 3221 3222 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 3223 txd->chim_size = 0; 3224 txr->hn_sendpkt = hn_txpkt_sglist; 3225 done: 3226 txd->m = m_head; 3227 3228 /* Set the completion routine */ 3229 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 3230 3231 /* Update temporary stats for later use. */ 3232 txr->hn_stat_pkts++; 3233 txr->hn_stat_size += m_head->m_pkthdr.len; 3234 if (m_head->m_flags & M_MCAST) 3235 txr->hn_stat_mcasts++; 3236 3237 return 0; 3238 } 3239 3240 /* 3241 * NOTE: 3242 * If this function fails, then txd will be freed, but the mbuf 3243 * associated w/ the txd will _not_ be freed. 3244 */ 3245 static int 3246 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 3247 { 3248 int error, send_failed = 0, has_bpf; 3249 3250 again: 3251 has_bpf = bpf_peers_present(ifp->if_bpf); 3252 if (has_bpf) { 3253 /* 3254 * Make sure that this txd and any aggregated txds are not 3255 * freed before ETHER_BPF_MTAP. 3256 */ 3257 hn_txdesc_hold(txd); 3258 } 3259 error = txr->hn_sendpkt(txr, txd); 3260 if (!error) { 3261 if (has_bpf) { 3262 const struct hn_txdesc *tmp_txd; 3263 3264 ETHER_BPF_MTAP(ifp, txd->m); 3265 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 3266 ETHER_BPF_MTAP(ifp, tmp_txd->m); 3267 } 3268 3269 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 3270 #ifdef HN_IFSTART_SUPPORT 3271 if (!hn_use_if_start) 3272 #endif 3273 { 3274 if_inc_counter(ifp, IFCOUNTER_OBYTES, 3275 txr->hn_stat_size); 3276 if (txr->hn_stat_mcasts != 0) { 3277 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 3278 txr->hn_stat_mcasts); 3279 } 3280 } 3281 txr->hn_pkts += txr->hn_stat_pkts; 3282 txr->hn_sends++; 3283 } 3284 if (has_bpf) 3285 hn_txdesc_put(txr, txd); 3286 3287 if (__predict_false(error)) { 3288 int freed; 3289 3290 /* 3291 * This should "really rarely" happen. 3292 * 3293 * XXX Too many RX to be acked or too many sideband 3294 * commands to run? Ask netvsc_channel_rollup() 3295 * to kick start later. 3296 */ 3297 txr->hn_has_txeof = 1; 3298 if (!send_failed) { 3299 txr->hn_send_failed++; 3300 send_failed = 1; 3301 /* 3302 * Try sending again after set hn_has_txeof; 3303 * in case that we missed the last 3304 * netvsc_channel_rollup(). 3305 */ 3306 goto again; 3307 } 3308 if_printf(ifp, "send failed\n"); 3309 3310 /* 3311 * Caller will perform further processing on the 3312 * associated mbuf, so don't free it in hn_txdesc_put(); 3313 * only unload it from the DMA map in hn_txdesc_put(), 3314 * if it was loaded. 3315 */ 3316 txd->m = NULL; 3317 freed = hn_txdesc_put(txr, txd); 3318 KASSERT(freed != 0, 3319 ("fail to free txd upon send error")); 3320 3321 txr->hn_send_failed++; 3322 } 3323 3324 /* Reset temporary stats, after this sending is done. */ 3325 txr->hn_stat_size = 0; 3326 txr->hn_stat_pkts = 0; 3327 txr->hn_stat_mcasts = 0; 3328 3329 return (error); 3330 } 3331 3332 /* 3333 * Append the specified data to the indicated mbuf chain, 3334 * Extend the mbuf chain if the new data does not fit in 3335 * existing space. 3336 * 3337 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 3338 * There should be an equivalent in the kernel mbuf code, 3339 * but there does not appear to be one yet. 3340 * 3341 * Differs from m_append() in that additional mbufs are 3342 * allocated with cluster size MJUMPAGESIZE, and filled 3343 * accordingly. 3344 * 3345 * Return 1 if able to complete the job; otherwise 0. 3346 */ 3347 static int 3348 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 3349 { 3350 struct mbuf *m, *n; 3351 int remainder, space; 3352 3353 for (m = m0; m->m_next != NULL; m = m->m_next) 3354 ; 3355 remainder = len; 3356 space = M_TRAILINGSPACE(m); 3357 if (space > 0) { 3358 /* 3359 * Copy into available space. 3360 */ 3361 if (space > remainder) 3362 space = remainder; 3363 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 3364 m->m_len += space; 3365 cp += space; 3366 remainder -= space; 3367 } 3368 while (remainder > 0) { 3369 /* 3370 * Allocate a new mbuf; could check space 3371 * and allocate a cluster instead. 3372 */ 3373 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 3374 if (n == NULL) 3375 break; 3376 n->m_len = min(MJUMPAGESIZE, remainder); 3377 bcopy(cp, mtod(n, caddr_t), n->m_len); 3378 cp += n->m_len; 3379 remainder -= n->m_len; 3380 m->m_next = n; 3381 m = n; 3382 } 3383 if (m0->m_flags & M_PKTHDR) 3384 m0->m_pkthdr.len += len - remainder; 3385 3386 return (remainder == 0); 3387 } 3388 3389 #if defined(INET) || defined(INET6) 3390 static __inline int 3391 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 3392 { 3393 #if __FreeBSD_version >= 1100095 3394 if (hn_lro_mbufq_depth) { 3395 tcp_lro_queue_mbuf(lc, m); 3396 return 0; 3397 } 3398 #endif 3399 return tcp_lro_rx(lc, m, 0); 3400 } 3401 #endif 3402 3403 static int 3404 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 3405 const struct hn_rxinfo *info) 3406 { 3407 struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; 3408 struct mbuf *m_new; 3409 int size, do_lro = 0, do_csum = 1, is_vf = 0; 3410 int hash_type = M_HASHTYPE_NONE; 3411 int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE; 3412 3413 ifp = hn_ifp; 3414 if (rxr->hn_rxvf_ifp != NULL) { 3415 /* 3416 * Non-transparent mode VF; pretend this packet is from 3417 * the VF. 3418 */ 3419 ifp = rxr->hn_rxvf_ifp; 3420 is_vf = 1; 3421 } else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) { 3422 /* Transparent mode VF. */ 3423 is_vf = 1; 3424 } 3425 3426 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 3427 /* 3428 * NOTE: 3429 * See the NOTE of hn_rndis_init_fixat(). This 3430 * function can be reached, immediately after the 3431 * RNDIS is initialized but before the ifnet is 3432 * setup on the hn_attach() path; drop the unexpected 3433 * packets. 3434 */ 3435 return (0); 3436 } 3437 3438 if (__predict_false(dlen < ETHER_HDR_LEN)) { 3439 if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); 3440 return (0); 3441 } 3442 3443 if (dlen <= MHLEN) { 3444 m_new = m_gethdr(M_NOWAIT, MT_DATA); 3445 if (m_new == NULL) { 3446 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3447 return (0); 3448 } 3449 memcpy(mtod(m_new, void *), data, dlen); 3450 m_new->m_pkthdr.len = m_new->m_len = dlen; 3451 rxr->hn_small_pkts++; 3452 } else { 3453 /* 3454 * Get an mbuf with a cluster. For packets 2K or less, 3455 * get a standard 2K cluster. For anything larger, get a 3456 * 4K cluster. Any buffers larger than 4K can cause problems 3457 * if looped around to the Hyper-V TX channel, so avoid them. 3458 */ 3459 size = MCLBYTES; 3460 if (dlen > MCLBYTES) { 3461 /* 4096 */ 3462 size = MJUMPAGESIZE; 3463 } 3464 3465 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 3466 if (m_new == NULL) { 3467 if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); 3468 return (0); 3469 } 3470 3471 hv_m_append(m_new, dlen, data); 3472 } 3473 m_new->m_pkthdr.rcvif = ifp; 3474 3475 if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) 3476 do_csum = 0; 3477 3478 /* receive side checksum offload */ 3479 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 3480 /* IP csum offload */ 3481 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 3482 m_new->m_pkthdr.csum_flags |= 3483 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3484 rxr->hn_csum_ip++; 3485 } 3486 3487 /* TCP/UDP csum offload */ 3488 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 3489 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 3490 m_new->m_pkthdr.csum_flags |= 3491 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3492 m_new->m_pkthdr.csum_data = 0xffff; 3493 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 3494 rxr->hn_csum_tcp++; 3495 else 3496 rxr->hn_csum_udp++; 3497 } 3498 3499 /* 3500 * XXX 3501 * As of this write (Oct 28th, 2016), host side will turn 3502 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 3503 * the do_lro setting here is actually _not_ accurate. We 3504 * depend on the RSS hash type check to reset do_lro. 3505 */ 3506 if ((info->csum_info & 3507 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 3508 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 3509 do_lro = 1; 3510 } else { 3511 hn_rxpkt_proto(m_new, &l3proto, &l4proto); 3512 if (l3proto == ETHERTYPE_IP) { 3513 if (l4proto == IPPROTO_TCP) { 3514 if (do_csum && 3515 (rxr->hn_trust_hcsum & 3516 HN_TRUST_HCSUM_TCP)) { 3517 rxr->hn_csum_trusted++; 3518 m_new->m_pkthdr.csum_flags |= 3519 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3520 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3521 m_new->m_pkthdr.csum_data = 0xffff; 3522 } 3523 do_lro = 1; 3524 } else if (l4proto == IPPROTO_UDP) { 3525 if (do_csum && 3526 (rxr->hn_trust_hcsum & 3527 HN_TRUST_HCSUM_UDP)) { 3528 rxr->hn_csum_trusted++; 3529 m_new->m_pkthdr.csum_flags |= 3530 (CSUM_IP_CHECKED | CSUM_IP_VALID | 3531 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 3532 m_new->m_pkthdr.csum_data = 0xffff; 3533 } 3534 } else if (l4proto != IPPROTO_DONE && do_csum && 3535 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 3536 rxr->hn_csum_trusted++; 3537 m_new->m_pkthdr.csum_flags |= 3538 (CSUM_IP_CHECKED | CSUM_IP_VALID); 3539 } 3540 } 3541 } 3542 3543 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 3544 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 3545 NDIS_VLAN_INFO_ID(info->vlan_info), 3546 NDIS_VLAN_INFO_PRI(info->vlan_info), 3547 NDIS_VLAN_INFO_CFI(info->vlan_info)); 3548 m_new->m_flags |= M_VLANTAG; 3549 } 3550 3551 /* 3552 * If VF is activated (tranparent/non-transparent mode does not 3553 * matter here). 3554 * 3555 * - Disable LRO 3556 * 3557 * hn(4) will only receive broadcast packets, multicast packets, 3558 * TCP SYN and SYN|ACK (in Azure), LRO is useless for these 3559 * packet types. 3560 * 3561 * For non-transparent, we definitely _cannot_ enable LRO at 3562 * all, since the LRO flush will use hn(4) as the receiving 3563 * interface; i.e. hn_ifp->if_input(hn_ifp, m). 3564 */ 3565 if (is_vf) 3566 do_lro = 0; 3567 3568 /* 3569 * If VF is activated (tranparent/non-transparent mode does not 3570 * matter here), do _not_ mess with unsupported hash types or 3571 * functions. 3572 */ 3573 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 3574 rxr->hn_rss_pkts++; 3575 m_new->m_pkthdr.flowid = info->hash_value; 3576 if (!is_vf) 3577 hash_type = M_HASHTYPE_OPAQUE_HASH; 3578 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 3579 NDIS_HASH_FUNCTION_TOEPLITZ) { 3580 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK & 3581 rxr->hn_mbuf_hash); 3582 3583 /* 3584 * NOTE: 3585 * do_lro is resetted, if the hash types are not TCP 3586 * related. See the comment in the above csum_flags 3587 * setup section. 3588 */ 3589 switch (type) { 3590 case NDIS_HASH_IPV4: 3591 hash_type = M_HASHTYPE_RSS_IPV4; 3592 do_lro = 0; 3593 break; 3594 3595 case NDIS_HASH_TCP_IPV4: 3596 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 3597 if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) { 3598 int def_htype = M_HASHTYPE_OPAQUE_HASH; 3599 3600 if (is_vf) 3601 def_htype = M_HASHTYPE_NONE; 3602 3603 /* 3604 * UDP 4-tuple hash is delivered as 3605 * TCP 4-tuple hash. 3606 */ 3607 if (l3proto == ETHERTYPE_MAX) { 3608 hn_rxpkt_proto(m_new, 3609 &l3proto, &l4proto); 3610 } 3611 if (l3proto == ETHERTYPE_IP) { 3612 if (l4proto == IPPROTO_UDP && 3613 (rxr->hn_mbuf_hash & 3614 NDIS_HASH_UDP_IPV4_X)) { 3615 hash_type = 3616 M_HASHTYPE_RSS_UDP_IPV4; 3617 do_lro = 0; 3618 } else if (l4proto != 3619 IPPROTO_TCP) { 3620 hash_type = def_htype; 3621 do_lro = 0; 3622 } 3623 } else { 3624 hash_type = def_htype; 3625 do_lro = 0; 3626 } 3627 } 3628 break; 3629 3630 case NDIS_HASH_IPV6: 3631 hash_type = M_HASHTYPE_RSS_IPV6; 3632 do_lro = 0; 3633 break; 3634 3635 case NDIS_HASH_IPV6_EX: 3636 hash_type = M_HASHTYPE_RSS_IPV6_EX; 3637 do_lro = 0; 3638 break; 3639 3640 case NDIS_HASH_TCP_IPV6: 3641 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 3642 break; 3643 3644 case NDIS_HASH_TCP_IPV6_EX: 3645 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 3646 break; 3647 } 3648 } 3649 } else if (!is_vf) { 3650 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 3651 hash_type = M_HASHTYPE_OPAQUE; 3652 } 3653 M_HASHTYPE_SET(m_new, hash_type); 3654 3655 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 3656 if (hn_ifp != ifp) { 3657 const struct ether_header *eh; 3658 3659 /* 3660 * Non-transparent mode VF is activated. 3661 */ 3662 3663 /* 3664 * Allow tapping on hn(4). 3665 */ 3666 ETHER_BPF_MTAP(hn_ifp, m_new); 3667 3668 /* 3669 * Update hn(4)'s stats. 3670 */ 3671 if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); 3672 if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len); 3673 /* Checked at the beginning of this function. */ 3674 KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame")); 3675 eh = mtod(m_new, struct ether_header *); 3676 if (ETHER_IS_MULTICAST(eh->ether_dhost)) 3677 if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1); 3678 } 3679 rxr->hn_pkts++; 3680 3681 if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) { 3682 #if defined(INET) || defined(INET6) 3683 struct lro_ctrl *lro = &rxr->hn_lro; 3684 3685 if (lro->lro_cnt) { 3686 rxr->hn_lro_tried++; 3687 if (hn_lro_rx(lro, m_new) == 0) { 3688 /* DONE! */ 3689 return 0; 3690 } 3691 } 3692 #endif 3693 } 3694 ifp->if_input(ifp, m_new); 3695 3696 return (0); 3697 } 3698 3699 static int 3700 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 3701 { 3702 struct hn_softc *sc = ifp->if_softc; 3703 struct ifreq *ifr = (struct ifreq *)data, ifr_vf; 3704 struct ifnet *vf_ifp; 3705 int mask, error = 0; 3706 struct ifrsskey *ifrk; 3707 struct ifrsshash *ifrh; 3708 uint32_t mtu; 3709 3710 switch (cmd) { 3711 case SIOCSIFMTU: 3712 if (ifr->ifr_mtu > HN_MTU_MAX) { 3713 error = EINVAL; 3714 break; 3715 } 3716 3717 HN_LOCK(sc); 3718 3719 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3720 HN_UNLOCK(sc); 3721 break; 3722 } 3723 3724 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 3725 /* Can't change MTU */ 3726 HN_UNLOCK(sc); 3727 error = EOPNOTSUPP; 3728 break; 3729 } 3730 3731 if (ifp->if_mtu == ifr->ifr_mtu) { 3732 HN_UNLOCK(sc); 3733 break; 3734 } 3735 3736 if (hn_xpnt_vf_isready(sc)) { 3737 vf_ifp = sc->hn_vf_ifp; 3738 ifr_vf = *ifr; 3739 strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname, 3740 sizeof(ifr_vf.ifr_name)); 3741 error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, 3742 (caddr_t)&ifr_vf); 3743 if (error) { 3744 HN_UNLOCK(sc); 3745 if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n", 3746 vf_ifp->if_xname, ifr->ifr_mtu, error); 3747 break; 3748 } 3749 } 3750 3751 /* 3752 * Suspend this interface before the synthetic parts 3753 * are ripped. 3754 */ 3755 hn_suspend(sc); 3756 3757 /* 3758 * Detach the synthetics parts, i.e. NVS and RNDIS. 3759 */ 3760 hn_synth_detach(sc); 3761 3762 /* 3763 * Reattach the synthetic parts, i.e. NVS and RNDIS, 3764 * with the new MTU setting. 3765 */ 3766 error = hn_synth_attach(sc, ifr->ifr_mtu); 3767 if (error) { 3768 HN_UNLOCK(sc); 3769 break; 3770 } 3771 3772 error = hn_rndis_get_mtu(sc, &mtu); 3773 if (error) 3774 mtu = ifr->ifr_mtu; 3775 else if (bootverbose) 3776 if_printf(ifp, "RNDIS mtu %u\n", mtu); 3777 3778 /* 3779 * Commit the requested MTU, after the synthetic parts 3780 * have been successfully attached. 3781 */ 3782 if (mtu >= ifr->ifr_mtu) { 3783 mtu = ifr->ifr_mtu; 3784 } else { 3785 if_printf(ifp, "fixup mtu %d -> %u\n", 3786 ifr->ifr_mtu, mtu); 3787 } 3788 ifp->if_mtu = mtu; 3789 3790 /* 3791 * Synthetic parts' reattach may change the chimney 3792 * sending size; update it. 3793 */ 3794 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 3795 hn_set_chim_size(sc, sc->hn_chim_szmax); 3796 3797 /* 3798 * Make sure that various parameters based on MTU are 3799 * still valid, after the MTU change. 3800 */ 3801 hn_mtu_change_fixup(sc); 3802 3803 /* 3804 * All done! Resume the interface now. 3805 */ 3806 hn_resume(sc); 3807 3808 if ((sc->hn_flags & HN_FLAG_RXVF) || 3809 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 3810 /* 3811 * Since we have reattached the NVS part, 3812 * change the datapath to VF again; in case 3813 * that it is lost, after the NVS was detached. 3814 */ 3815 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); 3816 } 3817 3818 HN_UNLOCK(sc); 3819 break; 3820 3821 case SIOCSIFFLAGS: 3822 HN_LOCK(sc); 3823 3824 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3825 HN_UNLOCK(sc); 3826 break; 3827 } 3828 3829 if (hn_xpnt_vf_isready(sc)) 3830 hn_xpnt_vf_saveifflags(sc); 3831 3832 if (ifp->if_flags & IFF_UP) { 3833 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3834 /* 3835 * Caller meight hold mutex, e.g. 3836 * bpf; use busy-wait for the RNDIS 3837 * reply. 3838 */ 3839 HN_NO_SLEEPING(sc); 3840 hn_rxfilter_config(sc); 3841 HN_SLEEPING_OK(sc); 3842 3843 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 3844 error = hn_xpnt_vf_iocsetflags(sc); 3845 } else { 3846 hn_init_locked(sc); 3847 } 3848 } else { 3849 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3850 hn_stop(sc, false); 3851 } 3852 sc->hn_if_flags = ifp->if_flags; 3853 3854 HN_UNLOCK(sc); 3855 break; 3856 3857 case SIOCSIFCAP: 3858 HN_LOCK(sc); 3859 3860 if (hn_xpnt_vf_isready(sc)) { 3861 ifr_vf = *ifr; 3862 strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname, 3863 sizeof(ifr_vf.ifr_name)); 3864 error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf); 3865 HN_UNLOCK(sc); 3866 break; 3867 } 3868 3869 /* 3870 * Fix up requested capabilities w/ supported capabilities, 3871 * since the supported capabilities could have been changed. 3872 */ 3873 mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ 3874 ifp->if_capenable; 3875 3876 if (mask & IFCAP_TXCSUM) { 3877 ifp->if_capenable ^= IFCAP_TXCSUM; 3878 if (ifp->if_capenable & IFCAP_TXCSUM) 3879 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 3880 else 3881 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 3882 } 3883 if (mask & IFCAP_TXCSUM_IPV6) { 3884 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 3885 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 3886 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 3887 else 3888 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 3889 } 3890 3891 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 3892 if (mask & IFCAP_RXCSUM) 3893 ifp->if_capenable ^= IFCAP_RXCSUM; 3894 #ifdef foo 3895 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 3896 if (mask & IFCAP_RXCSUM_IPV6) 3897 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 3898 #endif 3899 3900 if (mask & IFCAP_LRO) 3901 ifp->if_capenable ^= IFCAP_LRO; 3902 3903 if (mask & IFCAP_TSO4) { 3904 ifp->if_capenable ^= IFCAP_TSO4; 3905 if (ifp->if_capenable & IFCAP_TSO4) 3906 ifp->if_hwassist |= CSUM_IP_TSO; 3907 else 3908 ifp->if_hwassist &= ~CSUM_IP_TSO; 3909 } 3910 if (mask & IFCAP_TSO6) { 3911 ifp->if_capenable ^= IFCAP_TSO6; 3912 if (ifp->if_capenable & IFCAP_TSO6) 3913 ifp->if_hwassist |= CSUM_IP6_TSO; 3914 else 3915 ifp->if_hwassist &= ~CSUM_IP6_TSO; 3916 } 3917 3918 HN_UNLOCK(sc); 3919 break; 3920 3921 case SIOCADDMULTI: 3922 case SIOCDELMULTI: 3923 HN_LOCK(sc); 3924 3925 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 3926 HN_UNLOCK(sc); 3927 break; 3928 } 3929 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3930 /* 3931 * Multicast uses mutex; use busy-wait for 3932 * the RNDIS reply. 3933 */ 3934 HN_NO_SLEEPING(sc); 3935 hn_rxfilter_config(sc); 3936 HN_SLEEPING_OK(sc); 3937 } 3938 3939 /* XXX vlan(4) style mcast addr maintenance */ 3940 if (hn_xpnt_vf_isready(sc)) { 3941 int old_if_flags; 3942 3943 old_if_flags = sc->hn_vf_ifp->if_flags; 3944 hn_xpnt_vf_saveifflags(sc); 3945 3946 if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && 3947 ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & 3948 IFF_ALLMULTI)) 3949 error = hn_xpnt_vf_iocsetflags(sc); 3950 } 3951 3952 HN_UNLOCK(sc); 3953 break; 3954 3955 case SIOCSIFMEDIA: 3956 case SIOCGIFMEDIA: 3957 HN_LOCK(sc); 3958 if (hn_xpnt_vf_isready(sc)) { 3959 /* 3960 * SIOCGIFMEDIA expects ifmediareq, so don't 3961 * create and pass ifr_vf to the VF here; just 3962 * replace the ifr_name. 3963 */ 3964 vf_ifp = sc->hn_vf_ifp; 3965 strlcpy(ifr->ifr_name, vf_ifp->if_xname, 3966 sizeof(ifr->ifr_name)); 3967 error = vf_ifp->if_ioctl(vf_ifp, cmd, data); 3968 /* Restore the ifr_name. */ 3969 strlcpy(ifr->ifr_name, ifp->if_xname, 3970 sizeof(ifr->ifr_name)); 3971 HN_UNLOCK(sc); 3972 break; 3973 } 3974 HN_UNLOCK(sc); 3975 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 3976 break; 3977 3978 case SIOCGIFRSSHASH: 3979 ifrh = (struct ifrsshash *)data; 3980 HN_LOCK(sc); 3981 if (sc->hn_rx_ring_inuse == 1) { 3982 HN_UNLOCK(sc); 3983 ifrh->ifrh_func = RSS_FUNC_NONE; 3984 ifrh->ifrh_types = 0; 3985 break; 3986 } 3987 3988 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 3989 ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; 3990 else 3991 ifrh->ifrh_func = RSS_FUNC_PRIVATE; 3992 ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash); 3993 HN_UNLOCK(sc); 3994 break; 3995 3996 case SIOCGIFRSSKEY: 3997 ifrk = (struct ifrsskey *)data; 3998 HN_LOCK(sc); 3999 if (sc->hn_rx_ring_inuse == 1) { 4000 HN_UNLOCK(sc); 4001 ifrk->ifrk_func = RSS_FUNC_NONE; 4002 ifrk->ifrk_keylen = 0; 4003 break; 4004 } 4005 if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ) 4006 ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; 4007 else 4008 ifrk->ifrk_func = RSS_FUNC_PRIVATE; 4009 ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ; 4010 memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key, 4011 NDIS_HASH_KEYSIZE_TOEPLITZ); 4012 HN_UNLOCK(sc); 4013 break; 4014 4015 default: 4016 error = ether_ioctl(ifp, cmd, data); 4017 break; 4018 } 4019 return (error); 4020 } 4021 4022 static void 4023 hn_stop(struct hn_softc *sc, bool detaching) 4024 { 4025 struct ifnet *ifp = sc->hn_ifp; 4026 int i; 4027 4028 HN_LOCK_ASSERT(sc); 4029 4030 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 4031 ("synthetic parts were not attached")); 4032 4033 /* Clear RUNNING bit ASAP. */ 4034 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4035 4036 /* Disable polling. */ 4037 hn_polling(sc, 0); 4038 4039 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 4040 KASSERT(sc->hn_vf_ifp != NULL, 4041 ("%s: VF is not attached", ifp->if_xname)); 4042 4043 /* Mark transparent mode VF as disabled. */ 4044 hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */); 4045 4046 /* 4047 * NOTE: 4048 * Datapath setting must happen _before_ bringing 4049 * the VF down. 4050 */ 4051 hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); 4052 4053 /* 4054 * Bring the VF down. 4055 */ 4056 hn_xpnt_vf_saveifflags(sc); 4057 sc->hn_vf_ifp->if_flags &= ~IFF_UP; 4058 hn_xpnt_vf_iocsetflags(sc); 4059 } 4060 4061 /* Suspend data transfers. */ 4062 hn_suspend_data(sc); 4063 4064 /* Clear OACTIVE bit. */ 4065 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4066 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4067 sc->hn_tx_ring[i].hn_oactive = 0; 4068 4069 /* 4070 * If the non-transparent mode VF is active, make sure 4071 * that the RX filter still allows packet reception. 4072 */ 4073 if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) 4074 hn_rxfilter_config(sc); 4075 } 4076 4077 static void 4078 hn_init_locked(struct hn_softc *sc) 4079 { 4080 struct ifnet *ifp = sc->hn_ifp; 4081 int i; 4082 4083 HN_LOCK_ASSERT(sc); 4084 4085 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 4086 return; 4087 4088 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 4089 return; 4090 4091 /* Configure RX filter */ 4092 hn_rxfilter_config(sc); 4093 4094 /* Clear OACTIVE bit. */ 4095 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4096 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4097 sc->hn_tx_ring[i].hn_oactive = 0; 4098 4099 /* Clear TX 'suspended' bit. */ 4100 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 4101 4102 if (hn_xpnt_vf_isready(sc)) { 4103 /* Initialize transparent VF. */ 4104 hn_xpnt_vf_init(sc); 4105 } 4106 4107 /* Everything is ready; unleash! */ 4108 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 4109 4110 /* Re-enable polling if requested. */ 4111 if (sc->hn_pollhz > 0) 4112 hn_polling(sc, sc->hn_pollhz); 4113 } 4114 4115 static void 4116 hn_init(void *xsc) 4117 { 4118 struct hn_softc *sc = xsc; 4119 4120 HN_LOCK(sc); 4121 hn_init_locked(sc); 4122 HN_UNLOCK(sc); 4123 } 4124 4125 #if __FreeBSD_version >= 1100099 4126 4127 static int 4128 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 4129 { 4130 struct hn_softc *sc = arg1; 4131 unsigned int lenlim; 4132 int error; 4133 4134 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 4135 error = sysctl_handle_int(oidp, &lenlim, 0, req); 4136 if (error || req->newptr == NULL) 4137 return error; 4138 4139 HN_LOCK(sc); 4140 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 4141 lenlim > TCP_LRO_LENGTH_MAX) { 4142 HN_UNLOCK(sc); 4143 return EINVAL; 4144 } 4145 hn_set_lro_lenlim(sc, lenlim); 4146 HN_UNLOCK(sc); 4147 4148 return 0; 4149 } 4150 4151 static int 4152 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 4153 { 4154 struct hn_softc *sc = arg1; 4155 int ackcnt, error, i; 4156 4157 /* 4158 * lro_ackcnt_lim is append count limit, 4159 * +1 to turn it into aggregation limit. 4160 */ 4161 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 4162 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 4163 if (error || req->newptr == NULL) 4164 return error; 4165 4166 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 4167 return EINVAL; 4168 4169 /* 4170 * Convert aggregation limit back to append 4171 * count limit. 4172 */ 4173 --ackcnt; 4174 HN_LOCK(sc); 4175 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 4176 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 4177 HN_UNLOCK(sc); 4178 return 0; 4179 } 4180 4181 #endif 4182 4183 static int 4184 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 4185 { 4186 struct hn_softc *sc = arg1; 4187 int hcsum = arg2; 4188 int on, error, i; 4189 4190 on = 0; 4191 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 4192 on = 1; 4193 4194 error = sysctl_handle_int(oidp, &on, 0, req); 4195 if (error || req->newptr == NULL) 4196 return error; 4197 4198 HN_LOCK(sc); 4199 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4200 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4201 4202 if (on) 4203 rxr->hn_trust_hcsum |= hcsum; 4204 else 4205 rxr->hn_trust_hcsum &= ~hcsum; 4206 } 4207 HN_UNLOCK(sc); 4208 return 0; 4209 } 4210 4211 static int 4212 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 4213 { 4214 struct hn_softc *sc = arg1; 4215 int chim_size, error; 4216 4217 chim_size = sc->hn_tx_ring[0].hn_chim_size; 4218 error = sysctl_handle_int(oidp, &chim_size, 0, req); 4219 if (error || req->newptr == NULL) 4220 return error; 4221 4222 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 4223 return EINVAL; 4224 4225 HN_LOCK(sc); 4226 hn_set_chim_size(sc, chim_size); 4227 HN_UNLOCK(sc); 4228 return 0; 4229 } 4230 4231 #if __FreeBSD_version < 1100095 4232 static int 4233 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 4234 { 4235 struct hn_softc *sc = arg1; 4236 int ofs = arg2, i, error; 4237 struct hn_rx_ring *rxr; 4238 uint64_t stat; 4239 4240 stat = 0; 4241 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4242 rxr = &sc->hn_rx_ring[i]; 4243 stat += *((int *)((uint8_t *)rxr + ofs)); 4244 } 4245 4246 error = sysctl_handle_64(oidp, &stat, 0, req); 4247 if (error || req->newptr == NULL) 4248 return error; 4249 4250 /* Zero out this stat. */ 4251 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4252 rxr = &sc->hn_rx_ring[i]; 4253 *((int *)((uint8_t *)rxr + ofs)) = 0; 4254 } 4255 return 0; 4256 } 4257 #else 4258 static int 4259 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 4260 { 4261 struct hn_softc *sc = arg1; 4262 int ofs = arg2, i, error; 4263 struct hn_rx_ring *rxr; 4264 uint64_t stat; 4265 4266 stat = 0; 4267 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4268 rxr = &sc->hn_rx_ring[i]; 4269 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 4270 } 4271 4272 error = sysctl_handle_64(oidp, &stat, 0, req); 4273 if (error || req->newptr == NULL) 4274 return error; 4275 4276 /* Zero out this stat. */ 4277 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4278 rxr = &sc->hn_rx_ring[i]; 4279 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 4280 } 4281 return 0; 4282 } 4283 4284 #endif 4285 4286 static int 4287 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4288 { 4289 struct hn_softc *sc = arg1; 4290 int ofs = arg2, i, error; 4291 struct hn_rx_ring *rxr; 4292 u_long stat; 4293 4294 stat = 0; 4295 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4296 rxr = &sc->hn_rx_ring[i]; 4297 stat += *((u_long *)((uint8_t *)rxr + ofs)); 4298 } 4299 4300 error = sysctl_handle_long(oidp, &stat, 0, req); 4301 if (error || req->newptr == NULL) 4302 return error; 4303 4304 /* Zero out this stat. */ 4305 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4306 rxr = &sc->hn_rx_ring[i]; 4307 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 4308 } 4309 return 0; 4310 } 4311 4312 static int 4313 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 4314 { 4315 struct hn_softc *sc = arg1; 4316 int ofs = arg2, i, error; 4317 struct hn_tx_ring *txr; 4318 u_long stat; 4319 4320 stat = 0; 4321 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4322 txr = &sc->hn_tx_ring[i]; 4323 stat += *((u_long *)((uint8_t *)txr + ofs)); 4324 } 4325 4326 error = sysctl_handle_long(oidp, &stat, 0, req); 4327 if (error || req->newptr == NULL) 4328 return error; 4329 4330 /* Zero out this stat. */ 4331 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4332 txr = &sc->hn_tx_ring[i]; 4333 *((u_long *)((uint8_t *)txr + ofs)) = 0; 4334 } 4335 return 0; 4336 } 4337 4338 static int 4339 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 4340 { 4341 struct hn_softc *sc = arg1; 4342 int ofs = arg2, i, error, conf; 4343 struct hn_tx_ring *txr; 4344 4345 txr = &sc->hn_tx_ring[0]; 4346 conf = *((int *)((uint8_t *)txr + ofs)); 4347 4348 error = sysctl_handle_int(oidp, &conf, 0, req); 4349 if (error || req->newptr == NULL) 4350 return error; 4351 4352 HN_LOCK(sc); 4353 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4354 txr = &sc->hn_tx_ring[i]; 4355 *((int *)((uint8_t *)txr + ofs)) = conf; 4356 } 4357 HN_UNLOCK(sc); 4358 4359 return 0; 4360 } 4361 4362 static int 4363 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 4364 { 4365 struct hn_softc *sc = arg1; 4366 int error, size; 4367 4368 size = sc->hn_agg_size; 4369 error = sysctl_handle_int(oidp, &size, 0, req); 4370 if (error || req->newptr == NULL) 4371 return (error); 4372 4373 HN_LOCK(sc); 4374 sc->hn_agg_size = size; 4375 hn_set_txagg(sc); 4376 HN_UNLOCK(sc); 4377 4378 return (0); 4379 } 4380 4381 static int 4382 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 4383 { 4384 struct hn_softc *sc = arg1; 4385 int error, pkts; 4386 4387 pkts = sc->hn_agg_pkts; 4388 error = sysctl_handle_int(oidp, &pkts, 0, req); 4389 if (error || req->newptr == NULL) 4390 return (error); 4391 4392 HN_LOCK(sc); 4393 sc->hn_agg_pkts = pkts; 4394 hn_set_txagg(sc); 4395 HN_UNLOCK(sc); 4396 4397 return (0); 4398 } 4399 4400 static int 4401 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 4402 { 4403 struct hn_softc *sc = arg1; 4404 int pkts; 4405 4406 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 4407 return (sysctl_handle_int(oidp, &pkts, 0, req)); 4408 } 4409 4410 static int 4411 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 4412 { 4413 struct hn_softc *sc = arg1; 4414 int align; 4415 4416 align = sc->hn_tx_ring[0].hn_agg_align; 4417 return (sysctl_handle_int(oidp, &align, 0, req)); 4418 } 4419 4420 static void 4421 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) 4422 { 4423 if (pollhz == 0) 4424 vmbus_chan_poll_disable(chan); 4425 else 4426 vmbus_chan_poll_enable(chan, pollhz); 4427 } 4428 4429 static void 4430 hn_polling(struct hn_softc *sc, u_int pollhz) 4431 { 4432 int nsubch = sc->hn_rx_ring_inuse - 1; 4433 4434 HN_LOCK_ASSERT(sc); 4435 4436 if (nsubch > 0) { 4437 struct vmbus_channel **subch; 4438 int i; 4439 4440 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 4441 for (i = 0; i < nsubch; ++i) 4442 hn_chan_polling(subch[i], pollhz); 4443 vmbus_subchan_rel(subch, nsubch); 4444 } 4445 hn_chan_polling(sc->hn_prichan, pollhz); 4446 } 4447 4448 static int 4449 hn_polling_sysctl(SYSCTL_HANDLER_ARGS) 4450 { 4451 struct hn_softc *sc = arg1; 4452 int pollhz, error; 4453 4454 pollhz = sc->hn_pollhz; 4455 error = sysctl_handle_int(oidp, &pollhz, 0, req); 4456 if (error || req->newptr == NULL) 4457 return (error); 4458 4459 if (pollhz != 0 && 4460 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) 4461 return (EINVAL); 4462 4463 HN_LOCK(sc); 4464 if (sc->hn_pollhz != pollhz) { 4465 sc->hn_pollhz = pollhz; 4466 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && 4467 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 4468 hn_polling(sc, sc->hn_pollhz); 4469 } 4470 HN_UNLOCK(sc); 4471 4472 return (0); 4473 } 4474 4475 static int 4476 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 4477 { 4478 struct hn_softc *sc = arg1; 4479 char verstr[16]; 4480 4481 snprintf(verstr, sizeof(verstr), "%u.%u", 4482 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 4483 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 4484 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 4485 } 4486 4487 static int 4488 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 4489 { 4490 struct hn_softc *sc = arg1; 4491 char caps_str[128]; 4492 uint32_t caps; 4493 4494 HN_LOCK(sc); 4495 caps = sc->hn_caps; 4496 HN_UNLOCK(sc); 4497 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 4498 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 4499 } 4500 4501 static int 4502 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 4503 { 4504 struct hn_softc *sc = arg1; 4505 char assist_str[128]; 4506 uint32_t hwassist; 4507 4508 HN_LOCK(sc); 4509 hwassist = sc->hn_ifp->if_hwassist; 4510 HN_UNLOCK(sc); 4511 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 4512 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 4513 } 4514 4515 static int 4516 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 4517 { 4518 struct hn_softc *sc = arg1; 4519 char filter_str[128]; 4520 uint32_t filter; 4521 4522 HN_LOCK(sc); 4523 filter = sc->hn_rx_filter; 4524 HN_UNLOCK(sc); 4525 snprintf(filter_str, sizeof(filter_str), "%b", filter, 4526 NDIS_PACKET_TYPES); 4527 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 4528 } 4529 4530 #ifndef RSS 4531 4532 static int 4533 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 4534 { 4535 struct hn_softc *sc = arg1; 4536 int error; 4537 4538 HN_LOCK(sc); 4539 4540 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4541 if (error || req->newptr == NULL) 4542 goto back; 4543 4544 if ((sc->hn_flags & HN_FLAG_RXVF) || 4545 (hn_xpnt_vf && sc->hn_vf_ifp != NULL)) { 4546 /* 4547 * RSS key is synchronized w/ VF's, don't allow users 4548 * to change it. 4549 */ 4550 error = EBUSY; 4551 goto back; 4552 } 4553 4554 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 4555 if (error) 4556 goto back; 4557 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 4558 4559 if (sc->hn_rx_ring_inuse > 1) { 4560 error = hn_rss_reconfig(sc); 4561 } else { 4562 /* Not RSS capable, at least for now; just save the RSS key. */ 4563 error = 0; 4564 } 4565 back: 4566 HN_UNLOCK(sc); 4567 return (error); 4568 } 4569 4570 static int 4571 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 4572 { 4573 struct hn_softc *sc = arg1; 4574 int error; 4575 4576 HN_LOCK(sc); 4577 4578 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4579 if (error || req->newptr == NULL) 4580 goto back; 4581 4582 /* 4583 * Don't allow RSS indirect table change, if this interface is not 4584 * RSS capable currently. 4585 */ 4586 if (sc->hn_rx_ring_inuse == 1) { 4587 error = EOPNOTSUPP; 4588 goto back; 4589 } 4590 4591 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 4592 if (error) 4593 goto back; 4594 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 4595 4596 hn_rss_ind_fixup(sc); 4597 error = hn_rss_reconfig(sc); 4598 back: 4599 HN_UNLOCK(sc); 4600 return (error); 4601 } 4602 4603 #endif /* !RSS */ 4604 4605 static int 4606 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 4607 { 4608 struct hn_softc *sc = arg1; 4609 char hash_str[128]; 4610 uint32_t hash; 4611 4612 HN_LOCK(sc); 4613 hash = sc->hn_rss_hash; 4614 HN_UNLOCK(sc); 4615 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4616 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4617 } 4618 4619 static int 4620 hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS) 4621 { 4622 struct hn_softc *sc = arg1; 4623 char hash_str[128]; 4624 uint32_t hash; 4625 4626 HN_LOCK(sc); 4627 hash = sc->hn_rss_hcap; 4628 HN_UNLOCK(sc); 4629 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4630 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4631 } 4632 4633 static int 4634 hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS) 4635 { 4636 struct hn_softc *sc = arg1; 4637 char hash_str[128]; 4638 uint32_t hash; 4639 4640 HN_LOCK(sc); 4641 hash = sc->hn_rx_ring[0].hn_mbuf_hash; 4642 HN_UNLOCK(sc); 4643 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 4644 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 4645 } 4646 4647 static int 4648 hn_vf_sysctl(SYSCTL_HANDLER_ARGS) 4649 { 4650 struct hn_softc *sc = arg1; 4651 char vf_name[IFNAMSIZ + 1]; 4652 struct ifnet *vf_ifp; 4653 4654 HN_LOCK(sc); 4655 vf_name[0] = '\0'; 4656 vf_ifp = sc->hn_vf_ifp; 4657 if (vf_ifp != NULL) 4658 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4659 HN_UNLOCK(sc); 4660 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4661 } 4662 4663 static int 4664 hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) 4665 { 4666 struct hn_softc *sc = arg1; 4667 char vf_name[IFNAMSIZ + 1]; 4668 struct ifnet *vf_ifp; 4669 4670 HN_LOCK(sc); 4671 vf_name[0] = '\0'; 4672 vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; 4673 if (vf_ifp != NULL) 4674 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 4675 HN_UNLOCK(sc); 4676 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 4677 } 4678 4679 static int 4680 hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) 4681 { 4682 struct rm_priotracker pt; 4683 struct sbuf *sb; 4684 int error, i; 4685 bool first; 4686 4687 error = sysctl_wire_old_buffer(req, 0); 4688 if (error != 0) 4689 return (error); 4690 4691 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4692 if (sb == NULL) 4693 return (ENOMEM); 4694 4695 rm_rlock(&hn_vfmap_lock, &pt); 4696 4697 first = true; 4698 for (i = 0; i < hn_vfmap_size; ++i) { 4699 struct ifnet *ifp; 4700 4701 if (hn_vfmap[i] == NULL) 4702 continue; 4703 4704 ifp = ifnet_byindex(i); 4705 if (ifp != NULL) { 4706 if (first) 4707 sbuf_printf(sb, "%s", ifp->if_xname); 4708 else 4709 sbuf_printf(sb, " %s", ifp->if_xname); 4710 first = false; 4711 } 4712 } 4713 4714 rm_runlock(&hn_vfmap_lock, &pt); 4715 4716 error = sbuf_finish(sb); 4717 sbuf_delete(sb); 4718 return (error); 4719 } 4720 4721 static int 4722 hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) 4723 { 4724 struct rm_priotracker pt; 4725 struct sbuf *sb; 4726 int error, i; 4727 bool first; 4728 4729 error = sysctl_wire_old_buffer(req, 0); 4730 if (error != 0) 4731 return (error); 4732 4733 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 4734 if (sb == NULL) 4735 return (ENOMEM); 4736 4737 rm_rlock(&hn_vfmap_lock, &pt); 4738 4739 first = true; 4740 for (i = 0; i < hn_vfmap_size; ++i) { 4741 struct ifnet *ifp, *hn_ifp; 4742 4743 hn_ifp = hn_vfmap[i]; 4744 if (hn_ifp == NULL) 4745 continue; 4746 4747 ifp = ifnet_byindex(i); 4748 if (ifp != NULL) { 4749 if (first) { 4750 sbuf_printf(sb, "%s:%s", ifp->if_xname, 4751 hn_ifp->if_xname); 4752 } else { 4753 sbuf_printf(sb, " %s:%s", ifp->if_xname, 4754 hn_ifp->if_xname); 4755 } 4756 first = false; 4757 } 4758 } 4759 4760 rm_runlock(&hn_vfmap_lock, &pt); 4761 4762 error = sbuf_finish(sb); 4763 sbuf_delete(sb); 4764 return (error); 4765 } 4766 4767 static int 4768 hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS) 4769 { 4770 struct hn_softc *sc = arg1; 4771 int error, onoff = 0; 4772 4773 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) 4774 onoff = 1; 4775 error = sysctl_handle_int(oidp, &onoff, 0, req); 4776 if (error || req->newptr == NULL) 4777 return (error); 4778 4779 HN_LOCK(sc); 4780 /* NOTE: hn_vf_lock for hn_transmit() */ 4781 rm_wlock(&sc->hn_vf_lock); 4782 if (onoff) 4783 sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; 4784 else 4785 sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF; 4786 rm_wunlock(&sc->hn_vf_lock); 4787 HN_UNLOCK(sc); 4788 4789 return (0); 4790 } 4791 4792 static int 4793 hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS) 4794 { 4795 struct hn_softc *sc = arg1; 4796 int enabled = 0; 4797 4798 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 4799 enabled = 1; 4800 return (sysctl_handle_int(oidp, &enabled, 0, req)); 4801 } 4802 4803 static int 4804 hn_check_iplen(const struct mbuf *m, int hoff) 4805 { 4806 const struct ip *ip; 4807 int len, iphlen, iplen; 4808 const struct tcphdr *th; 4809 int thoff; /* TCP data offset */ 4810 4811 len = hoff + sizeof(struct ip); 4812 4813 /* The packet must be at least the size of an IP header. */ 4814 if (m->m_pkthdr.len < len) 4815 return IPPROTO_DONE; 4816 4817 /* The fixed IP header must reside completely in the first mbuf. */ 4818 if (m->m_len < len) 4819 return IPPROTO_DONE; 4820 4821 ip = mtodo(m, hoff); 4822 4823 /* Bound check the packet's stated IP header length. */ 4824 iphlen = ip->ip_hl << 2; 4825 if (iphlen < sizeof(struct ip)) /* minimum header length */ 4826 return IPPROTO_DONE; 4827 4828 /* The full IP header must reside completely in the one mbuf. */ 4829 if (m->m_len < hoff + iphlen) 4830 return IPPROTO_DONE; 4831 4832 iplen = ntohs(ip->ip_len); 4833 4834 /* 4835 * Check that the amount of data in the buffers is as 4836 * at least much as the IP header would have us expect. 4837 */ 4838 if (m->m_pkthdr.len < hoff + iplen) 4839 return IPPROTO_DONE; 4840 4841 /* 4842 * Ignore IP fragments. 4843 */ 4844 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 4845 return IPPROTO_DONE; 4846 4847 /* 4848 * The TCP/IP or UDP/IP header must be entirely contained within 4849 * the first fragment of a packet. 4850 */ 4851 switch (ip->ip_p) { 4852 case IPPROTO_TCP: 4853 if (iplen < iphlen + sizeof(struct tcphdr)) 4854 return IPPROTO_DONE; 4855 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 4856 return IPPROTO_DONE; 4857 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 4858 thoff = th->th_off << 2; 4859 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 4860 return IPPROTO_DONE; 4861 if (m->m_len < hoff + iphlen + thoff) 4862 return IPPROTO_DONE; 4863 break; 4864 case IPPROTO_UDP: 4865 if (iplen < iphlen + sizeof(struct udphdr)) 4866 return IPPROTO_DONE; 4867 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 4868 return IPPROTO_DONE; 4869 break; 4870 default: 4871 if (iplen < iphlen) 4872 return IPPROTO_DONE; 4873 break; 4874 } 4875 return ip->ip_p; 4876 } 4877 4878 static void 4879 hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto) 4880 { 4881 const struct ether_header *eh; 4882 uint16_t etype; 4883 int hoff; 4884 4885 hoff = sizeof(*eh); 4886 /* Checked at the beginning of this function. */ 4887 KASSERT(m_new->m_len >= hoff, ("not ethernet frame")); 4888 4889 eh = mtod(m_new, const struct ether_header *); 4890 etype = ntohs(eh->ether_type); 4891 if (etype == ETHERTYPE_VLAN) { 4892 const struct ether_vlan_header *evl; 4893 4894 hoff = sizeof(*evl); 4895 if (m_new->m_len < hoff) 4896 return; 4897 evl = mtod(m_new, const struct ether_vlan_header *); 4898 etype = ntohs(evl->evl_proto); 4899 } 4900 *l3proto = etype; 4901 4902 if (etype == ETHERTYPE_IP) 4903 *l4proto = hn_check_iplen(m_new, hoff); 4904 else 4905 *l4proto = IPPROTO_DONE; 4906 } 4907 4908 static int 4909 hn_create_rx_data(struct hn_softc *sc, int ring_cnt) 4910 { 4911 struct sysctl_oid_list *child; 4912 struct sysctl_ctx_list *ctx; 4913 device_t dev = sc->hn_dev; 4914 #if defined(INET) || defined(INET6) 4915 #if __FreeBSD_version >= 1100095 4916 int lroent_cnt; 4917 #endif 4918 #endif 4919 int i; 4920 4921 /* 4922 * Create RXBUF for reception. 4923 * 4924 * NOTE: 4925 * - It is shared by all channels. 4926 * - A large enough buffer is allocated, certain version of NVSes 4927 * may further limit the usable space. 4928 */ 4929 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4930 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 4931 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4932 if (sc->hn_rxbuf == NULL) { 4933 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 4934 return (ENOMEM); 4935 } 4936 4937 sc->hn_rx_ring_cnt = ring_cnt; 4938 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 4939 4940 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 4941 M_DEVBUF, M_WAITOK | M_ZERO); 4942 4943 #if defined(INET) || defined(INET6) 4944 #if __FreeBSD_version >= 1100095 4945 lroent_cnt = hn_lro_entry_count; 4946 if (lroent_cnt < TCP_LRO_ENTRIES) 4947 lroent_cnt = TCP_LRO_ENTRIES; 4948 if (bootverbose) 4949 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 4950 #endif 4951 #endif /* INET || INET6 */ 4952 4953 ctx = device_get_sysctl_ctx(dev); 4954 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 4955 4956 /* Create dev.hn.UNIT.rx sysctl tree */ 4957 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 4958 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4959 4960 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4961 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4962 4963 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 4964 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 4965 &rxr->hn_br_dma, BUS_DMA_WAITOK); 4966 if (rxr->hn_br == NULL) { 4967 device_printf(dev, "allocate bufring failed\n"); 4968 return (ENOMEM); 4969 } 4970 4971 if (hn_trust_hosttcp) 4972 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 4973 if (hn_trust_hostudp) 4974 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 4975 if (hn_trust_hostip) 4976 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 4977 rxr->hn_mbuf_hash = NDIS_HASH_ALL; 4978 rxr->hn_ifp = sc->hn_ifp; 4979 if (i < sc->hn_tx_ring_cnt) 4980 rxr->hn_txr = &sc->hn_tx_ring[i]; 4981 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 4982 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 4983 rxr->hn_rx_idx = i; 4984 rxr->hn_rxbuf = sc->hn_rxbuf; 4985 4986 /* 4987 * Initialize LRO. 4988 */ 4989 #if defined(INET) || defined(INET6) 4990 #if __FreeBSD_version >= 1100095 4991 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 4992 hn_lro_mbufq_depth); 4993 #else 4994 tcp_lro_init(&rxr->hn_lro); 4995 rxr->hn_lro.ifp = sc->hn_ifp; 4996 #endif 4997 #if __FreeBSD_version >= 1100099 4998 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 4999 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 5000 #endif 5001 #endif /* INET || INET6 */ 5002 5003 if (sc->hn_rx_sysctl_tree != NULL) { 5004 char name[16]; 5005 5006 /* 5007 * Create per RX ring sysctl tree: 5008 * dev.hn.UNIT.rx.RINGID 5009 */ 5010 snprintf(name, sizeof(name), "%d", i); 5011 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 5012 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 5013 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5014 5015 if (rxr->hn_rx_sysctl_tree != NULL) { 5016 SYSCTL_ADD_ULONG(ctx, 5017 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5018 OID_AUTO, "packets", CTLFLAG_RW, 5019 &rxr->hn_pkts, "# of packets received"); 5020 SYSCTL_ADD_ULONG(ctx, 5021 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5022 OID_AUTO, "rss_pkts", CTLFLAG_RW, 5023 &rxr->hn_rss_pkts, 5024 "# of packets w/ RSS info received"); 5025 SYSCTL_ADD_INT(ctx, 5026 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 5027 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 5028 &rxr->hn_pktbuf_len, 0, 5029 "Temporary channel packet buffer length"); 5030 } 5031 } 5032 } 5033 5034 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 5035 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5036 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 5037 #if __FreeBSD_version < 1100095 5038 hn_rx_stat_int_sysctl, 5039 #else 5040 hn_rx_stat_u64_sysctl, 5041 #endif 5042 "LU", "LRO queued"); 5043 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 5044 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5045 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 5046 #if __FreeBSD_version < 1100095 5047 hn_rx_stat_int_sysctl, 5048 #else 5049 hn_rx_stat_u64_sysctl, 5050 #endif 5051 "LU", "LRO flushed"); 5052 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 5053 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5054 __offsetof(struct hn_rx_ring, hn_lro_tried), 5055 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 5056 #if __FreeBSD_version >= 1100099 5057 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 5058 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5059 hn_lro_lenlim_sysctl, "IU", 5060 "Max # of data bytes to be aggregated by LRO"); 5061 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 5062 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5063 hn_lro_ackcnt_sysctl, "I", 5064 "Max # of ACKs to be aggregated by LRO"); 5065 #endif 5066 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 5067 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 5068 hn_trust_hcsum_sysctl, "I", 5069 "Trust tcp segement verification on host side, " 5070 "when csum info is missing"); 5071 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 5072 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 5073 hn_trust_hcsum_sysctl, "I", 5074 "Trust udp datagram verification on host side, " 5075 "when csum info is missing"); 5076 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 5077 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 5078 hn_trust_hcsum_sysctl, "I", 5079 "Trust ip packet verification on host side, " 5080 "when csum info is missing"); 5081 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 5082 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5083 __offsetof(struct hn_rx_ring, hn_csum_ip), 5084 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 5085 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 5086 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5087 __offsetof(struct hn_rx_ring, hn_csum_tcp), 5088 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 5089 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 5090 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5091 __offsetof(struct hn_rx_ring, hn_csum_udp), 5092 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 5093 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 5094 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5095 __offsetof(struct hn_rx_ring, hn_csum_trusted), 5096 hn_rx_stat_ulong_sysctl, "LU", 5097 "# of packets that we trust host's csum verification"); 5098 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 5099 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5100 __offsetof(struct hn_rx_ring, hn_small_pkts), 5101 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 5102 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 5103 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5104 __offsetof(struct hn_rx_ring, hn_ack_failed), 5105 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 5106 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 5107 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 5108 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 5109 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 5110 5111 return (0); 5112 } 5113 5114 static void 5115 hn_destroy_rx_data(struct hn_softc *sc) 5116 { 5117 int i; 5118 5119 if (sc->hn_rxbuf != NULL) { 5120 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) 5121 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 5122 else 5123 device_printf(sc->hn_dev, "RXBUF is referenced\n"); 5124 sc->hn_rxbuf = NULL; 5125 } 5126 5127 if (sc->hn_rx_ring_cnt == 0) 5128 return; 5129 5130 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 5131 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 5132 5133 if (rxr->hn_br == NULL) 5134 continue; 5135 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { 5136 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 5137 } else { 5138 device_printf(sc->hn_dev, 5139 "%dth channel bufring is referenced", i); 5140 } 5141 rxr->hn_br = NULL; 5142 5143 #if defined(INET) || defined(INET6) 5144 tcp_lro_free(&rxr->hn_lro); 5145 #endif 5146 free(rxr->hn_pktbuf, M_DEVBUF); 5147 } 5148 free(sc->hn_rx_ring, M_DEVBUF); 5149 sc->hn_rx_ring = NULL; 5150 5151 sc->hn_rx_ring_cnt = 0; 5152 sc->hn_rx_ring_inuse = 0; 5153 } 5154 5155 static int 5156 hn_tx_ring_create(struct hn_softc *sc, int id) 5157 { 5158 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 5159 device_t dev = sc->hn_dev; 5160 bus_dma_tag_t parent_dtag; 5161 int error, i; 5162 5163 txr->hn_sc = sc; 5164 txr->hn_tx_idx = id; 5165 5166 #ifndef HN_USE_TXDESC_BUFRING 5167 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 5168 #endif 5169 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 5170 5171 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 5172 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 5173 M_DEVBUF, M_WAITOK | M_ZERO); 5174 #ifndef HN_USE_TXDESC_BUFRING 5175 SLIST_INIT(&txr->hn_txlist); 5176 #else 5177 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 5178 M_WAITOK, &txr->hn_tx_lock); 5179 #endif 5180 5181 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { 5182 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( 5183 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); 5184 } else { 5185 txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; 5186 } 5187 5188 #ifdef HN_IFSTART_SUPPORT 5189 if (hn_use_if_start) { 5190 txr->hn_txeof = hn_start_txeof; 5191 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 5192 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 5193 } else 5194 #endif 5195 { 5196 int br_depth; 5197 5198 txr->hn_txeof = hn_xmit_txeof; 5199 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 5200 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 5201 5202 br_depth = hn_get_txswq_depth(txr); 5203 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 5204 M_WAITOK, &txr->hn_tx_lock); 5205 } 5206 5207 txr->hn_direct_tx_size = hn_direct_tx_size; 5208 5209 /* 5210 * Always schedule transmission instead of trying to do direct 5211 * transmission. This one gives the best performance so far. 5212 */ 5213 txr->hn_sched_tx = 1; 5214 5215 parent_dtag = bus_get_dma_tag(dev); 5216 5217 /* DMA tag for RNDIS packet messages. */ 5218 error = bus_dma_tag_create(parent_dtag, /* parent */ 5219 HN_RNDIS_PKT_ALIGN, /* alignment */ 5220 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 5221 BUS_SPACE_MAXADDR, /* lowaddr */ 5222 BUS_SPACE_MAXADDR, /* highaddr */ 5223 NULL, NULL, /* filter, filterarg */ 5224 HN_RNDIS_PKT_LEN, /* maxsize */ 5225 1, /* nsegments */ 5226 HN_RNDIS_PKT_LEN, /* maxsegsize */ 5227 0, /* flags */ 5228 NULL, /* lockfunc */ 5229 NULL, /* lockfuncarg */ 5230 &txr->hn_tx_rndis_dtag); 5231 if (error) { 5232 device_printf(dev, "failed to create rndis dmatag\n"); 5233 return error; 5234 } 5235 5236 /* DMA tag for data. */ 5237 error = bus_dma_tag_create(parent_dtag, /* parent */ 5238 1, /* alignment */ 5239 HN_TX_DATA_BOUNDARY, /* boundary */ 5240 BUS_SPACE_MAXADDR, /* lowaddr */ 5241 BUS_SPACE_MAXADDR, /* highaddr */ 5242 NULL, NULL, /* filter, filterarg */ 5243 HN_TX_DATA_MAXSIZE, /* maxsize */ 5244 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 5245 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 5246 0, /* flags */ 5247 NULL, /* lockfunc */ 5248 NULL, /* lockfuncarg */ 5249 &txr->hn_tx_data_dtag); 5250 if (error) { 5251 device_printf(dev, "failed to create data dmatag\n"); 5252 return error; 5253 } 5254 5255 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 5256 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 5257 5258 txd->txr = txr; 5259 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 5260 STAILQ_INIT(&txd->agg_list); 5261 5262 /* 5263 * Allocate and load RNDIS packet message. 5264 */ 5265 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 5266 (void **)&txd->rndis_pkt, 5267 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 5268 &txd->rndis_pkt_dmap); 5269 if (error) { 5270 device_printf(dev, 5271 "failed to allocate rndis_packet_msg, %d\n", i); 5272 return error; 5273 } 5274 5275 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 5276 txd->rndis_pkt_dmap, 5277 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 5278 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 5279 BUS_DMA_NOWAIT); 5280 if (error) { 5281 device_printf(dev, 5282 "failed to load rndis_packet_msg, %d\n", i); 5283 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5284 txd->rndis_pkt, txd->rndis_pkt_dmap); 5285 return error; 5286 } 5287 5288 /* DMA map for TX data. */ 5289 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 5290 &txd->data_dmap); 5291 if (error) { 5292 device_printf(dev, 5293 "failed to allocate tx data dmamap\n"); 5294 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 5295 txd->rndis_pkt_dmap); 5296 bus_dmamem_free(txr->hn_tx_rndis_dtag, 5297 txd->rndis_pkt, txd->rndis_pkt_dmap); 5298 return error; 5299 } 5300 5301 /* All set, put it to list */ 5302 txd->flags |= HN_TXD_FLAG_ONLIST; 5303 #ifndef HN_USE_TXDESC_BUFRING 5304 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 5305 #else 5306 buf_ring_enqueue(txr->hn_txdesc_br, txd); 5307 #endif 5308 } 5309 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 5310 5311 if (sc->hn_tx_sysctl_tree != NULL) { 5312 struct sysctl_oid_list *child; 5313 struct sysctl_ctx_list *ctx; 5314 char name[16]; 5315 5316 /* 5317 * Create per TX ring sysctl tree: 5318 * dev.hn.UNIT.tx.RINGID 5319 */ 5320 ctx = device_get_sysctl_ctx(dev); 5321 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 5322 5323 snprintf(name, sizeof(name), "%d", id); 5324 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 5325 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5326 5327 if (txr->hn_tx_sysctl_tree != NULL) { 5328 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 5329 5330 #ifdef HN_DEBUG 5331 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 5332 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 5333 "# of available TX descs"); 5334 #endif 5335 #ifdef HN_IFSTART_SUPPORT 5336 if (!hn_use_if_start) 5337 #endif 5338 { 5339 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 5340 CTLFLAG_RD, &txr->hn_oactive, 0, 5341 "over active"); 5342 } 5343 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 5344 CTLFLAG_RW, &txr->hn_pkts, 5345 "# of packets transmitted"); 5346 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 5347 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 5348 } 5349 } 5350 5351 return 0; 5352 } 5353 5354 static void 5355 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 5356 { 5357 struct hn_tx_ring *txr = txd->txr; 5358 5359 KASSERT(txd->m == NULL, ("still has mbuf installed")); 5360 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 5361 5362 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 5363 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 5364 txd->rndis_pkt_dmap); 5365 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 5366 } 5367 5368 static void 5369 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) 5370 { 5371 5372 KASSERT(txd->refs == 0 || txd->refs == 1, 5373 ("invalid txd refs %d", txd->refs)); 5374 5375 /* Aggregated txds will be freed by their aggregating txd. */ 5376 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { 5377 int freed; 5378 5379 freed = hn_txdesc_put(txr, txd); 5380 KASSERT(freed, ("can't free txdesc")); 5381 } 5382 } 5383 5384 static void 5385 hn_tx_ring_destroy(struct hn_tx_ring *txr) 5386 { 5387 int i; 5388 5389 if (txr->hn_txdesc == NULL) 5390 return; 5391 5392 /* 5393 * NOTE: 5394 * Because the freeing of aggregated txds will be deferred 5395 * to the aggregating txd, two passes are used here: 5396 * - The first pass GCes any pending txds. This GC is necessary, 5397 * since if the channels are revoked, hypervisor will not 5398 * deliver send-done for all pending txds. 5399 * - The second pass frees the busdma stuffs, i.e. after all txds 5400 * were freed. 5401 */ 5402 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5403 hn_txdesc_gc(txr, &txr->hn_txdesc[i]); 5404 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 5405 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); 5406 5407 if (txr->hn_tx_data_dtag != NULL) 5408 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 5409 if (txr->hn_tx_rndis_dtag != NULL) 5410 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 5411 5412 #ifdef HN_USE_TXDESC_BUFRING 5413 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 5414 #endif 5415 5416 free(txr->hn_txdesc, M_DEVBUF); 5417 txr->hn_txdesc = NULL; 5418 5419 if (txr->hn_mbuf_br != NULL) 5420 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 5421 5422 #ifndef HN_USE_TXDESC_BUFRING 5423 mtx_destroy(&txr->hn_txlist_spin); 5424 #endif 5425 mtx_destroy(&txr->hn_tx_lock); 5426 } 5427 5428 static int 5429 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 5430 { 5431 struct sysctl_oid_list *child; 5432 struct sysctl_ctx_list *ctx; 5433 int i; 5434 5435 /* 5436 * Create TXBUF for chimney sending. 5437 * 5438 * NOTE: It is shared by all channels. 5439 */ 5440 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 5441 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 5442 BUS_DMA_WAITOK | BUS_DMA_ZERO); 5443 if (sc->hn_chim == NULL) { 5444 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 5445 return (ENOMEM); 5446 } 5447 5448 sc->hn_tx_ring_cnt = ring_cnt; 5449 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 5450 5451 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 5452 M_DEVBUF, M_WAITOK | M_ZERO); 5453 5454 ctx = device_get_sysctl_ctx(sc->hn_dev); 5455 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 5456 5457 /* Create dev.hn.UNIT.tx sysctl tree */ 5458 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 5459 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 5460 5461 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 5462 int error; 5463 5464 error = hn_tx_ring_create(sc, i); 5465 if (error) 5466 return error; 5467 } 5468 5469 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 5470 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5471 __offsetof(struct hn_tx_ring, hn_no_txdescs), 5472 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 5473 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 5474 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5475 __offsetof(struct hn_tx_ring, hn_send_failed), 5476 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 5477 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 5478 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5479 __offsetof(struct hn_tx_ring, hn_txdma_failed), 5480 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 5481 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 5482 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5483 __offsetof(struct hn_tx_ring, hn_flush_failed), 5484 hn_tx_stat_ulong_sysctl, "LU", 5485 "# of packet transmission aggregation flush failure"); 5486 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 5487 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5488 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 5489 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 5490 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 5491 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5492 __offsetof(struct hn_tx_ring, hn_tx_chimney), 5493 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 5494 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 5495 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5496 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 5497 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 5498 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 5499 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 5500 "# of total TX descs"); 5501 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 5502 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 5503 "Chimney send packet size upper boundary"); 5504 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 5505 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 5506 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 5507 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 5508 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5509 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 5510 hn_tx_conf_int_sysctl, "I", 5511 "Size of the packet for direct transmission"); 5512 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 5513 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 5514 __offsetof(struct hn_tx_ring, hn_sched_tx), 5515 hn_tx_conf_int_sysctl, "I", 5516 "Always schedule transmission " 5517 "instead of doing direct transmission"); 5518 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 5519 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 5520 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 5521 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 5522 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 5523 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 5524 "Applied packet transmission aggregation size"); 5525 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 5526 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5527 hn_txagg_pktmax_sysctl, "I", 5528 "Applied packet transmission aggregation packets"); 5529 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 5530 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 5531 hn_txagg_align_sysctl, "I", 5532 "Applied packet transmission aggregation alignment"); 5533 5534 return 0; 5535 } 5536 5537 static void 5538 hn_set_chim_size(struct hn_softc *sc, int chim_size) 5539 { 5540 int i; 5541 5542 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5543 sc->hn_tx_ring[i].hn_chim_size = chim_size; 5544 } 5545 5546 static void 5547 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 5548 { 5549 struct ifnet *ifp = sc->hn_ifp; 5550 u_int hw_tsomax; 5551 int tso_minlen; 5552 5553 HN_LOCK_ASSERT(sc); 5554 5555 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 5556 return; 5557 5558 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 5559 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 5560 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 5561 5562 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 5563 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 5564 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 5565 5566 if (tso_maxlen < tso_minlen) 5567 tso_maxlen = tso_minlen; 5568 else if (tso_maxlen > IP_MAXPACKET) 5569 tso_maxlen = IP_MAXPACKET; 5570 if (tso_maxlen > sc->hn_ndis_tso_szmax) 5571 tso_maxlen = sc->hn_ndis_tso_szmax; 5572 hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 5573 5574 if (hn_xpnt_vf_isready(sc)) { 5575 if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) 5576 hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; 5577 } 5578 ifp->if_hw_tsomax = hw_tsomax; 5579 if (bootverbose) 5580 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 5581 } 5582 5583 static void 5584 hn_fixup_tx_data(struct hn_softc *sc) 5585 { 5586 uint64_t csum_assist; 5587 int i; 5588 5589 hn_set_chim_size(sc, sc->hn_chim_szmax); 5590 if (hn_tx_chimney_size > 0 && 5591 hn_tx_chimney_size < sc->hn_chim_szmax) 5592 hn_set_chim_size(sc, hn_tx_chimney_size); 5593 5594 csum_assist = 0; 5595 if (sc->hn_caps & HN_CAP_IPCS) 5596 csum_assist |= CSUM_IP; 5597 if (sc->hn_caps & HN_CAP_TCP4CS) 5598 csum_assist |= CSUM_IP_TCP; 5599 if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs) 5600 csum_assist |= CSUM_IP_UDP; 5601 if (sc->hn_caps & HN_CAP_TCP6CS) 5602 csum_assist |= CSUM_IP6_TCP; 5603 if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs) 5604 csum_assist |= CSUM_IP6_UDP; 5605 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5606 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 5607 5608 if (sc->hn_caps & HN_CAP_HASHVAL) { 5609 /* 5610 * Support HASHVAL pktinfo on TX path. 5611 */ 5612 if (bootverbose) 5613 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 5614 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5615 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 5616 } 5617 } 5618 5619 static void 5620 hn_fixup_rx_data(struct hn_softc *sc) 5621 { 5622 5623 if (sc->hn_caps & HN_CAP_UDPHASH) { 5624 int i; 5625 5626 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 5627 sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH; 5628 } 5629 } 5630 5631 static void 5632 hn_destroy_tx_data(struct hn_softc *sc) 5633 { 5634 int i; 5635 5636 if (sc->hn_chim != NULL) { 5637 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { 5638 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 5639 } else { 5640 device_printf(sc->hn_dev, 5641 "chimney sending buffer is referenced"); 5642 } 5643 sc->hn_chim = NULL; 5644 } 5645 5646 if (sc->hn_tx_ring_cnt == 0) 5647 return; 5648 5649 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 5650 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 5651 5652 free(sc->hn_tx_ring, M_DEVBUF); 5653 sc->hn_tx_ring = NULL; 5654 5655 sc->hn_tx_ring_cnt = 0; 5656 sc->hn_tx_ring_inuse = 0; 5657 } 5658 5659 #ifdef HN_IFSTART_SUPPORT 5660 5661 static void 5662 hn_start_taskfunc(void *xtxr, int pending __unused) 5663 { 5664 struct hn_tx_ring *txr = xtxr; 5665 5666 mtx_lock(&txr->hn_tx_lock); 5667 hn_start_locked(txr, 0); 5668 mtx_unlock(&txr->hn_tx_lock); 5669 } 5670 5671 static int 5672 hn_start_locked(struct hn_tx_ring *txr, int len) 5673 { 5674 struct hn_softc *sc = txr->hn_sc; 5675 struct ifnet *ifp = sc->hn_ifp; 5676 int sched = 0; 5677 5678 KASSERT(hn_use_if_start, 5679 ("hn_start_locked is called, when if_start is disabled")); 5680 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5681 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5682 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5683 5684 if (__predict_false(txr->hn_suspended)) 5685 return (0); 5686 5687 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 5688 IFF_DRV_RUNNING) 5689 return (0); 5690 5691 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 5692 struct hn_txdesc *txd; 5693 struct mbuf *m_head; 5694 int error; 5695 5696 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 5697 if (m_head == NULL) 5698 break; 5699 5700 if (len > 0 && m_head->m_pkthdr.len > len) { 5701 /* 5702 * This sending could be time consuming; let callers 5703 * dispatch this packet sending (and sending of any 5704 * following up packets) to tx taskqueue. 5705 */ 5706 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5707 sched = 1; 5708 break; 5709 } 5710 5711 #if defined(INET6) || defined(INET) 5712 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 5713 m_head = hn_tso_fixup(m_head); 5714 if (__predict_false(m_head == NULL)) { 5715 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5716 continue; 5717 } 5718 } else if (m_head->m_pkthdr.csum_flags & 5719 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 5720 m_head = hn_set_hlen(m_head); 5721 if (__predict_false(m_head == NULL)) { 5722 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5723 continue; 5724 } 5725 } 5726 #endif 5727 5728 txd = hn_txdesc_get(txr); 5729 if (txd == NULL) { 5730 txr->hn_no_txdescs++; 5731 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5732 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5733 break; 5734 } 5735 5736 error = hn_encap(ifp, txr, txd, &m_head); 5737 if (error) { 5738 /* Both txd and m_head are freed */ 5739 KASSERT(txr->hn_agg_txd == NULL, 5740 ("encap failed w/ pending aggregating txdesc")); 5741 continue; 5742 } 5743 5744 if (txr->hn_agg_pktleft == 0) { 5745 if (txr->hn_agg_txd != NULL) { 5746 KASSERT(m_head == NULL, 5747 ("pending mbuf for aggregating txdesc")); 5748 error = hn_flush_txagg(ifp, txr); 5749 if (__predict_false(error)) { 5750 atomic_set_int(&ifp->if_drv_flags, 5751 IFF_DRV_OACTIVE); 5752 break; 5753 } 5754 } else { 5755 KASSERT(m_head != NULL, ("mbuf was freed")); 5756 error = hn_txpkt(ifp, txr, txd); 5757 if (__predict_false(error)) { 5758 /* txd is freed, but m_head is not */ 5759 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 5760 atomic_set_int(&ifp->if_drv_flags, 5761 IFF_DRV_OACTIVE); 5762 break; 5763 } 5764 } 5765 } 5766 #ifdef INVARIANTS 5767 else { 5768 KASSERT(txr->hn_agg_txd != NULL, 5769 ("no aggregating txdesc")); 5770 KASSERT(m_head == NULL, 5771 ("pending mbuf for aggregating txdesc")); 5772 } 5773 #endif 5774 } 5775 5776 /* Flush pending aggerated transmission. */ 5777 if (txr->hn_agg_txd != NULL) 5778 hn_flush_txagg(ifp, txr); 5779 return (sched); 5780 } 5781 5782 static void 5783 hn_start(struct ifnet *ifp) 5784 { 5785 struct hn_softc *sc = ifp->if_softc; 5786 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 5787 5788 if (txr->hn_sched_tx) 5789 goto do_sched; 5790 5791 if (mtx_trylock(&txr->hn_tx_lock)) { 5792 int sched; 5793 5794 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5795 mtx_unlock(&txr->hn_tx_lock); 5796 if (!sched) 5797 return; 5798 } 5799 do_sched: 5800 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 5801 } 5802 5803 static void 5804 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 5805 { 5806 struct hn_tx_ring *txr = xtxr; 5807 5808 mtx_lock(&txr->hn_tx_lock); 5809 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 5810 hn_start_locked(txr, 0); 5811 mtx_unlock(&txr->hn_tx_lock); 5812 } 5813 5814 static void 5815 hn_start_txeof(struct hn_tx_ring *txr) 5816 { 5817 struct hn_softc *sc = txr->hn_sc; 5818 struct ifnet *ifp = sc->hn_ifp; 5819 5820 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 5821 5822 if (txr->hn_sched_tx) 5823 goto do_sched; 5824 5825 if (mtx_trylock(&txr->hn_tx_lock)) { 5826 int sched; 5827 5828 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5829 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 5830 mtx_unlock(&txr->hn_tx_lock); 5831 if (sched) { 5832 taskqueue_enqueue(txr->hn_tx_taskq, 5833 &txr->hn_tx_task); 5834 } 5835 } else { 5836 do_sched: 5837 /* 5838 * Release the OACTIVE earlier, with the hope, that 5839 * others could catch up. The task will clear the 5840 * flag again with the hn_tx_lock to avoid possible 5841 * races. 5842 */ 5843 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 5844 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 5845 } 5846 } 5847 5848 #endif /* HN_IFSTART_SUPPORT */ 5849 5850 static int 5851 hn_xmit(struct hn_tx_ring *txr, int len) 5852 { 5853 struct hn_softc *sc = txr->hn_sc; 5854 struct ifnet *ifp = sc->hn_ifp; 5855 struct mbuf *m_head; 5856 int sched = 0; 5857 5858 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 5859 #ifdef HN_IFSTART_SUPPORT 5860 KASSERT(hn_use_if_start == 0, 5861 ("hn_xmit is called, when if_start is enabled")); 5862 #endif 5863 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 5864 5865 if (__predict_false(txr->hn_suspended)) 5866 return (0); 5867 5868 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 5869 return (0); 5870 5871 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 5872 struct hn_txdesc *txd; 5873 int error; 5874 5875 if (len > 0 && m_head->m_pkthdr.len > len) { 5876 /* 5877 * This sending could be time consuming; let callers 5878 * dispatch this packet sending (and sending of any 5879 * following up packets) to tx taskqueue. 5880 */ 5881 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5882 sched = 1; 5883 break; 5884 } 5885 5886 txd = hn_txdesc_get(txr); 5887 if (txd == NULL) { 5888 txr->hn_no_txdescs++; 5889 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 5890 txr->hn_oactive = 1; 5891 break; 5892 } 5893 5894 error = hn_encap(ifp, txr, txd, &m_head); 5895 if (error) { 5896 /* Both txd and m_head are freed; discard */ 5897 KASSERT(txr->hn_agg_txd == NULL, 5898 ("encap failed w/ pending aggregating txdesc")); 5899 drbr_advance(ifp, txr->hn_mbuf_br); 5900 continue; 5901 } 5902 5903 if (txr->hn_agg_pktleft == 0) { 5904 if (txr->hn_agg_txd != NULL) { 5905 KASSERT(m_head == NULL, 5906 ("pending mbuf for aggregating txdesc")); 5907 error = hn_flush_txagg(ifp, txr); 5908 if (__predict_false(error)) { 5909 txr->hn_oactive = 1; 5910 break; 5911 } 5912 } else { 5913 KASSERT(m_head != NULL, ("mbuf was freed")); 5914 error = hn_txpkt(ifp, txr, txd); 5915 if (__predict_false(error)) { 5916 /* txd is freed, but m_head is not */ 5917 drbr_putback(ifp, txr->hn_mbuf_br, 5918 m_head); 5919 txr->hn_oactive = 1; 5920 break; 5921 } 5922 } 5923 } 5924 #ifdef INVARIANTS 5925 else { 5926 KASSERT(txr->hn_agg_txd != NULL, 5927 ("no aggregating txdesc")); 5928 KASSERT(m_head == NULL, 5929 ("pending mbuf for aggregating txdesc")); 5930 } 5931 #endif 5932 5933 /* Sent */ 5934 drbr_advance(ifp, txr->hn_mbuf_br); 5935 } 5936 5937 /* Flush pending aggerated transmission. */ 5938 if (txr->hn_agg_txd != NULL) 5939 hn_flush_txagg(ifp, txr); 5940 return (sched); 5941 } 5942 5943 static int 5944 hn_transmit(struct ifnet *ifp, struct mbuf *m) 5945 { 5946 struct hn_softc *sc = ifp->if_softc; 5947 struct hn_tx_ring *txr; 5948 int error, idx = 0; 5949 5950 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { 5951 struct rm_priotracker pt; 5952 5953 rm_rlock(&sc->hn_vf_lock, &pt); 5954 if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 5955 struct mbuf *m_bpf = NULL; 5956 int obytes, omcast; 5957 5958 obytes = m->m_pkthdr.len; 5959 omcast = (m->m_flags & M_MCAST) != 0; 5960 5961 if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) { 5962 if (bpf_peers_present(ifp->if_bpf)) { 5963 m_bpf = m_copypacket(m, M_NOWAIT); 5964 if (m_bpf == NULL) { 5965 /* 5966 * Failed to grab a shallow 5967 * copy; tap now. 5968 */ 5969 ETHER_BPF_MTAP(ifp, m); 5970 } 5971 } 5972 } else { 5973 ETHER_BPF_MTAP(ifp, m); 5974 } 5975 5976 error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m); 5977 rm_runlock(&sc->hn_vf_lock, &pt); 5978 5979 if (m_bpf != NULL) { 5980 if (!error) 5981 ETHER_BPF_MTAP(ifp, m_bpf); 5982 m_freem(m_bpf); 5983 } 5984 5985 if (error == ENOBUFS) { 5986 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 5987 } else if (error) { 5988 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 5989 } else { 5990 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 5991 if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes); 5992 if (omcast) { 5993 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 5994 omcast); 5995 } 5996 } 5997 return (error); 5998 } 5999 rm_runlock(&sc->hn_vf_lock, &pt); 6000 } 6001 6002 #if defined(INET6) || defined(INET) 6003 /* 6004 * Perform TSO packet header fixup or get l2/l3 header length now, 6005 * since packet headers should be cache-hot. 6006 */ 6007 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 6008 m = hn_tso_fixup(m); 6009 if (__predict_false(m == NULL)) { 6010 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6011 return EIO; 6012 } 6013 } else if (m->m_pkthdr.csum_flags & 6014 (CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) { 6015 m = hn_set_hlen(m); 6016 if (__predict_false(m == NULL)) { 6017 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 6018 return EIO; 6019 } 6020 } 6021 #endif 6022 6023 /* 6024 * Select the TX ring based on flowid 6025 */ 6026 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 6027 #ifdef RSS 6028 uint32_t bid; 6029 6030 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 6031 &bid) == 0) 6032 idx = bid % sc->hn_tx_ring_inuse; 6033 else 6034 #endif 6035 { 6036 #if defined(INET6) || defined(INET) 6037 int tcpsyn = 0; 6038 6039 if (m->m_pkthdr.len < 128 && 6040 (m->m_pkthdr.csum_flags & 6041 (CSUM_IP_TCP | CSUM_IP6_TCP)) && 6042 (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 6043 m = hn_check_tcpsyn(m, &tcpsyn); 6044 if (__predict_false(m == NULL)) { 6045 if_inc_counter(ifp, 6046 IFCOUNTER_OERRORS, 1); 6047 return (EIO); 6048 } 6049 } 6050 #else 6051 const int tcpsyn = 0; 6052 #endif 6053 if (tcpsyn) 6054 idx = 0; 6055 else 6056 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 6057 } 6058 } 6059 txr = &sc->hn_tx_ring[idx]; 6060 6061 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 6062 if (error) { 6063 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 6064 return error; 6065 } 6066 6067 if (txr->hn_oactive) 6068 return 0; 6069 6070 if (txr->hn_sched_tx) 6071 goto do_sched; 6072 6073 if (mtx_trylock(&txr->hn_tx_lock)) { 6074 int sched; 6075 6076 sched = hn_xmit(txr, txr->hn_direct_tx_size); 6077 mtx_unlock(&txr->hn_tx_lock); 6078 if (!sched) 6079 return 0; 6080 } 6081 do_sched: 6082 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 6083 return 0; 6084 } 6085 6086 static void 6087 hn_tx_ring_qflush(struct hn_tx_ring *txr) 6088 { 6089 struct mbuf *m; 6090 6091 mtx_lock(&txr->hn_tx_lock); 6092 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 6093 m_freem(m); 6094 mtx_unlock(&txr->hn_tx_lock); 6095 } 6096 6097 static void 6098 hn_xmit_qflush(struct ifnet *ifp) 6099 { 6100 struct hn_softc *sc = ifp->if_softc; 6101 struct rm_priotracker pt; 6102 int i; 6103 6104 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 6105 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 6106 if_qflush(ifp); 6107 6108 rm_rlock(&sc->hn_vf_lock, &pt); 6109 if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) 6110 sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp); 6111 rm_runlock(&sc->hn_vf_lock, &pt); 6112 } 6113 6114 static void 6115 hn_xmit_txeof(struct hn_tx_ring *txr) 6116 { 6117 6118 if (txr->hn_sched_tx) 6119 goto do_sched; 6120 6121 if (mtx_trylock(&txr->hn_tx_lock)) { 6122 int sched; 6123 6124 txr->hn_oactive = 0; 6125 sched = hn_xmit(txr, txr->hn_direct_tx_size); 6126 mtx_unlock(&txr->hn_tx_lock); 6127 if (sched) { 6128 taskqueue_enqueue(txr->hn_tx_taskq, 6129 &txr->hn_tx_task); 6130 } 6131 } else { 6132 do_sched: 6133 /* 6134 * Release the oactive earlier, with the hope, that 6135 * others could catch up. The task will clear the 6136 * oactive again with the hn_tx_lock to avoid possible 6137 * races. 6138 */ 6139 txr->hn_oactive = 0; 6140 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 6141 } 6142 } 6143 6144 static void 6145 hn_xmit_taskfunc(void *xtxr, int pending __unused) 6146 { 6147 struct hn_tx_ring *txr = xtxr; 6148 6149 mtx_lock(&txr->hn_tx_lock); 6150 hn_xmit(txr, 0); 6151 mtx_unlock(&txr->hn_tx_lock); 6152 } 6153 6154 static void 6155 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) 6156 { 6157 struct hn_tx_ring *txr = xtxr; 6158 6159 mtx_lock(&txr->hn_tx_lock); 6160 txr->hn_oactive = 0; 6161 hn_xmit(txr, 0); 6162 mtx_unlock(&txr->hn_tx_lock); 6163 } 6164 6165 static int 6166 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) 6167 { 6168 struct vmbus_chan_br cbr; 6169 struct hn_rx_ring *rxr; 6170 struct hn_tx_ring *txr = NULL; 6171 int idx, error; 6172 6173 idx = vmbus_chan_subidx(chan); 6174 6175 /* 6176 * Link this channel to RX/TX ring. 6177 */ 6178 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 6179 ("invalid channel index %d, should > 0 && < %d", 6180 idx, sc->hn_rx_ring_inuse)); 6181 rxr = &sc->hn_rx_ring[idx]; 6182 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, 6183 ("RX ring %d already attached", idx)); 6184 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; 6185 rxr->hn_chan = chan; 6186 6187 if (bootverbose) { 6188 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", 6189 idx, vmbus_chan_id(chan)); 6190 } 6191 6192 if (idx < sc->hn_tx_ring_inuse) { 6193 txr = &sc->hn_tx_ring[idx]; 6194 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, 6195 ("TX ring %d already attached", idx)); 6196 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; 6197 6198 txr->hn_chan = chan; 6199 if (bootverbose) { 6200 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", 6201 idx, vmbus_chan_id(chan)); 6202 } 6203 } 6204 6205 /* Bind this channel to a proper CPU. */ 6206 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx)); 6207 6208 /* 6209 * Open this channel 6210 */ 6211 cbr.cbr = rxr->hn_br; 6212 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; 6213 cbr.cbr_txsz = HN_TXBR_SIZE; 6214 cbr.cbr_rxsz = HN_RXBR_SIZE; 6215 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); 6216 if (error) { 6217 if (error == EISCONN) { 6218 if_printf(sc->hn_ifp, "bufring is connected after " 6219 "chan%u open failure\n", vmbus_chan_id(chan)); 6220 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 6221 } else { 6222 if_printf(sc->hn_ifp, "open chan%u failed: %d\n", 6223 vmbus_chan_id(chan), error); 6224 } 6225 } 6226 return (error); 6227 } 6228 6229 static void 6230 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) 6231 { 6232 struct hn_rx_ring *rxr; 6233 int idx, error; 6234 6235 idx = vmbus_chan_subidx(chan); 6236 6237 /* 6238 * Link this channel to RX/TX ring. 6239 */ 6240 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 6241 ("invalid channel index %d, should > 0 && < %d", 6242 idx, sc->hn_rx_ring_inuse)); 6243 rxr = &sc->hn_rx_ring[idx]; 6244 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), 6245 ("RX ring %d is not attached", idx)); 6246 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; 6247 6248 if (idx < sc->hn_tx_ring_inuse) { 6249 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; 6250 6251 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), 6252 ("TX ring %d is not attached attached", idx)); 6253 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; 6254 } 6255 6256 /* 6257 * Close this channel. 6258 * 6259 * NOTE: 6260 * Channel closing does _not_ destroy the target channel. 6261 */ 6262 error = vmbus_chan_close_direct(chan); 6263 if (error == EISCONN) { 6264 if_printf(sc->hn_ifp, "chan%u bufring is connected " 6265 "after being closed\n", vmbus_chan_id(chan)); 6266 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 6267 } else if (error) { 6268 if_printf(sc->hn_ifp, "chan%u close failed: %d\n", 6269 vmbus_chan_id(chan), error); 6270 } 6271 } 6272 6273 static int 6274 hn_attach_subchans(struct hn_softc *sc) 6275 { 6276 struct vmbus_channel **subchans; 6277 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 6278 int i, error = 0; 6279 6280 KASSERT(subchan_cnt > 0, ("no sub-channels")); 6281 6282 /* Attach the sub-channels. */ 6283 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 6284 for (i = 0; i < subchan_cnt; ++i) { 6285 int error1; 6286 6287 error1 = hn_chan_attach(sc, subchans[i]); 6288 if (error1) { 6289 error = error1; 6290 /* Move on; all channels will be detached later. */ 6291 } 6292 } 6293 vmbus_subchan_rel(subchans, subchan_cnt); 6294 6295 if (error) { 6296 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); 6297 } else { 6298 if (bootverbose) { 6299 if_printf(sc->hn_ifp, "%d sub-channels attached\n", 6300 subchan_cnt); 6301 } 6302 } 6303 return (error); 6304 } 6305 6306 static void 6307 hn_detach_allchans(struct hn_softc *sc) 6308 { 6309 struct vmbus_channel **subchans; 6310 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 6311 int i; 6312 6313 if (subchan_cnt == 0) 6314 goto back; 6315 6316 /* Detach the sub-channels. */ 6317 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 6318 for (i = 0; i < subchan_cnt; ++i) 6319 hn_chan_detach(sc, subchans[i]); 6320 vmbus_subchan_rel(subchans, subchan_cnt); 6321 6322 back: 6323 /* 6324 * Detach the primary channel, _after_ all sub-channels 6325 * are detached. 6326 */ 6327 hn_chan_detach(sc, sc->hn_prichan); 6328 6329 /* Wait for sub-channels to be destroyed, if any. */ 6330 vmbus_subchan_drain(sc->hn_prichan); 6331 6332 #ifdef INVARIANTS 6333 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 6334 KASSERT((sc->hn_rx_ring[i].hn_rx_flags & 6335 HN_RX_FLAG_ATTACHED) == 0, 6336 ("%dth RX ring is still attached", i)); 6337 } 6338 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 6339 KASSERT((sc->hn_tx_ring[i].hn_tx_flags & 6340 HN_TX_FLAG_ATTACHED) == 0, 6341 ("%dth TX ring is still attached", i)); 6342 } 6343 #endif 6344 } 6345 6346 static int 6347 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) 6348 { 6349 struct vmbus_channel **subchans; 6350 int nchan, rxr_cnt, error; 6351 6352 nchan = *nsubch + 1; 6353 if (nchan == 1) { 6354 /* 6355 * Multiple RX/TX rings are not requested. 6356 */ 6357 *nsubch = 0; 6358 return (0); 6359 } 6360 6361 /* 6362 * Query RSS capabilities, e.g. # of RX rings, and # of indirect 6363 * table entries. 6364 */ 6365 error = hn_rndis_query_rsscaps(sc, &rxr_cnt); 6366 if (error) { 6367 /* No RSS; this is benign. */ 6368 *nsubch = 0; 6369 return (0); 6370 } 6371 if (bootverbose) { 6372 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", 6373 rxr_cnt, nchan); 6374 } 6375 6376 if (nchan > rxr_cnt) 6377 nchan = rxr_cnt; 6378 if (nchan == 1) { 6379 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); 6380 *nsubch = 0; 6381 return (0); 6382 } 6383 6384 /* 6385 * Allocate sub-channels from NVS. 6386 */ 6387 *nsubch = nchan - 1; 6388 error = hn_nvs_alloc_subchans(sc, nsubch); 6389 if (error || *nsubch == 0) { 6390 /* Failed to allocate sub-channels. */ 6391 *nsubch = 0; 6392 return (0); 6393 } 6394 6395 /* 6396 * Wait for all sub-channels to become ready before moving on. 6397 */ 6398 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); 6399 vmbus_subchan_rel(subchans, *nsubch); 6400 return (0); 6401 } 6402 6403 static bool 6404 hn_synth_attachable(const struct hn_softc *sc) 6405 { 6406 int i; 6407 6408 if (sc->hn_flags & HN_FLAG_ERRORS) 6409 return (false); 6410 6411 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 6412 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 6413 6414 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) 6415 return (false); 6416 } 6417 return (true); 6418 } 6419 6420 /* 6421 * Make sure that the RX filter is zero after the successful 6422 * RNDIS initialization. 6423 * 6424 * NOTE: 6425 * Under certain conditions on certain versions of Hyper-V, 6426 * the RNDIS rxfilter is _not_ zero on the hypervisor side 6427 * after the successful RNDIS initialization, which breaks 6428 * the assumption of any following code (well, it breaks the 6429 * RNDIS API contract actually). Clear the RNDIS rxfilter 6430 * explicitly, drain packets sneaking through, and drain the 6431 * interrupt taskqueues scheduled due to the stealth packets. 6432 */ 6433 static void 6434 hn_rndis_init_fixat(struct hn_softc *sc, int nchan) 6435 { 6436 6437 hn_disable_rx(sc); 6438 hn_drain_rxtx(sc, nchan); 6439 } 6440 6441 static int 6442 hn_synth_attach(struct hn_softc *sc, int mtu) 6443 { 6444 #define ATTACHED_NVS 0x0002 6445 #define ATTACHED_RNDIS 0x0004 6446 6447 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 6448 int error, nsubch, nchan = 1, i, rndis_inited; 6449 uint32_t old_caps, attached = 0; 6450 6451 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, 6452 ("synthetic parts were attached")); 6453 6454 if (!hn_synth_attachable(sc)) 6455 return (ENXIO); 6456 6457 /* Save capabilities for later verification. */ 6458 old_caps = sc->hn_caps; 6459 sc->hn_caps = 0; 6460 6461 /* Clear RSS stuffs. */ 6462 sc->hn_rss_ind_size = 0; 6463 sc->hn_rss_hash = 0; 6464 sc->hn_rss_hcap = 0; 6465 6466 /* 6467 * Attach the primary channel _before_ attaching NVS and RNDIS. 6468 */ 6469 error = hn_chan_attach(sc, sc->hn_prichan); 6470 if (error) 6471 goto failed; 6472 6473 /* 6474 * Attach NVS. 6475 */ 6476 error = hn_nvs_attach(sc, mtu); 6477 if (error) 6478 goto failed; 6479 attached |= ATTACHED_NVS; 6480 6481 /* 6482 * Attach RNDIS _after_ NVS is attached. 6483 */ 6484 error = hn_rndis_attach(sc, mtu, &rndis_inited); 6485 if (rndis_inited) 6486 attached |= ATTACHED_RNDIS; 6487 if (error) 6488 goto failed; 6489 6490 /* 6491 * Make sure capabilities are not changed. 6492 */ 6493 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { 6494 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", 6495 old_caps, sc->hn_caps); 6496 error = ENXIO; 6497 goto failed; 6498 } 6499 6500 /* 6501 * Allocate sub-channels for multi-TX/RX rings. 6502 * 6503 * NOTE: 6504 * The # of RX rings that can be used is equivalent to the # of 6505 * channels to be requested. 6506 */ 6507 nsubch = sc->hn_rx_ring_cnt - 1; 6508 error = hn_synth_alloc_subchans(sc, &nsubch); 6509 if (error) 6510 goto failed; 6511 /* NOTE: _Full_ synthetic parts detach is required now. */ 6512 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; 6513 6514 /* 6515 * Set the # of TX/RX rings that could be used according to 6516 * the # of channels that NVS offered. 6517 */ 6518 nchan = nsubch + 1; 6519 hn_set_ring_inuse(sc, nchan); 6520 if (nchan == 1) { 6521 /* Only the primary channel can be used; done */ 6522 goto back; 6523 } 6524 6525 /* 6526 * Attach the sub-channels. 6527 * 6528 * NOTE: hn_set_ring_inuse() _must_ have been called. 6529 */ 6530 error = hn_attach_subchans(sc); 6531 if (error) 6532 goto failed; 6533 6534 /* 6535 * Configure RSS key and indirect table _after_ all sub-channels 6536 * are attached. 6537 */ 6538 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { 6539 /* 6540 * RSS key is not set yet; set it to the default RSS key. 6541 */ 6542 if (bootverbose) 6543 if_printf(sc->hn_ifp, "setup default RSS key\n"); 6544 #ifdef RSS 6545 rss_getkey(rss->rss_key); 6546 #else 6547 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); 6548 #endif 6549 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 6550 } 6551 6552 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { 6553 /* 6554 * RSS indirect table is not set yet; set it up in round- 6555 * robin fashion. 6556 */ 6557 if (bootverbose) { 6558 if_printf(sc->hn_ifp, "setup default RSS indirect " 6559 "table\n"); 6560 } 6561 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 6562 uint32_t subidx; 6563 6564 #ifdef RSS 6565 subidx = rss_get_indirection_to_bucket(i); 6566 #else 6567 subidx = i; 6568 #endif 6569 rss->rss_ind[i] = subidx % nchan; 6570 } 6571 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 6572 } else { 6573 /* 6574 * # of usable channels may be changed, so we have to 6575 * make sure that all entries in RSS indirect table 6576 * are valid. 6577 * 6578 * NOTE: hn_set_ring_inuse() _must_ have been called. 6579 */ 6580 hn_rss_ind_fixup(sc); 6581 } 6582 6583 sc->hn_rss_hash = sc->hn_rss_hcap; 6584 if ((sc->hn_flags & HN_FLAG_RXVF) || 6585 (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { 6586 /* NOTE: Don't reconfigure RSS; will do immediately. */ 6587 hn_vf_rss_fixup(sc, false); 6588 } 6589 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 6590 if (error) 6591 goto failed; 6592 back: 6593 /* 6594 * Fixup transmission aggregation setup. 6595 */ 6596 hn_set_txagg(sc); 6597 hn_rndis_init_fixat(sc, nchan); 6598 return (0); 6599 6600 failed: 6601 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 6602 hn_rndis_init_fixat(sc, nchan); 6603 hn_synth_detach(sc); 6604 } else { 6605 if (attached & ATTACHED_RNDIS) { 6606 hn_rndis_init_fixat(sc, nchan); 6607 hn_rndis_detach(sc); 6608 } 6609 if (attached & ATTACHED_NVS) 6610 hn_nvs_detach(sc); 6611 hn_chan_detach(sc, sc->hn_prichan); 6612 /* Restore old capabilities. */ 6613 sc->hn_caps = old_caps; 6614 } 6615 return (error); 6616 6617 #undef ATTACHED_RNDIS 6618 #undef ATTACHED_NVS 6619 } 6620 6621 /* 6622 * NOTE: 6623 * The interface must have been suspended though hn_suspend(), before 6624 * this function get called. 6625 */ 6626 static void 6627 hn_synth_detach(struct hn_softc *sc) 6628 { 6629 6630 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 6631 ("synthetic parts were not attached")); 6632 6633 /* Detach the RNDIS first. */ 6634 hn_rndis_detach(sc); 6635 6636 /* Detach NVS. */ 6637 hn_nvs_detach(sc); 6638 6639 /* Detach all of the channels. */ 6640 hn_detach_allchans(sc); 6641 6642 if (vmbus_current_version >= VMBUS_VERSION_WIN10 && sc->hn_rxbuf_gpadl != 0) { 6643 /* 6644 * Host is post-Win2016, disconnect RXBUF from primary channel here. 6645 */ 6646 int error; 6647 6648 error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, 6649 sc->hn_rxbuf_gpadl); 6650 if (error) { 6651 if_printf(sc->hn_ifp, 6652 "rxbuf gpadl disconn failed: %d\n", error); 6653 sc->hn_flags |= HN_FLAG_RXBUF_REF; 6654 } 6655 sc->hn_rxbuf_gpadl = 0; 6656 } 6657 6658 if (vmbus_current_version >= VMBUS_VERSION_WIN10 && sc->hn_chim_gpadl != 0) { 6659 /* 6660 * Host is post-Win2016, disconnect chimney sending buffer from 6661 * primary channel here. 6662 */ 6663 int error; 6664 6665 error = vmbus_chan_gpadl_disconnect(sc->hn_prichan, 6666 sc->hn_chim_gpadl); 6667 if (error) { 6668 if_printf(sc->hn_ifp, 6669 "chim gpadl disconn failed: %d\n", error); 6670 sc->hn_flags |= HN_FLAG_CHIM_REF; 6671 } 6672 sc->hn_chim_gpadl = 0; 6673 } 6674 sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED; 6675 } 6676 6677 static void 6678 hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt) 6679 { 6680 KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt, 6681 ("invalid ring count %d", ring_cnt)); 6682 6683 if (sc->hn_tx_ring_cnt > ring_cnt) 6684 sc->hn_tx_ring_inuse = ring_cnt; 6685 else 6686 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 6687 sc->hn_rx_ring_inuse = ring_cnt; 6688 6689 #ifdef RSS 6690 if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) { 6691 if_printf(sc->hn_ifp, "# of RX rings (%d) does not match " 6692 "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse, 6693 rss_getnumbuckets()); 6694 } 6695 #endif 6696 6697 if (bootverbose) { 6698 if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n", 6699 sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); 6700 } 6701 } 6702 6703 static void 6704 hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan) 6705 { 6706 6707 /* 6708 * NOTE: 6709 * The TX bufring will not be drained by the hypervisor, 6710 * if the primary channel is revoked. 6711 */ 6712 while (!vmbus_chan_rx_empty(chan) || 6713 (!vmbus_chan_is_revoked(sc->hn_prichan) && 6714 !vmbus_chan_tx_empty(chan))) 6715 pause("waitch", 1); 6716 vmbus_chan_intr_drain(chan); 6717 } 6718 6719 static void 6720 hn_disable_rx(struct hn_softc *sc) 6721 { 6722 6723 /* 6724 * Disable RX by clearing RX filter forcefully. 6725 */ 6726 sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE; 6727 hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */ 6728 6729 /* 6730 * Give RNDIS enough time to flush all pending data packets. 6731 */ 6732 pause("waitrx", (200 * hz) / 1000); 6733 } 6734 6735 /* 6736 * NOTE: 6737 * RX/TX _must_ have been suspended/disabled, before this function 6738 * is called. 6739 */ 6740 static void 6741 hn_drain_rxtx(struct hn_softc *sc, int nchan) 6742 { 6743 struct vmbus_channel **subch = NULL; 6744 int nsubch; 6745 6746 /* 6747 * Drain RX/TX bufrings and interrupts. 6748 */ 6749 nsubch = nchan - 1; 6750 if (nsubch > 0) 6751 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 6752 6753 if (subch != NULL) { 6754 int i; 6755 6756 for (i = 0; i < nsubch; ++i) 6757 hn_chan_drain(sc, subch[i]); 6758 } 6759 hn_chan_drain(sc, sc->hn_prichan); 6760 6761 if (subch != NULL) 6762 vmbus_subchan_rel(subch, nsubch); 6763 } 6764 6765 static void 6766 hn_suspend_data(struct hn_softc *sc) 6767 { 6768 struct hn_tx_ring *txr; 6769 int i; 6770 6771 HN_LOCK_ASSERT(sc); 6772 6773 /* 6774 * Suspend TX. 6775 */ 6776 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 6777 txr = &sc->hn_tx_ring[i]; 6778 6779 mtx_lock(&txr->hn_tx_lock); 6780 txr->hn_suspended = 1; 6781 mtx_unlock(&txr->hn_tx_lock); 6782 /* No one is able send more packets now. */ 6783 6784 /* 6785 * Wait for all pending sends to finish. 6786 * 6787 * NOTE: 6788 * We will _not_ receive all pending send-done, if the 6789 * primary channel is revoked. 6790 */ 6791 while (hn_tx_ring_pending(txr) && 6792 !vmbus_chan_is_revoked(sc->hn_prichan)) 6793 pause("hnwtx", 1 /* 1 tick */); 6794 } 6795 6796 /* 6797 * Disable RX. 6798 */ 6799 hn_disable_rx(sc); 6800 6801 /* 6802 * Drain RX/TX. 6803 */ 6804 hn_drain_rxtx(sc, sc->hn_rx_ring_inuse); 6805 6806 /* 6807 * Drain any pending TX tasks. 6808 * 6809 * NOTE: 6810 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX 6811 * tasks will have to be drained _after_ the above hn_drain_rxtx(). 6812 */ 6813 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 6814 txr = &sc->hn_tx_ring[i]; 6815 6816 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); 6817 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); 6818 } 6819 } 6820 6821 static void 6822 hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused) 6823 { 6824 6825 ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL; 6826 } 6827 6828 static void 6829 hn_suspend_mgmt(struct hn_softc *sc) 6830 { 6831 struct task task; 6832 6833 HN_LOCK_ASSERT(sc); 6834 6835 /* 6836 * Make sure that hn_mgmt_taskq0 can nolonger be accessed 6837 * through hn_mgmt_taskq. 6838 */ 6839 TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc); 6840 vmbus_chan_run_task(sc->hn_prichan, &task); 6841 6842 /* 6843 * Make sure that all pending management tasks are completed. 6844 */ 6845 taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init); 6846 taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status); 6847 taskqueue_drain_all(sc->hn_mgmt_taskq0); 6848 } 6849 6850 static void 6851 hn_suspend(struct hn_softc *sc) 6852 { 6853 6854 /* Disable polling. */ 6855 hn_polling(sc, 0); 6856 6857 /* 6858 * If the non-transparent mode VF is activated, the synthetic 6859 * device is receiving packets, so the data path of the 6860 * synthetic device must be suspended. 6861 */ 6862 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || 6863 (sc->hn_flags & HN_FLAG_RXVF)) 6864 hn_suspend_data(sc); 6865 hn_suspend_mgmt(sc); 6866 } 6867 6868 static void 6869 hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt) 6870 { 6871 int i; 6872 6873 KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt, 6874 ("invalid TX ring count %d", tx_ring_cnt)); 6875 6876 for (i = 0; i < tx_ring_cnt; ++i) { 6877 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 6878 6879 mtx_lock(&txr->hn_tx_lock); 6880 txr->hn_suspended = 0; 6881 mtx_unlock(&txr->hn_tx_lock); 6882 } 6883 } 6884 6885 static void 6886 hn_resume_data(struct hn_softc *sc) 6887 { 6888 int i; 6889 6890 HN_LOCK_ASSERT(sc); 6891 6892 /* 6893 * Re-enable RX. 6894 */ 6895 hn_rxfilter_config(sc); 6896 6897 /* 6898 * Make sure to clear suspend status on "all" TX rings, 6899 * since hn_tx_ring_inuse can be changed after 6900 * hn_suspend_data(). 6901 */ 6902 hn_resume_tx(sc, sc->hn_tx_ring_cnt); 6903 6904 #ifdef HN_IFSTART_SUPPORT 6905 if (!hn_use_if_start) 6906 #endif 6907 { 6908 /* 6909 * Flush unused drbrs, since hn_tx_ring_inuse may be 6910 * reduced. 6911 */ 6912 for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i) 6913 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 6914 } 6915 6916 /* 6917 * Kick start TX. 6918 */ 6919 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 6920 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 6921 6922 /* 6923 * Use txeof task, so that any pending oactive can be 6924 * cleared properly. 6925 */ 6926 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 6927 } 6928 } 6929 6930 static void 6931 hn_resume_mgmt(struct hn_softc *sc) 6932 { 6933 6934 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 6935 6936 /* 6937 * Kick off network change detection, if it was pending. 6938 * If no network change was pending, start link status 6939 * checks, which is more lightweight than network change 6940 * detection. 6941 */ 6942 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 6943 hn_change_network(sc); 6944 else 6945 hn_update_link_status(sc); 6946 } 6947 6948 static void 6949 hn_resume(struct hn_softc *sc) 6950 { 6951 6952 /* 6953 * If the non-transparent mode VF is activated, the synthetic 6954 * device have to receive packets, so the data path of the 6955 * synthetic device must be resumed. 6956 */ 6957 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || 6958 (sc->hn_flags & HN_FLAG_RXVF)) 6959 hn_resume_data(sc); 6960 6961 /* 6962 * Don't resume link status change if VF is attached/activated. 6963 * - In the non-transparent VF mode, the synthetic device marks 6964 * link down until the VF is deactivated; i.e. VF is down. 6965 * - In transparent VF mode, VF's media status is used until 6966 * the VF is detached. 6967 */ 6968 if ((sc->hn_flags & HN_FLAG_RXVF) == 0 && 6969 !(hn_xpnt_vf && sc->hn_vf_ifp != NULL)) 6970 hn_resume_mgmt(sc); 6971 6972 /* 6973 * Re-enable polling if this interface is running and 6974 * the polling is requested. 6975 */ 6976 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0) 6977 hn_polling(sc, sc->hn_pollhz); 6978 } 6979 6980 static void 6981 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) 6982 { 6983 const struct rndis_status_msg *msg; 6984 int ofs; 6985 6986 if (dlen < sizeof(*msg)) { 6987 if_printf(sc->hn_ifp, "invalid RNDIS status\n"); 6988 return; 6989 } 6990 msg = data; 6991 6992 switch (msg->rm_status) { 6993 case RNDIS_STATUS_MEDIA_CONNECT: 6994 case RNDIS_STATUS_MEDIA_DISCONNECT: 6995 hn_update_link_status(sc); 6996 break; 6997 6998 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: 6999 case RNDIS_STATUS_LINK_SPEED_CHANGE: 7000 /* Not really useful; ignore. */ 7001 break; 7002 7003 case RNDIS_STATUS_NETWORK_CHANGE: 7004 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); 7005 if (dlen < ofs + msg->rm_stbuflen || 7006 msg->rm_stbuflen < sizeof(uint32_t)) { 7007 if_printf(sc->hn_ifp, "network changed\n"); 7008 } else { 7009 uint32_t change; 7010 7011 memcpy(&change, ((const uint8_t *)msg) + ofs, 7012 sizeof(change)); 7013 if_printf(sc->hn_ifp, "network changed, change %u\n", 7014 change); 7015 } 7016 hn_change_network(sc); 7017 break; 7018 7019 default: 7020 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", 7021 msg->rm_status); 7022 break; 7023 } 7024 } 7025 7026 static int 7027 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) 7028 { 7029 const struct rndis_pktinfo *pi = info_data; 7030 uint32_t mask = 0; 7031 7032 while (info_dlen != 0) { 7033 const void *data; 7034 uint32_t dlen; 7035 7036 if (__predict_false(info_dlen < sizeof(*pi))) 7037 return (EINVAL); 7038 if (__predict_false(info_dlen < pi->rm_size)) 7039 return (EINVAL); 7040 info_dlen -= pi->rm_size; 7041 7042 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) 7043 return (EINVAL); 7044 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) 7045 return (EINVAL); 7046 dlen = pi->rm_size - pi->rm_pktinfooffset; 7047 data = pi->rm_data; 7048 7049 switch (pi->rm_type) { 7050 case NDIS_PKTINFO_TYPE_VLAN: 7051 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) 7052 return (EINVAL); 7053 info->vlan_info = *((const uint32_t *)data); 7054 mask |= HN_RXINFO_VLAN; 7055 break; 7056 7057 case NDIS_PKTINFO_TYPE_CSUM: 7058 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) 7059 return (EINVAL); 7060 info->csum_info = *((const uint32_t *)data); 7061 mask |= HN_RXINFO_CSUM; 7062 break; 7063 7064 case HN_NDIS_PKTINFO_TYPE_HASHVAL: 7065 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) 7066 return (EINVAL); 7067 info->hash_value = *((const uint32_t *)data); 7068 mask |= HN_RXINFO_HASHVAL; 7069 break; 7070 7071 case HN_NDIS_PKTINFO_TYPE_HASHINF: 7072 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) 7073 return (EINVAL); 7074 info->hash_info = *((const uint32_t *)data); 7075 mask |= HN_RXINFO_HASHINF; 7076 break; 7077 7078 default: 7079 goto next; 7080 } 7081 7082 if (mask == HN_RXINFO_ALL) { 7083 /* All found; done */ 7084 break; 7085 } 7086 next: 7087 pi = (const struct rndis_pktinfo *) 7088 ((const uint8_t *)pi + pi->rm_size); 7089 } 7090 7091 /* 7092 * Final fixup. 7093 * - If there is no hash value, invalidate the hash info. 7094 */ 7095 if ((mask & HN_RXINFO_HASHVAL) == 0) 7096 info->hash_info = HN_NDIS_HASH_INFO_INVALID; 7097 return (0); 7098 } 7099 7100 static __inline bool 7101 hn_rndis_check_overlap(int off, int len, int check_off, int check_len) 7102 { 7103 7104 if (off < check_off) { 7105 if (__predict_true(off + len <= check_off)) 7106 return (false); 7107 } else if (off > check_off) { 7108 if (__predict_true(check_off + check_len <= off)) 7109 return (false); 7110 } 7111 return (true); 7112 } 7113 7114 static void 7115 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) 7116 { 7117 const struct rndis_packet_msg *pkt; 7118 struct hn_rxinfo info; 7119 int data_off, pktinfo_off, data_len, pktinfo_len; 7120 7121 /* 7122 * Check length. 7123 */ 7124 if (__predict_false(dlen < sizeof(*pkt))) { 7125 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n"); 7126 return; 7127 } 7128 pkt = data; 7129 7130 if (__predict_false(dlen < pkt->rm_len)) { 7131 if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, " 7132 "dlen %d, msglen %u\n", dlen, pkt->rm_len); 7133 return; 7134 } 7135 if (__predict_false(pkt->rm_len < 7136 pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) { 7137 if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, " 7138 "msglen %u, data %u, oob %u, pktinfo %u\n", 7139 pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen, 7140 pkt->rm_pktinfolen); 7141 return; 7142 } 7143 if (__predict_false(pkt->rm_datalen == 0)) { 7144 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n"); 7145 return; 7146 } 7147 7148 /* 7149 * Check offests. 7150 */ 7151 #define IS_OFFSET_INVALID(ofs) \ 7152 ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \ 7153 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)) 7154 7155 /* XXX Hyper-V does not meet data offset alignment requirement */ 7156 if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) { 7157 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7158 "data offset %u\n", pkt->rm_dataoffset); 7159 return; 7160 } 7161 if (__predict_false(pkt->rm_oobdataoffset > 0 && 7162 IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) { 7163 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7164 "oob offset %u\n", pkt->rm_oobdataoffset); 7165 return; 7166 } 7167 if (__predict_true(pkt->rm_pktinfooffset > 0) && 7168 __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) { 7169 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7170 "pktinfo offset %u\n", pkt->rm_pktinfooffset); 7171 return; 7172 } 7173 7174 #undef IS_OFFSET_INVALID 7175 7176 data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset); 7177 data_len = pkt->rm_datalen; 7178 pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset); 7179 pktinfo_len = pkt->rm_pktinfolen; 7180 7181 /* 7182 * Check OOB coverage. 7183 */ 7184 if (__predict_false(pkt->rm_oobdatalen != 0)) { 7185 int oob_off, oob_len; 7186 7187 if_printf(rxr->hn_ifp, "got oobdata\n"); 7188 oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); 7189 oob_len = pkt->rm_oobdatalen; 7190 7191 if (__predict_false(oob_off + oob_len > pkt->rm_len)) { 7192 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7193 "oob overflow, msglen %u, oob abs %d len %d\n", 7194 pkt->rm_len, oob_off, oob_len); 7195 return; 7196 } 7197 7198 /* 7199 * Check against data. 7200 */ 7201 if (hn_rndis_check_overlap(oob_off, oob_len, 7202 data_off, data_len)) { 7203 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7204 "oob overlaps data, oob abs %d len %d, " 7205 "data abs %d len %d\n", 7206 oob_off, oob_len, data_off, data_len); 7207 return; 7208 } 7209 7210 /* 7211 * Check against pktinfo. 7212 */ 7213 if (pktinfo_len != 0 && 7214 hn_rndis_check_overlap(oob_off, oob_len, 7215 pktinfo_off, pktinfo_len)) { 7216 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7217 "oob overlaps pktinfo, oob abs %d len %d, " 7218 "pktinfo abs %d len %d\n", 7219 oob_off, oob_len, pktinfo_off, pktinfo_len); 7220 return; 7221 } 7222 } 7223 7224 /* 7225 * Check per-packet-info coverage and find useful per-packet-info. 7226 */ 7227 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; 7228 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; 7229 info.hash_info = HN_NDIS_HASH_INFO_INVALID; 7230 if (__predict_true(pktinfo_len != 0)) { 7231 bool overlap; 7232 int error; 7233 7234 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { 7235 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7236 "pktinfo overflow, msglen %u, " 7237 "pktinfo abs %d len %d\n", 7238 pkt->rm_len, pktinfo_off, pktinfo_len); 7239 return; 7240 } 7241 7242 /* 7243 * Check packet info coverage. 7244 */ 7245 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, 7246 data_off, data_len); 7247 if (__predict_false(overlap)) { 7248 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7249 "pktinfo overlap data, pktinfo abs %d len %d, " 7250 "data abs %d len %d\n", 7251 pktinfo_off, pktinfo_len, data_off, data_len); 7252 return; 7253 } 7254 7255 /* 7256 * Find useful per-packet-info. 7257 */ 7258 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, 7259 pktinfo_len, &info); 7260 if (__predict_false(error)) { 7261 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " 7262 "pktinfo\n"); 7263 return; 7264 } 7265 } 7266 7267 if (__predict_false(data_off + data_len > pkt->rm_len)) { 7268 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 7269 "data overflow, msglen %u, data abs %d len %d\n", 7270 pkt->rm_len, data_off, data_len); 7271 return; 7272 } 7273 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); 7274 } 7275 7276 static __inline void 7277 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) 7278 { 7279 const struct rndis_msghdr *hdr; 7280 7281 if (__predict_false(dlen < sizeof(*hdr))) { 7282 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); 7283 return; 7284 } 7285 hdr = data; 7286 7287 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { 7288 /* Hot data path. */ 7289 hn_rndis_rx_data(rxr, data, dlen); 7290 /* Done! */ 7291 return; 7292 } 7293 7294 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) 7295 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); 7296 else 7297 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); 7298 } 7299 7300 static void 7301 hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) 7302 { 7303 const struct hn_nvs_hdr *hdr; 7304 7305 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { 7306 if_printf(sc->hn_ifp, "invalid nvs notify\n"); 7307 return; 7308 } 7309 hdr = VMBUS_CHANPKT_CONST_DATA(pkt); 7310 7311 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { 7312 /* Useless; ignore */ 7313 return; 7314 } 7315 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); 7316 } 7317 7318 static void 7319 hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, 7320 const struct vmbus_chanpkt_hdr *pkt) 7321 { 7322 struct hn_nvs_sendctx *sndc; 7323 7324 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; 7325 sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), 7326 VMBUS_CHANPKT_DATALEN(pkt)); 7327 /* 7328 * NOTE: 7329 * 'sndc' CAN NOT be accessed anymore, since it can be freed by 7330 * its callback. 7331 */ 7332 } 7333 7334 static void 7335 hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 7336 const struct vmbus_chanpkt_hdr *pkthdr) 7337 { 7338 const struct vmbus_chanpkt_rxbuf *pkt; 7339 const struct hn_nvs_hdr *nvs_hdr; 7340 int count, i, hlen; 7341 7342 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { 7343 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); 7344 return; 7345 } 7346 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); 7347 7348 /* Make sure that this is a RNDIS message. */ 7349 if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { 7350 if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", 7351 nvs_hdr->nvs_type); 7352 return; 7353 } 7354 7355 hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); 7356 if (__predict_false(hlen < sizeof(*pkt))) { 7357 if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); 7358 return; 7359 } 7360 pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; 7361 7362 if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { 7363 if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", 7364 pkt->cp_rxbuf_id); 7365 return; 7366 } 7367 7368 count = pkt->cp_rxbuf_cnt; 7369 if (__predict_false(hlen < 7370 __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { 7371 if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); 7372 return; 7373 } 7374 7375 /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ 7376 for (i = 0; i < count; ++i) { 7377 int ofs, len; 7378 7379 ofs = pkt->cp_rxbuf[i].rb_ofs; 7380 len = pkt->cp_rxbuf[i].rb_len; 7381 if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { 7382 if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " 7383 "ofs %d, len %d\n", i, ofs, len); 7384 continue; 7385 } 7386 hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); 7387 } 7388 7389 /* 7390 * Ack the consumed RXBUF associated w/ this channel packet, 7391 * so that this RXBUF can be recycled by the hypervisor. 7392 */ 7393 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); 7394 } 7395 7396 static void 7397 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 7398 uint64_t tid) 7399 { 7400 struct hn_nvs_rndis_ack ack; 7401 int retries, error; 7402 7403 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; 7404 ack.nvs_status = HN_NVS_STATUS_OK; 7405 7406 retries = 0; 7407 again: 7408 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, 7409 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); 7410 if (__predict_false(error == EAGAIN)) { 7411 /* 7412 * NOTE: 7413 * This should _not_ happen in real world, since the 7414 * consumption of the TX bufring from the TX path is 7415 * controlled. 7416 */ 7417 if (rxr->hn_ack_failed == 0) 7418 if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); 7419 rxr->hn_ack_failed++; 7420 retries++; 7421 if (retries < 10) { 7422 DELAY(100); 7423 goto again; 7424 } 7425 /* RXBUF leaks! */ 7426 if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); 7427 } 7428 } 7429 7430 static void 7431 hn_chan_callback(struct vmbus_channel *chan, void *xrxr) 7432 { 7433 struct hn_rx_ring *rxr = xrxr; 7434 struct hn_softc *sc = rxr->hn_ifp->if_softc; 7435 7436 for (;;) { 7437 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; 7438 int error, pktlen; 7439 7440 pktlen = rxr->hn_pktbuf_len; 7441 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); 7442 if (__predict_false(error == ENOBUFS)) { 7443 void *nbuf; 7444 int nlen; 7445 7446 /* 7447 * Expand channel packet buffer. 7448 * 7449 * XXX 7450 * Use M_WAITOK here, since allocation failure 7451 * is fatal. 7452 */ 7453 nlen = rxr->hn_pktbuf_len * 2; 7454 while (nlen < pktlen) 7455 nlen *= 2; 7456 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); 7457 7458 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", 7459 rxr->hn_pktbuf_len, nlen); 7460 7461 free(rxr->hn_pktbuf, M_DEVBUF); 7462 rxr->hn_pktbuf = nbuf; 7463 rxr->hn_pktbuf_len = nlen; 7464 /* Retry! */ 7465 continue; 7466 } else if (__predict_false(error == EAGAIN)) { 7467 /* No more channel packets; done! */ 7468 break; 7469 } 7470 KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error)); 7471 7472 switch (pkt->cph_type) { 7473 case VMBUS_CHANPKT_TYPE_COMP: 7474 hn_nvs_handle_comp(sc, chan, pkt); 7475 break; 7476 7477 case VMBUS_CHANPKT_TYPE_RXBUF: 7478 hn_nvs_handle_rxbuf(rxr, chan, pkt); 7479 break; 7480 7481 case VMBUS_CHANPKT_TYPE_INBAND: 7482 hn_nvs_handle_notify(sc, pkt); 7483 break; 7484 7485 default: 7486 if_printf(rxr->hn_ifp, "unknown chan pkt %u\n", 7487 pkt->cph_type); 7488 break; 7489 } 7490 } 7491 hn_chan_rollup(rxr, rxr->hn_txr); 7492 } 7493 7494 static void 7495 hn_sysinit(void *arg __unused) 7496 { 7497 int i; 7498 7499 hn_udpcs_fixup = counter_u64_alloc(M_WAITOK); 7500 7501 #ifdef HN_IFSTART_SUPPORT 7502 /* 7503 * Don't use ifnet.if_start if transparent VF mode is requested; 7504 * mainly due to the IFF_DRV_OACTIVE flag. 7505 */ 7506 if (hn_xpnt_vf && hn_use_if_start) { 7507 hn_use_if_start = 0; 7508 printf("hn: tranparent VF mode, if_transmit will be used, " 7509 "instead of if_start\n"); 7510 } 7511 #endif 7512 if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) { 7513 printf("hn: invalid transparent VF attach routing " 7514 "wait timeout %d, reset to %d\n", 7515 hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN); 7516 hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; 7517 } 7518 7519 /* 7520 * Initialize VF map. 7521 */ 7522 rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE); 7523 hn_vfmap_size = HN_VFMAP_SIZE_DEF; 7524 hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF, 7525 M_WAITOK | M_ZERO); 7526 7527 /* 7528 * Fix the # of TX taskqueues. 7529 */ 7530 if (hn_tx_taskq_cnt <= 0) 7531 hn_tx_taskq_cnt = 1; 7532 else if (hn_tx_taskq_cnt > mp_ncpus) 7533 hn_tx_taskq_cnt = mp_ncpus; 7534 7535 /* 7536 * Fix the TX taskqueue mode. 7537 */ 7538 switch (hn_tx_taskq_mode) { 7539 case HN_TX_TASKQ_M_INDEP: 7540 case HN_TX_TASKQ_M_GLOBAL: 7541 case HN_TX_TASKQ_M_EVTTQ: 7542 break; 7543 default: 7544 hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; 7545 break; 7546 } 7547 7548 if (vm_guest != VM_GUEST_HV) 7549 return; 7550 7551 if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL) 7552 return; 7553 7554 hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 7555 M_DEVBUF, M_WAITOK); 7556 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 7557 hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK, 7558 taskqueue_thread_enqueue, &hn_tx_taskque[i]); 7559 taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET, 7560 "hn tx%d", i); 7561 } 7562 } 7563 SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL); 7564 7565 static void 7566 hn_sysuninit(void *arg __unused) 7567 { 7568 7569 if (hn_tx_taskque != NULL) { 7570 int i; 7571 7572 for (i = 0; i < hn_tx_taskq_cnt; ++i) 7573 taskqueue_free(hn_tx_taskque[i]); 7574 free(hn_tx_taskque, M_DEVBUF); 7575 } 7576 7577 if (hn_vfmap != NULL) 7578 free(hn_vfmap, M_DEVBUF); 7579 rm_destroy(&hn_vfmap_lock); 7580 7581 counter_u64_free(hn_udpcs_fixup); 7582 } 7583 SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL); 7584