/*-
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hn.h"
#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/buf_ring.h>
#include <sys/eventhandler.h>

#include <machine/atomic.h>
#include <machine/in_cksum.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/rndis.h>
#ifdef RSS
#include <net/rss_config.h>
#endif

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus.h>
#include <dev/hyperv/include/vmbus_xact.h>

#include <dev/hyperv/netvsc/ndis.h>
#include <dev/hyperv/netvsc/if_hnreg.h>
#include <dev/hyperv/netvsc/if_hnvar.h>
#include <dev/hyperv/netvsc/hn_nvs.h>
#include <dev/hyperv/netvsc/hn_rndis.h>

#include "vmbus_if.h"

#define HN_IFSTART_SUPPORT

#define HN_RING_CNT_DEF_MAX		8

#define HN_VFMAP_SIZE_DEF		8

/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT			512

#define HN_RNDIS_PKT_LEN					\
	(sizeof(struct rndis_packet_msg) +			\
	 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) +	\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
#define HN_RNDIS_PKT_BOUNDARY		PAGE_SIZE
#define HN_RNDIS_PKT_ALIGN		CACHE_LINE_SIZE

#define HN_TX_DATA_BOUNDARY		PAGE_SIZE
#define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE		PAGE_SIZE
/* -1 for RNDIS packet message */
#define HN_TX_DATA_SEGCNT_MAX		(HN_GPACNT_MAX - 1)

#define HN_DIRECT_TX_SIZE_DEF		128

#define HN_EARLY_TXEOF_THRESH		8

#define HN_PKTBUF_LEN_DEF		(16 * 1024)

#define HN_LROENT_CNT_DEF		128

#define HN_LRO_LENLIM_MULTIRX_DEF	(12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF		(25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp)		(2 * (ifp)->if_mtu)

#define HN_LRO_ACKCNT_DEF		1

#define HN_LOCK_INIT(sc)	\
	sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc)	sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc)	sx_assert(&(sc)->hn_lock, SA_XLOCKED)
#define HN_LOCK(sc)					\
do {							\
	while (sx_try_xlock(&(sc)->hn_lock) == 0)	\
		DELAY(1000);				\
} while (0)
#define HN_UNLOCK(sc)		sx_xunlock(&(sc)->hn_lock)

#define HN_CSUM_IP_MASK		(CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
#define HN_CSUM_IP6_MASK	(CSUM_IP6_TCP | CSUM_IP6_UDP)
#define HN_CSUM_IP_HWASSIST(sc)		\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
#define HN_CSUM_IP6_HWASSIST(sc)	\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)

#define HN_PKTSIZE_MIN(align)		\
	roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
	    HN_RNDIS_PKT_LEN, (align))
#define HN_PKTSIZE(m, align)		\
	roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))

#ifdef RSS
#define HN_RING_IDX2CPU(sc, idx)	rss_getcpu((idx) % rss_getnumbuckets())
#else
#define HN_RING_IDX2CPU(sc, idx)	(((sc)->hn_cpu + (idx)) % mp_ncpus)
#endif

struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
	SLIST_ENTRY(hn_txdesc)	link;
#endif
	STAILQ_ENTRY(hn_txdesc)	agg_link;

	/* Aggregated txdescs, in sending order. */
	STAILQ_HEAD(, hn_txdesc) agg_list;

	/* The oldest packet, if transmission aggregation happens. */
	struct mbuf		*m;
	struct hn_tx_ring	*txr;
	int			refs;
	uint32_t		flags;		/* HN_TXD_FLAG_ */
	struct hn_nvs_sendctx	send_ctx;
	uint32_t		chim_index;
	int			chim_size;

	bus_dmamap_t		data_dmap;

	bus_addr_t		rndis_pkt_paddr;
	struct rndis_packet_msg	*rndis_pkt;
	bus_dmamap_t		rndis_pkt_dmap;
};

#define HN_TXD_FLAG_ONLIST	0x0001
#define HN_TXD_FLAG_DMAMAP	0x0002
#define HN_TXD_FLAG_ONAGG	0x0004

struct hn_rxinfo {
	uint32_t		vlan_info;
	uint32_t		csum_info;
	uint32_t		hash_info;
	uint32_t		hash_value;
};

struct hn_rxvf_setarg {
	struct hn_rx_ring	*rxr;
	struct ifnet		*vf_ifp;
};

#define HN_RXINFO_VLAN		0x0001
#define HN_RXINFO_CSUM		0x0002
#define HN_RXINFO_HASHINF	0x0004
#define HN_RXINFO_HASHVAL	0x0008
#define HN_RXINFO_ALL		\
	(HN_RXINFO_VLAN |	\
	 HN_RXINFO_CSUM |	\
	 HN_RXINFO_HASHINF |	\
	 HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0

static int	hn_probe(device_t);
static int	hn_attach(device_t);
static int	hn_detach(device_t);
static int	hn_shutdown(device_t);
static void	hn_chan_callback(struct vmbus_channel *, void *);

static void	hn_init(void *);
static int	hn_ioctl(struct ifnet *, u_long, caddr_t);
#ifdef HN_IFSTART_SUPPORT
static void	hn_start(struct ifnet *);
#endif
static int	hn_transmit(struct ifnet *, struct mbuf *);
static void	hn_xmit_qflush(struct ifnet *);
static int	hn_ifmedia_upd(struct ifnet *);
static void	hn_ifmedia_sts(struct ifnet *, struct ifmediareq *);

static void	hn_ifnet_event(void *, struct ifnet *, int);
static void	hn_ifaddr_event(void *, struct ifnet *);
static void	hn_ifnet_attevent(void *, struct ifnet *);
static void	hn_ifnet_detevent(void *, struct ifnet *);

static bool	hn_ismyvf(const struct hn_softc *, const struct ifnet *);
static void	hn_rxvf_change(struct hn_softc *, struct ifnet *, bool);
static void	hn_rxvf_set(struct hn_softc *, struct ifnet *);
static void	hn_rxvf_set_task(void *, int);

static int	hn_rndis_rxinfo(const void *, int, struct hn_rxinfo *);
static void	hn_rndis_rx_data(struct hn_rx_ring *, const void *, int);
static void	hn_rndis_rx_status(struct hn_softc *, const void *, int);
static void	hn_rndis_init_fixat(struct hn_softc *, int);

static void	hn_nvs_handle_notify(struct hn_softc *,
		    const struct vmbus_chanpkt_hdr *);
static void	hn_nvs_handle_comp(struct hn_softc *, struct vmbus_channel *,
		    const struct vmbus_chanpkt_hdr *);
static void	hn_nvs_handle_rxbuf(struct hn_rx_ring *,
		    struct vmbus_channel *, const struct vmbus_chanpkt_hdr *);
static void	hn_nvs_ack_rxbuf(struct hn_rx_ring *, struct vmbus_channel *,
		    uint64_t);

#if __FreeBSD_version >= 1100099
static int	hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int	hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
#if __FreeBSD_version < 1100095
static int	hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
#else
static int	hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int	hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
#ifndef RSS
static int	hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int	hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_polling_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_vf_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_vflist_sysctl(SYSCTL_HANDLER_ARGS);
static int	hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS);

static void	hn_stop(struct hn_softc *, bool);
static void	hn_init_locked(struct hn_softc *);
static int	hn_chan_attach(struct hn_softc *, struct vmbus_channel *);
static void	hn_chan_detach(struct hn_softc *, struct vmbus_channel *);
static int	hn_attach_subchans(struct hn_softc *);
static void	hn_detach_allchans(struct hn_softc *);
static void	hn_chan_rollup(struct hn_rx_ring *, struct hn_tx_ring *);
static void	hn_set_ring_inuse(struct hn_softc *, int);
static int	hn_synth_attach(struct hn_softc *, int);
static void	hn_synth_detach(struct hn_softc *);
static int	hn_synth_alloc_subchans(struct hn_softc *, int *);
static bool	hn_synth_attachable(const struct hn_softc *);
static void	hn_suspend(struct hn_softc *);
static void	hn_suspend_data(struct hn_softc *);
static void	hn_suspend_mgmt(struct hn_softc *);
static void	hn_resume(struct hn_softc *);
static void	hn_resume_data(struct hn_softc *);
static void	hn_resume_mgmt(struct hn_softc *);
static void	hn_suspend_mgmt_taskfunc(void *, int);
static void	hn_chan_drain(struct hn_softc *, struct vmbus_channel *);
static void	hn_disable_rx(struct hn_softc *);
static void	hn_drain_rxtx(struct hn_softc *, int);
static void	hn_polling(struct hn_softc *, u_int);
static void	hn_chan_polling(struct vmbus_channel *, u_int);

static void	hn_update_link_status(struct hn_softc *);
static void	hn_change_network(struct hn_softc *);
static void	hn_link_taskfunc(void *, int);
static void	hn_netchg_init_taskfunc(void *, int);
static void	hn_netchg_status_taskfunc(void *, int);
static void	hn_link_status(struct hn_softc *);

static int	hn_create_rx_data(struct hn_softc *, int);
static void	hn_destroy_rx_data(struct hn_softc *);
static int	hn_check_iplen(const struct mbuf *, int);
static int	hn_set_rxfilter(struct hn_softc *, uint32_t);
static int	hn_rxfilter_config(struct hn_softc *);
#ifndef RSS
static int	hn_rss_reconfig(struct hn_softc *);
#endif
static void	hn_rss_ind_fixup(struct hn_softc *);
static int	hn_rxpkt(struct hn_rx_ring *, const void *, int,
		    const struct hn_rxinfo *);

static int	hn_tx_ring_create(struct hn_softc *, int);
static void	hn_tx_ring_destroy(struct hn_tx_ring *);
static int	hn_create_tx_data(struct hn_softc *, int);
static void	hn_fixup_tx_data(struct hn_softc *);
static void	hn_destroy_tx_data(struct hn_softc *);
static void	hn_txdesc_dmamap_destroy(struct hn_txdesc *);
static void	hn_txdesc_gc(struct hn_tx_ring *, struct hn_txdesc *);
static int	hn_encap(struct ifnet *, struct hn_tx_ring *,
		    struct hn_txdesc *, struct mbuf **);
static int	hn_txpkt(struct ifnet *, struct hn_tx_ring *,
		    struct hn_txdesc *);
static void	hn_set_chim_size(struct hn_softc *, int);
static void	hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool	hn_tx_ring_pending(struct hn_tx_ring *);
static void	hn_tx_ring_qflush(struct hn_tx_ring *);
static void	hn_resume_tx(struct hn_softc *, int);
static void	hn_set_txagg(struct hn_softc *);
static void	*hn_try_txagg(struct ifnet *, struct hn_tx_ring *,
		    struct hn_txdesc *, int);
static int	hn_get_txswq_depth(const struct hn_tx_ring *);
static void	hn_txpkt_done(struct hn_nvs_sendctx *, struct hn_softc *,
		    struct vmbus_channel *, const void *, int);
static int	hn_txpkt_sglist(struct hn_tx_ring *, struct hn_txdesc *);
static int	hn_txpkt_chim(struct hn_tx_ring *, struct hn_txdesc *);
static int	hn_xmit(struct hn_tx_ring *, int);
static void	hn_xmit_taskfunc(void *, int);
static void	hn_xmit_txeof(struct hn_tx_ring *);
static void	hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int	hn_start_locked(struct hn_tx_ring *, int);
static void	hn_start_taskfunc(void *, int);
static void	hn_start_txeof(struct hn_tx_ring *);
static void	hn_start_txeof_taskfunc(void *, int);
#endif

SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust tcp segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust tcp segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust udp datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust udp datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust ip packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust ip packet verification on host side, "
    "when csum info is missing (global setting)");

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");

#define HN_TX_TASKQ_M_INDEP	0
#define HN_TX_TASKQ_M_GLOBAL	1
#define HN_TX_TASKQ_M_EVTTQ	2

static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");

/* VF list */
SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, CTLFLAG_RD | CTLTYPE_STRING,
    0, 0, hn_vflist_sysctl, "A", "VF list");

/* VF mapping */
SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING,
    0, 0, hn_vfmap_sysctl, "A", "VF mapping");

static u_int			hn_cpu_index;	/* next CPU for channel */
static struct taskqueue		**hn_tx_taskque;/* shared TX taskqueues */

static struct rmlock		hn_vfmap_lock;
static int			hn_vfmap_size;
static struct ifnet		**hn_vfmap;

#ifndef RSS
static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
#endif	/* !RSS */

static device_method_t hn_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		hn_probe),
	DEVMETHOD(device_attach,	hn_attach),
	DEVMETHOD(device_detach,	hn_detach),
	DEVMETHOD(device_shutdown,	hn_shutdown),
	DEVMETHOD_END
};

static driver_t hn_driver = {
	"hn",
	hn_methods,
	sizeof(struct hn_softc)
};

static devclass_t hn_devclass;

DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);

#if __FreeBSD_version >= 1100099
static void
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
	int i;

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
		sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif

static int
hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size == 0, ("invalid rndis sglist txd"));
	return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
	    &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
}

static int
hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
	struct hn_nvs_rndis rndis;

	KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size > 0, ("invalid rndis chim txd"));

	rndis.nvs_type = HN_NVS_TYPE_RNDIS;
	rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
	rndis.nvs_chim_idx = txd->chim_index;
	rndis.nvs_chim_sz = txd->chim_size;

	return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
	    &rndis, sizeof(rndis), &txd->send_ctx));
}

static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
{
	int i, bmap_cnt = sc->hn_chim_bmap_cnt;
	u_long *bmap = sc->hn_chim_bmap;
	uint32_t ret = HN_NVS_CHIM_IDX_INVALID;

	for (i = 0; i < bmap_cnt; ++i) {
		int idx;

		idx = ffsl(~bmap[i]);
		if (idx == 0)
			continue;

		--idx; /* ffsl is 1-based */
		KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
		    ("invalid i %d and idx %d", i, idx));

		if (atomic_testandset_long(&bmap[i], idx))
			continue;

		ret = i * LONG_BIT + idx;
		break;
	}
	return (ret);
}
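
/*
 * hn_chim_alloc() above claims a chimney sending buffer slot locklessly:
 * it scans each bitmap word with ffsl() and sets the chosen bit with
 * atomic_testandset_long(), moving on to the next word if it loses the
 * race.  hn_chim_free() below releases a slot by atomically clearing the
 * same bit.
 */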
static __inline void
hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
{
	u_long mask;
	uint32_t idx;

	idx = chim_idx / LONG_BIT;
	KASSERT(idx < sc->hn_chim_bmap_cnt,
	    ("invalid chimney index 0x%x", chim_idx));

	mask = 1UL << (chim_idx % LONG_BIT);
	KASSERT(sc->hn_chim_bmap[idx] & mask,
	    ("index bitmap 0x%lx, chimney index %u, "
	     "bitmap idx %d, bitmask 0x%lx",
	     sc->hn_chim_bmap[idx], chim_idx, idx, mask));

	atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
}

#if defined(INET6) || defined(INET)

#define PULLUP_HDR(m, len)				\
do {							\
	if (__predict_false((m)->m_len < (len))) {	\
		(m) = m_pullup((m), (len));		\
		if ((m) == NULL)			\
			return (NULL);			\
	}						\
} while (0)

/*
 * NOTE: If this function failed, the m_head would be freed.
 */
static __inline struct mbuf *
hn_tso_fixup(struct mbuf *m_head)
{
	struct ether_vlan_header *evl;
	struct tcphdr *th;
	int ehlen;

	KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));

	PULLUP_HDR(m_head, sizeof(*evl));
	evl = mtod(m_head, struct ether_vlan_header *);
	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehlen = ETHER_HDR_LEN;

#ifdef INET
	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
		struct ip *ip;
		int iphlen;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
		ip = mtodo(m_head, ehlen);
		iphlen = ip->ip_hl << 2;

		PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
		th = mtodo(m_head, ehlen + iphlen);

		ip->ip_len = 0;
		ip->ip_sum = 0;
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	}
#endif
#if defined(INET6) && defined(INET)
	else
#endif
#ifdef INET6
	{
		struct ip6_hdr *ip6;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
		ip6 = mtodo(m_head, ehlen);
		if (ip6->ip6_nxt != IPPROTO_TCP) {
			m_freem(m_head);
			return (NULL);
		}

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
		th = mtodo(m_head, ehlen + sizeof(*ip6));

		ip6->ip6_plen = 0;
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
	}
#endif
	return (m_head);
}
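
/*
 * hn_tso_fixup() above prepares a TSO mbuf for host-side segmentation:
 * it zeroes ip_len/ip6_plen (and ip_sum for IPv4) and seeds th_sum with
 * the pseudo-header checksum, pulling the headers into the first mbuf
 * as needed via PULLUP_HDR().
 */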
/*
 * NOTE: If this function failed, the m_head would be freed.
 */
static __inline struct mbuf *
hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn)
{
	const struct ether_vlan_header *evl;
	const struct tcphdr *th;
	int ehlen;

	*tcpsyn = 0;

	PULLUP_HDR(m_head, sizeof(*evl));
	evl = mtod(m_head, const struct ether_vlan_header *);
	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehlen = ETHER_HDR_LEN;

#ifdef INET
	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TCP) {
		const struct ip *ip;
		int iphlen;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
		ip = mtodo(m_head, ehlen);
		iphlen = ip->ip_hl << 2;

		PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
		th = mtodo(m_head, ehlen + iphlen);
		if (th->th_flags & TH_SYN)
			*tcpsyn = 1;
	}
#endif
#if defined(INET6) && defined(INET)
	else
#endif
#ifdef INET6
	{
		const struct ip6_hdr *ip6;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
		ip6 = mtodo(m_head, ehlen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (m_head);

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
		th = mtodo(m_head, ehlen + sizeof(*ip6));
		if (th->th_flags & TH_SYN)
			*tcpsyn = 1;
	}
#endif
	return (m_head);
}

#undef PULLUP_HDR

#endif	/* INET6 || INET */

static int
hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
{
	int error = 0;

	HN_LOCK_ASSERT(sc);

	if (sc->hn_rx_filter != filter) {
		error = hn_rndis_set_rxfilter(sc, filter);
		if (!error)
			sc->hn_rx_filter = filter;
	}
	return (error);
}

static int
hn_rxfilter_config(struct hn_softc *sc)
{
	struct ifnet *ifp = sc->hn_ifp;
	uint32_t filter;

	HN_LOCK_ASSERT(sc);

	if ((ifp->if_flags & IFF_PROMISC) ||
	    (sc->hn_flags & HN_FLAG_RXVF)) {
		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
	} else {
		filter = NDIS_PACKET_TYPE_DIRECTED;
		if (ifp->if_flags & IFF_BROADCAST)
			filter |= NDIS_PACKET_TYPE_BROADCAST;
		/* TODO: support multicast list */
		if ((ifp->if_flags & IFF_ALLMULTI) ||
		    !TAILQ_EMPTY(&ifp->if_multiaddrs))
			filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
	}
	return (hn_set_rxfilter(sc, filter));
}

static void
hn_set_txagg(struct hn_softc *sc)
{
	uint32_t size, pkts;
	int i;

	/*
	 * Setup aggregation size.
	 */
	if (sc->hn_agg_size < 0)
		size = UINT32_MAX;
	else
		size = sc->hn_agg_size;

	if (sc->hn_rndis_agg_size < size)
		size = sc->hn_rndis_agg_size;

	/* NOTE: We only aggregate packets using chimney sending buffers. */
	if (size > (uint32_t)sc->hn_chim_szmax)
		size = sc->hn_chim_szmax;

	if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
		/* Disable */
		size = 0;
		pkts = 0;
		goto done;
	}

	/* NOTE: Type of the per TX ring setting is 'int'. */
	if (size > INT_MAX)
		size = INT_MAX;

	/*
	 * Setup aggregation packet count.
	 */
	if (sc->hn_agg_pkts < 0)
		pkts = UINT32_MAX;
	else
		pkts = sc->hn_agg_pkts;

	if (sc->hn_rndis_agg_pkts < pkts)
		pkts = sc->hn_rndis_agg_pkts;

	if (pkts <= 1) {
		/* Disable */
		size = 0;
		pkts = 0;
		goto done;
	}

	/* NOTE: Type of the per TX ring setting is 'short'. */
	if (pkts > SHRT_MAX)
		pkts = SHRT_MAX;

done:
	/* NOTE: Type of the per TX ring setting is 'short'. */
	if (sc->hn_rndis_agg_align > SHRT_MAX) {
		/* Disable */
		size = 0;
		pkts = 0;
	}

	if (bootverbose) {
		if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
		    size, pkts, sc->hn_rndis_agg_align);
	}

	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_agg_szmax = size;
		txr->hn_agg_pktmax = pkts;
		txr->hn_agg_align = sc->hn_rndis_agg_align;
		mtx_unlock(&txr->hn_tx_lock);
	}
}

static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{

	KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
	if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
		return txr->hn_txdesc_cnt;
	return hn_tx_swq_depth;
}

#ifndef RSS
static int
hn_rss_reconfig(struct hn_softc *sc)
{
	int error;

	HN_LOCK_ASSERT(sc);

	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
		return (ENXIO);

	/*
	 * Disable RSS first.
	 *
	 * NOTE:
	 * Direct reconfiguration by setting the UNCHG flags does
	 * _not_ work properly.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "disable RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS disable failed\n");
		return (error);
	}

	/*
	 * Reenable the RSS w/ the updated RSS key or indirect
	 * table.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "reconfig RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS reconfig failed\n");
		return (error);
	}
	return (0);
}
#endif	/* !RSS */

static void
hn_rss_ind_fixup(struct hn_softc *sc)
{
	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int i, nchan;

	nchan = sc->hn_rx_ring_inuse;
	KASSERT(nchan > 1, ("invalid # of channels %d", nchan));

	/*
	 * Check indirect table to make sure that all channels in it
	 * can be used.
	 */
	for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
		if (rss->rss_ind[i] >= nchan) {
			if_printf(sc->hn_ifp,
			    "RSS indirect table %d fixup: %u -> %d\n",
			    i, rss->rss_ind[i], nchan - 1);
			rss->rss_ind[i] = nchan - 1;
		}
	}
}

static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
{

	return EOPNOTSUPP;
}

static void
hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct hn_softc *sc = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
		ifmr->ifm_active |= IFM_NONE;
		return;
	}
	ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}

static void
hn_rxvf_set_task(void *xarg, int pending __unused)
{
	struct hn_rxvf_setarg *arg = xarg;

	arg->rxr->hn_rxvf_ifp = arg->vf_ifp;
}

static void
hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp)
{
	struct hn_rx_ring *rxr;
	struct hn_rxvf_setarg arg;
	struct task task;
	int i;

	HN_LOCK_ASSERT(sc);

	TASK_INIT(&task, 0, hn_rxvf_set_task, &arg);

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		rxr = &sc->hn_rx_ring[i];

		if (i < sc->hn_rx_ring_inuse) {
			arg.rxr = rxr;
			arg.vf_ifp = vf_ifp;
			vmbus_chan_run_task(rxr->hn_chan, &task);
		} else {
			rxr->hn_rxvf_ifp = vf_ifp;
		}
	}
}

static bool
hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp)
{
	const struct ifnet *hn_ifp;

	hn_ifp = sc->hn_ifp;

	if (ifp == hn_ifp)
		return (false);

	if (ifp->if_alloctype != IFT_ETHER)
		return (false);

	/* Ignore lagg/vlan interfaces */
	if (strcmp(ifp->if_dname, "lagg") == 0 ||
	    strcmp(ifp->if_dname, "vlan") == 0)
		return (false);

	if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0)
		return (false);

	return (true);
}

static void
hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf)
{
	struct ifnet *hn_ifp;

	HN_LOCK(sc);

	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
		goto out;

	if (!hn_ismyvf(sc, ifp))
		goto out;
	hn_ifp = sc->hn_ifp;

	if (rxvf) {
		if (sc->hn_flags & HN_FLAG_RXVF)
			goto out;

		sc->hn_flags |= HN_FLAG_RXVF;
		hn_rxfilter_config(sc);
	} else {
		if (!(sc->hn_flags & HN_FLAG_RXVF))
			goto out;

		sc->hn_flags &= ~HN_FLAG_RXVF;
		if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
			hn_rxfilter_config(sc);
		else
			hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
	}

	hn_nvs_set_datapath(sc,
	    rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTHETIC);

	hn_rxvf_set(sc, rxvf ? ifp : NULL);

	if (rxvf) {
		hn_suspend_mgmt(sc);
		sc->hn_link_flags &=
		    ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
		if_link_state_change(hn_ifp, LINK_STATE_DOWN);
	} else {
		hn_resume_mgmt(sc);
	}

	devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname,
	    rxvf ? "VF_UP" : "VF_DOWN", NULL);

	if (bootverbose) {
		if_printf(hn_ifp, "datapath is switched %s %s\n",
"to" : "from", ifp->if_xname); 1108 } 1109 out: 1110 HN_UNLOCK(sc); 1111 } 1112 1113 static void 1114 hn_ifnet_event(void *arg, struct ifnet *ifp, int event) 1115 { 1116 1117 if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN) 1118 return; 1119 hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP); 1120 } 1121 1122 static void 1123 hn_ifaddr_event(void *arg, struct ifnet *ifp) 1124 { 1125 1126 hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP); 1127 } 1128 1129 static void 1130 hn_ifnet_attevent(void *xsc, struct ifnet *ifp) 1131 { 1132 struct hn_softc *sc = xsc; 1133 1134 HN_LOCK(sc); 1135 1136 if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 1137 goto done; 1138 1139 if (!hn_ismyvf(sc, ifp)) 1140 goto done; 1141 1142 if (sc->hn_vf_ifp != NULL) { 1143 if_printf(sc->hn_ifp, "%s was attached as VF\n", 1144 sc->hn_vf_ifp->if_xname); 1145 goto done; 1146 } 1147 1148 rm_wlock(&hn_vfmap_lock); 1149 1150 if (ifp->if_index >= hn_vfmap_size) { 1151 struct ifnet **newmap; 1152 int newsize; 1153 1154 newsize = ifp->if_index + HN_VFMAP_SIZE_DEF; 1155 newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF, 1156 M_WAITOK | M_ZERO); 1157 1158 memcpy(newmap, hn_vfmap, 1159 sizeof(struct ifnet *) * hn_vfmap_size); 1160 free(hn_vfmap, M_DEVBUF); 1161 hn_vfmap = newmap; 1162 hn_vfmap_size = newsize; 1163 } 1164 KASSERT(hn_vfmap[ifp->if_index] == NULL, 1165 ("%s: ifindex %d was mapped to %s", 1166 ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname)); 1167 hn_vfmap[ifp->if_index] = sc->hn_ifp; 1168 1169 rm_wunlock(&hn_vfmap_lock); 1170 1171 sc->hn_vf_ifp = ifp; 1172 done: 1173 HN_UNLOCK(sc); 1174 } 1175 1176 static void 1177 hn_ifnet_detevent(void *xsc, struct ifnet *ifp) 1178 { 1179 struct hn_softc *sc = xsc; 1180 1181 HN_LOCK(sc); 1182 1183 if (sc->hn_vf_ifp == NULL) 1184 goto done; 1185 1186 if (!hn_ismyvf(sc, ifp)) 1187 goto done; 1188 1189 sc->hn_vf_ifp = NULL; 1190 1191 rm_wlock(&hn_vfmap_lock); 1192 1193 KASSERT(ifp->if_index < hn_vfmap_size, 1194 ("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size)); 1195 if (hn_vfmap[ifp->if_index] != NULL) { 1196 KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp, 1197 ("%s: ifindex %d was mapped to %s", 1198 ifp->if_xname, ifp->if_index, 1199 hn_vfmap[ifp->if_index]->if_xname)); 1200 hn_vfmap[ifp->if_index] = NULL; 1201 } 1202 1203 rm_wunlock(&hn_vfmap_lock); 1204 done: 1205 HN_UNLOCK(sc); 1206 } 1207 1208 /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ 1209 static const struct hyperv_guid g_net_vsc_device_type = { 1210 .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 1211 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} 1212 }; 1213 1214 static int 1215 hn_probe(device_t dev) 1216 { 1217 1218 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, 1219 &g_net_vsc_device_type) == 0) { 1220 device_set_desc(dev, "Hyper-V Network Interface"); 1221 return BUS_PROBE_DEFAULT; 1222 } 1223 return ENXIO; 1224 } 1225 1226 static int 1227 hn_attach(device_t dev) 1228 { 1229 struct hn_softc *sc = device_get_softc(dev); 1230 struct sysctl_oid_list *child; 1231 struct sysctl_ctx_list *ctx; 1232 uint8_t eaddr[ETHER_ADDR_LEN]; 1233 struct ifnet *ifp = NULL; 1234 int error, ring_cnt, tx_ring_cnt; 1235 1236 sc->hn_dev = dev; 1237 sc->hn_prichan = vmbus_get_channel(dev); 1238 HN_LOCK_INIT(sc); 1239 1240 /* 1241 * Initialize these tunables once. 1242 */ 1243 sc->hn_agg_size = hn_tx_agg_size; 1244 sc->hn_agg_pkts = hn_tx_agg_pkts; 1245 1246 /* 1247 * Setup taskqueue for transmission. 
	if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
		int i;

		sc->hn_tx_taskqs =
		    malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
		    M_DEVBUF, M_WAITOK);
		for (i = 0; i < hn_tx_taskq_cnt; ++i) {
			sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
			    M_WAITOK, taskqueue_thread_enqueue,
			    &sc->hn_tx_taskqs[i]);
			taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
			    "%s tx%d", device_get_nameunit(dev), i);
		}
	} else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
		sc->hn_tx_taskqs = hn_tx_taskque;
	}

	/*
	 * Setup taskqueue for management tasks, e.g. link status.
	 */
	sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
	taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
	TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
	TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
	    hn_netchg_status_taskfunc, sc);

	/*
	 * Allocate ifnet and setup its name earlier, so that if_printf
	 * can be used by functions which will be called after
	 * ether_ifattach().
	 */
	ifp = sc->hn_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/*
	 * Initialize ifmedia earlier so that it can be unconditionally
	 * destroyed, if error happened later on.
	 */
	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);

	/*
	 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
	 * to use (tx_ring_cnt).
	 *
	 * NOTE:
	 * The # of RX rings to use is same as the # of channels to use.
	 */
	ring_cnt = hn_chan_cnt;
	if (ring_cnt <= 0) {
		/* Default */
		ring_cnt = mp_ncpus;
		if (ring_cnt > HN_RING_CNT_DEF_MAX)
			ring_cnt = HN_RING_CNT_DEF_MAX;
	} else if (ring_cnt > mp_ncpus) {
		ring_cnt = mp_ncpus;
	}
#ifdef RSS
	if (ring_cnt > rss_getnumbuckets())
		ring_cnt = rss_getnumbuckets();
#endif

	tx_ring_cnt = hn_tx_ring_cnt;
	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
		tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		/* ifnet.if_start only needs one TX ring. */
		tx_ring_cnt = 1;
	}
#endif

	/*
	 * Set the leader CPU for channels.
	 */
	sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;

	/*
	 * Create enough TX/RX rings, even if only limited number of
	 * channels can be allocated.
	 */
	error = hn_create_tx_data(sc, tx_ring_cnt);
	if (error)
		goto failed;
	error = hn_create_rx_data(sc, ring_cnt);
	if (error)
		goto failed;

	/*
	 * Create transaction context for NVS and RNDIS transactions.
	 */
	sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
	    HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
	if (sc->hn_xact == NULL) {
		error = ENXIO;
		goto failed;
	}

	/*
	 * Install orphan handler for the revocation of this device's
	 * primary channel.
	 *
	 * NOTE:
	 * The processing order is critical here:
	 * Install the orphan handler, _before_ testing whether this
	 * device's primary channel has been revoked or not.
	 */
	vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
	if (vmbus_chan_is_revoked(sc->hn_prichan)) {
		error = ENXIO;
		goto failed;
	}

	/*
	 * Attach the synthetic parts, i.e. NVS and RNDIS.
	 */
	error = hn_synth_attach(sc, ETHERMTU);
	if (error)
		goto failed;

	error = hn_rndis_get_eaddr(sc, eaddr);
	if (error)
		goto failed;

#if __FreeBSD_version >= 1100099
	if (sc->hn_rx_ring_inuse > 1) {
		/*
		 * Reduce TCP segment aggregation limit for multiple
		 * RX rings to increase ACK timeliness.
		 */
		hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
	}
#endif

	/*
	 * Fixup TX stuffs after synthetic parts are attached.
	 */
	hn_fixup_tx_data(sc);

	ctx = device_get_sysctl_ctx(dev);
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
	    &sc->hn_nvs_ver, 0, "NVS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_ndis_version_sysctl, "A", "NDIS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_caps_sysctl, "A", "capabilities");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_hwassist_sysctl, "A", "hwassist");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rxfilter_sysctl, "A", "rxfilter");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_hash_sysctl, "A", "RSS hash");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
	    CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
#ifndef RSS
	/*
	 * Don't allow RSS key/indirect table changes, if RSS is defined.
	 */
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_key_sysctl, "IU", "RSS key");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_ind_sysctl, "IU", "RSS indirect table");
#endif
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
	    CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
	    "RNDIS offered packet transmission aggregation size limit");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
	    CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
	    "RNDIS offered packet transmission aggregation count limit");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
	    CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
	    "RNDIS packet transmission aggregation alignment");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_txagg_size_sysctl, "I",
	    "Packet transmission aggregation size, 0 -- disable, -1 -- auto");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_txagg_pkts_sysctl, "I",
	    "Packet transmission aggregation packets, "
	    "0 -- disable, -1 -- auto");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling",
	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_polling_sysctl, "I",
	    "Polling frequency: [100,1000000], 0 disable polling");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_vf_sysctl, "A", "Virtual Function's name");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rxvf_sysctl, "A", "activated Virtual Function's name");

	/*
	 * Setup the ifmedia, which has been initialized earlier.
	 */
	ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
	/* XXX ifmedia_set really should do this for us */
	sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;

	/*
	 * Setup the ifnet for this interface.
	 */

	ifp->if_baudrate = IF_Gbps(10);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = hn_ioctl;
	ifp->if_init = hn_init;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);

		ifp->if_start = hn_start;
		IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
		ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
		IFQ_SET_READY(&ifp->if_snd);
	} else
#endif
	{
		ifp->if_transmit = hn_transmit;
		ifp->if_qflush = hn_xmit_qflush;
	}

	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
#ifdef foo
	/* We can't diff IPv6 packets from IPv4 packets on RX path. */
	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
#endif
	if (sc->hn_caps & HN_CAP_VLAN) {
		/* XXX not sure about VLAN_MTU. */
		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	}

	ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
	if (ifp->if_hwassist & HN_CSUM_IP_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM;
	if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
	if (sc->hn_caps & HN_CAP_TSO4) {
		ifp->if_capabilities |= IFCAP_TSO4;
		ifp->if_hwassist |= CSUM_IP_TSO;
	}
	if (sc->hn_caps & HN_CAP_TSO6) {
		ifp->if_capabilities |= IFCAP_TSO6;
		ifp->if_hwassist |= CSUM_IP6_TSO;
	}

	/* Enable all available capabilities by default. */
	ifp->if_capenable = ifp->if_capabilities;

	/*
	 * Disable IPv6 TSO and TXCSUM by default, they still can
	 * be enabled through SIOCSIFCAP.
	 */
	ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
	ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);

	if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
		hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
		ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
		ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
	}

	ether_ifattach(ifp, eaddr);

	if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
		if_printf(ifp, "TSO segcnt %u segsz %u\n",
		    ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
	}

	/* Inform the upper layer about the long frame support. */
	ifp->if_hdrlen = sizeof(struct ether_vlan_header);

	/*
	 * Kick off link status check.
	 */
	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
	hn_update_link_status(sc);

	sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
	    hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
	sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
	    hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);

	/*
	 * NOTE:
	 * Subscribe ether_ifattach event, instead of ifnet_arrival event,
	 * since interface's LLADDR is needed; interface LLADDR is not
	 * available when ifnet_arrival event is triggered.
	 */
	sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event,
	    hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY);
	sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event,
	    hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY);

	return (0);
failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
		hn_synth_detach(sc);
	hn_detach(dev);
	return (error);
}

static int
hn_detach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = sc->hn_ifp, *vf_ifp;

	if (sc->hn_ifaddr_evthand != NULL)
		EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
	if (sc->hn_ifnet_evthand != NULL)
		EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand);
	if (sc->hn_ifnet_atthand != NULL) {
		EVENTHANDLER_DEREGISTER(ether_ifattach_event,
		    sc->hn_ifnet_atthand);
	}
	if (sc->hn_ifnet_dethand != NULL) {
		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
		    sc->hn_ifnet_dethand);
	}

	vf_ifp = sc->hn_vf_ifp;
	__compiler_membar();
	if (vf_ifp != NULL)
		hn_ifnet_detevent(sc, vf_ifp);

	if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
		/*
		 * In case that the vmbus missed the orphan handler
		 * installation.
		 */
		vmbus_xact_ctx_orphan(sc->hn_xact);
	}

	if (device_is_attached(dev)) {
		HN_LOCK(sc);
		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				hn_stop(sc, true);
			/*
			 * NOTE:
			 * hn_stop() only suspends data, so management
			 * tasks have to be suspended manually here.
			 */
			hn_suspend_mgmt(sc);
			hn_synth_detach(sc);
		}
		HN_UNLOCK(sc);
		ether_ifdetach(ifp);
	}

	ifmedia_removeall(&sc->hn_media);
	hn_destroy_rx_data(sc);
	hn_destroy_tx_data(sc);

	if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(sc->hn_tx_taskqs[i]);
		free(sc->hn_tx_taskqs, M_DEVBUF);
	}
	taskqueue_free(sc->hn_mgmt_taskq0);

	if (sc->hn_xact != NULL) {
		/*
		 * Uninstall the orphan handler _before_ the xact is
		 * destructed.
		 */
		vmbus_chan_unset_orphan(sc->hn_prichan);
		vmbus_xact_ctx_destroy(sc->hn_xact);
	}

	if_free(ifp);

	HN_LOCK_DESTROY(sc);
	return (0);
}

static int
hn_shutdown(device_t dev)
{

	return (0);
}

static void
hn_link_status(struct hn_softc *sc)
{
	uint32_t link_status;
	int error;

	error = hn_rndis_get_linkstatus(sc, &link_status);
	if (error) {
		/* XXX what to do? */
		return;
	}

	if (link_status == NDIS_MEDIA_STATE_CONNECTED)
		sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
	else
		sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp,
	    (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
	    LINK_STATE_UP : LINK_STATE_DOWN);
}

static void
hn_link_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		return;
	hn_link_status(sc);
}

static void
hn_netchg_init_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Prevent any link status checks from running. */
	sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;

	/*
	 * Fake up a [link down --> link up] state change; 5 seconds
	 * delay is used, which closely simulates miibus reaction
	 * upon link down event.
	 */
	sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
	taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
	    &sc->hn_netchg_status, 5 * hz);
}
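
/*
 * hn_netchg_init_taskfunc() above forces the link down and arms the
 * delayed task below; hn_netchg_status_taskfunc() then re-allows link
 * status checks and reports the current link state, completing the
 * faked [link down --> link up] transition.
 */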
static void
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Re-allow link status checks. */
	sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
	hn_link_status(sc);
}

static void
hn_update_link_status(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
}

static void
hn_change_network(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
}

static __inline int
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m = *m_head;
	int error;

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));

	error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
	    m, segs, nsegs, BUS_DMA_NOWAIT);
	if (error == EFBIG) {
		struct mbuf *m_new;

		m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
		if (m_new == NULL)
			return ENOBUFS;
		else
			*m_head = m = m_new;
		txr->hn_tx_collapsed++;

		error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
		    txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
	}
	if (!error) {
		bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
		    BUS_DMASYNC_PREWRITE);
		txd->flags |= HN_TXD_FLAG_DMAMAP;
	}
	return error;
}

static __inline int
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
	    ("put an onlist txd %#x", txd->flags));
	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("put an onagg txd %#x", txd->flags));

	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
		return 0;

	if (!STAILQ_EMPTY(&txd->agg_list)) {
		struct hn_txdesc *tmp_txd;

		while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
			int freed;

			KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
			    ("recursive aggregation on aggregated txdesc"));
			KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
			    ("not aggregated txdesc"));
			KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
			    ("aggregated txdesc uses dmamap"));
			KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
			    ("aggregated txdesc consumes "
			     "chimney sending buffer"));
			KASSERT(tmp_txd->chim_size == 0,
			    ("aggregated txdesc has non-zero "
			     "chimney sending size"));

			STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
			tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
			freed = hn_txdesc_put(txr, tmp_txd);
			KASSERT(freed, ("failed to free aggregated txdesc"));
		}
	}

	if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
		    ("chim txd uses dmamap"));
		hn_chim_free(txr->hn_sc, txd->chim_index);
		txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
		txd->chim_size = 0;
	} else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
		bus_dmamap_sync(txr->hn_tx_data_dtag,
		    txd->data_dmap, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(txr->hn_tx_data_dtag,
		    txd->data_dmap);
		txd->flags &= ~HN_TXD_FLAG_DMAMAP;
	}

	if (txd->m != NULL) {
		m_freem(txd->m);
		txd->m = NULL;
	}

	txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	KASSERT(txr->hn_txdesc_avail >= 0 &&
	    txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
	    ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
	txr->hn_txdesc_avail++;
	SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else	/* HN_USE_TXDESC_BUFRING */
#ifdef HN_DEBUG
	atomic_add_int(&txr->hn_txdesc_avail, 1);
#endif
	buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif	/* !HN_USE_TXDESC_BUFRING */

	return 1;
}

static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_tx_ring *txr)
{
	struct hn_txdesc *txd;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	txd = SLIST_FIRST(&txr->hn_txlist);
	if (txd != NULL) {
		KASSERT(txr->hn_txdesc_avail > 0,
		    ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
		txr->hn_txdesc_avail--;
		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
	}
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
#endif

	if (txd != NULL) {
#ifdef HN_USE_TXDESC_BUFRING
#ifdef HN_DEBUG
		atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif
#endif	/* HN_USE_TXDESC_BUFRING */
		KASSERT(txd->m == NULL && txd->refs == 0 &&
		    STAILQ_EMPTY(&txd->agg_list) &&
		    txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
		    txd->chim_size == 0 &&
		    (txd->flags & HN_TXD_FLAG_ONLIST) &&
		    (txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
		    (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
		txd->flags &= ~HN_TXD_FLAG_ONLIST;
		txd->refs = 1;
	}
	return txd;
}

static __inline void
hn_txdesc_hold(struct hn_txdesc *txd)
{

	/* 0->1 transition will never work */
	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
	atomic_add_int(&txd->refs, 1);
}

static __inline void
hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
{

	KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("recursive aggregation on aggregating txdesc"));

	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("already aggregated"));
	KASSERT(STAILQ_EMPTY(&txd->agg_list),
	    ("recursive aggregation on to-be-aggregated txdesc"));

	txd->flags |= HN_TXD_FLAG_ONAGG;
	STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);
}

static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
	bool pending = false;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
		pending = true;
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	if (!buf_ring_full(txr->hn_txdesc_br))
		pending = true;
#endif
	return (pending);
}

static __inline void
hn_txeof(struct hn_tx_ring *txr)
{
	txr->hn_has_txeof = 0;
	txr->hn_txeof(txr);
}

static void
hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
    struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
{
	struct hn_txdesc *txd = sndc->hn_cbarg;
	struct hn_tx_ring *txr;

	txr = txd->txr;
	KASSERT(txr->hn_chan == chan,
	    ("channel mismatch, on chan%u, should be chan%u",
	     vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan)));

	txr->hn_has_txeof = 1;
	hn_txdesc_put(txr, txd);

	++txr->hn_txdone_cnt;
	if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
		txr->hn_txdone_cnt = 0;
(txr->hn_oactive) 1938 hn_txeof(txr); 1939 } 1940 } 1941 1942 static void 1943 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 1944 { 1945 #if defined(INET) || defined(INET6) 1946 tcp_lro_flush_all(&rxr->hn_lro); 1947 #endif 1948 1949 /* 1950 * NOTE: 1951 * 'txr' could be NULL, if multiple channels and 1952 * ifnet.if_start method are enabled. 1953 */ 1954 if (txr == NULL || !txr->hn_has_txeof) 1955 return; 1956 1957 txr->hn_txdone_cnt = 0; 1958 hn_txeof(txr); 1959 } 1960 1961 static __inline uint32_t 1962 hn_rndis_pktmsg_offset(uint32_t ofs) 1963 { 1964 1965 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 1966 ("invalid RNDIS packet msg offset %u", ofs)); 1967 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 1968 } 1969 1970 static __inline void * 1971 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 1972 size_t pi_dlen, uint32_t pi_type) 1973 { 1974 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 1975 struct rndis_pktinfo *pi; 1976 1977 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 1978 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 1979 1980 /* 1981 * Per-packet-info does not move; it only grows. 1982 * 1983 * NOTE: 1984 * rm_pktinfooffset in this phase counts from the beginning 1985 * of rndis_packet_msg. 1986 */ 1987 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 1988 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 1989 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 1990 pkt->rm_pktinfolen); 1991 pkt->rm_pktinfolen += pi_size; 1992 1993 pi->rm_size = pi_size; 1994 pi->rm_type = pi_type; 1995 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 1996 1997 return (pi->rm_data); 1998 } 1999 2000 static __inline int 2001 hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) 2002 { 2003 struct hn_txdesc *txd; 2004 struct mbuf *m; 2005 int error, pkts; 2006 2007 txd = txr->hn_agg_txd; 2008 KASSERT(txd != NULL, ("no aggregate txdesc")); 2009 2010 /* 2011 * Since hn_txpkt() will reset this temporary stat, save 2012 * it now, so that oerrors can be updated properly, if 2013 * hn_txpkt() ever fails. 2014 */ 2015 pkts = txr->hn_stat_pkts; 2016 2017 /* 2018 * Since txd's mbuf will _not_ be freed upon hn_txpkt() 2019 * failure, save it for later freeing, if hn_txpkt() ever 2020 * fails. 2021 */ 2022 m = txd->m; 2023 error = hn_txpkt(ifp, txr, txd); 2024 if (__predict_false(error)) { 2025 /* txd is freed, but m is not. */ 2026 m_freem(m); 2027 2028 txr->hn_flush_failed++; 2029 if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); 2030 } 2031 2032 /* Reset all aggregation states. */ 2033 txr->hn_agg_txd = NULL; 2034 txr->hn_agg_szleft = 0; 2035 txr->hn_agg_pktleft = 0; 2036 txr->hn_agg_prevpkt = NULL; 2037 2038 return (error); 2039 } 2040 2041 static void * 2042 hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2043 int pktsize) 2044 { 2045 void *chim; 2046 2047 if (txr->hn_agg_txd != NULL) { 2048 if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { 2049 struct hn_txdesc *agg_txd = txr->hn_agg_txd; 2050 struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; 2051 int olen; 2052 2053 /* 2054 * Update the previous RNDIS packet's total length, 2055 * it can be increased due to the mandatory alignment 2056 * padding for this RNDIS packet. And update the 2057 * aggregating txdesc's chimney sending buffer size 2058 * accordingly. 2059 * 2060 * XXX 2061 * Zero-out the padding, as required by the RNDIS spec. 
2062 */ 2063 olen = pkt->rm_len; 2064 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 2065 agg_txd->chim_size += pkt->rm_len - olen; 2066 2067 /* Link this txdesc to the parent. */ 2068 hn_txdesc_agg(agg_txd, txd); 2069 2070 chim = (uint8_t *)pkt + pkt->rm_len; 2071 /* Save the current packet for later fixup. */ 2072 txr->hn_agg_prevpkt = chim; 2073 2074 txr->hn_agg_pktleft--; 2075 txr->hn_agg_szleft -= pktsize; 2076 if (txr->hn_agg_szleft <= 2077 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 2078 /* 2079 * Probably can't aggregate more packets, 2080 * flush this aggregating txdesc proactively. 2081 */ 2082 txr->hn_agg_pktleft = 0; 2083 } 2084 /* Done! */ 2085 return (chim); 2086 } 2087 hn_flush_txagg(ifp, txr); 2088 } 2089 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 2090 2091 txr->hn_tx_chimney_tried++; 2092 txd->chim_index = hn_chim_alloc(txr->hn_sc); 2093 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 2094 return (NULL); 2095 txr->hn_tx_chimney++; 2096 2097 chim = txr->hn_sc->hn_chim + 2098 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 2099 2100 if (txr->hn_agg_pktmax > 1 && 2101 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 2102 txr->hn_agg_txd = txd; 2103 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 2104 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 2105 txr->hn_agg_prevpkt = chim; 2106 } 2107 return (chim); 2108 } 2109 2110 /* 2111 * NOTE: 2112 * If this function fails, then both txd and m_head0 will be freed. 2113 */ 2114 static int 2115 hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 2116 struct mbuf **m_head0) 2117 { 2118 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 2119 int error, nsegs, i; 2120 struct mbuf *m_head = *m_head0; 2121 struct rndis_packet_msg *pkt; 2122 uint32_t *pi_data; 2123 void *chim = NULL; 2124 int pkt_hlen, pkt_size; 2125 2126 pkt = txd->rndis_pkt; 2127 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 2128 if (pkt_size < txr->hn_chim_size) { 2129 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 2130 if (chim != NULL) 2131 pkt = chim; 2132 } else { 2133 if (txr->hn_agg_txd != NULL) 2134 hn_flush_txagg(ifp, txr); 2135 } 2136 2137 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 2138 pkt->rm_len = m_head->m_pkthdr.len; 2139 pkt->rm_dataoffset = 0; 2140 pkt->rm_datalen = m_head->m_pkthdr.len; 2141 pkt->rm_oobdataoffset = 0; 2142 pkt->rm_oobdatalen = 0; 2143 pkt->rm_oobdataelements = 0; 2144 pkt->rm_pktinfooffset = sizeof(*pkt); 2145 pkt->rm_pktinfolen = 0; 2146 pkt->rm_vchandle = 0; 2147 pkt->rm_reserved = 0; 2148 2149 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 2150 /* 2151 * Set the hash value for this packet, so that the host could 2152 * dispatch the TX done event for this packet back to this TX 2153 * ring's channel. 
2154 */ 2155 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2156 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 2157 *pi_data = txr->hn_tx_idx; 2158 } 2159 2160 if (m_head->m_flags & M_VLANTAG) { 2161 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2162 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 2163 *pi_data = NDIS_VLAN_INFO_MAKE( 2164 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 2165 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 2166 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 2167 } 2168 2169 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 2170 #if defined(INET6) || defined(INET) 2171 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2172 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 2173 #ifdef INET 2174 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 2175 *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0, 2176 m_head->m_pkthdr.tso_segsz); 2177 } 2178 #endif 2179 #if defined(INET6) && defined(INET) 2180 else 2181 #endif 2182 #ifdef INET6 2183 { 2184 *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0, 2185 m_head->m_pkthdr.tso_segsz); 2186 } 2187 #endif 2188 #endif /* INET6 || INET */ 2189 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 2190 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2191 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 2192 if (m_head->m_pkthdr.csum_flags & 2193 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 2194 *pi_data = NDIS_TXCSUM_INFO_IPV6; 2195 } else { 2196 *pi_data = NDIS_TXCSUM_INFO_IPV4; 2197 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 2198 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 2199 } 2200 2201 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) 2202 *pi_data |= NDIS_TXCSUM_INFO_TCPCS; 2203 else if (m_head->m_pkthdr.csum_flags & 2204 (CSUM_IP_UDP | CSUM_IP6_UDP)) 2205 *pi_data |= NDIS_TXCSUM_INFO_UDPCS; 2206 } 2207 2208 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 2209 /* Fixup RNDIS packet message total length */ 2210 pkt->rm_len += pkt_hlen; 2211 /* Convert RNDIS packet message offsets */ 2212 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); 2213 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 2214 2215 /* 2216 * Fast path: Chimney sending. 2217 */ 2218 if (chim != NULL) { 2219 struct hn_txdesc *tgt_txd = txd; 2220 2221 if (txr->hn_agg_txd != NULL) { 2222 tgt_txd = txr->hn_agg_txd; 2223 #ifdef INVARIANTS 2224 *m_head0 = NULL; 2225 #endif 2226 } 2227 2228 KASSERT(pkt == chim, 2229 ("RNDIS pkt not in chimney sending buffer")); 2230 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 2231 ("chimney sending buffer is not used")); 2232 tgt_txd->chim_size += pkt->rm_len; 2233 2234 m_copydata(m_head, 0, m_head->m_pkthdr.len, 2235 ((uint8_t *)chim) + pkt_hlen); 2236 2237 txr->hn_gpa_cnt = 0; 2238 txr->hn_sendpkt = hn_txpkt_chim; 2239 goto done; 2240 } 2241 2242 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 2243 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2244 ("chimney buffer is used")); 2245 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 2246 2247 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 2248 if (__predict_false(error)) { 2249 int freed; 2250 2251 /* 2252 * This mbuf is not linked w/ the txd yet, so free it now. 
2253 */ 2254 m_freem(m_head); 2255 *m_head0 = NULL; 2256 2257 freed = hn_txdesc_put(txr, txd); 2258 KASSERT(freed != 0, 2259 ("fail to free txd upon txdma error")); 2260 2261 txr->hn_txdma_failed++; 2262 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2263 return error; 2264 } 2265 *m_head0 = m_head; 2266 2267 /* +1 RNDIS packet message */ 2268 txr->hn_gpa_cnt = nsegs + 1; 2269 2270 /* send packet with page buffer */ 2271 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 2272 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 2273 txr->hn_gpa[0].gpa_len = pkt_hlen; 2274 2275 /* 2276 * Fill the page buffers with mbuf info after the page 2277 * buffer for RNDIS packet message. 2278 */ 2279 for (i = 0; i < nsegs; ++i) { 2280 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 2281 2282 gpa->gpa_page = atop(segs[i].ds_addr); 2283 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 2284 gpa->gpa_len = segs[i].ds_len; 2285 } 2286 2287 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2288 txd->chim_size = 0; 2289 txr->hn_sendpkt = hn_txpkt_sglist; 2290 done: 2291 txd->m = m_head; 2292 2293 /* Set the completion routine */ 2294 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 2295 2296 /* Update temporary stats for later use. */ 2297 txr->hn_stat_pkts++; 2298 txr->hn_stat_size += m_head->m_pkthdr.len; 2299 if (m_head->m_flags & M_MCAST) 2300 txr->hn_stat_mcasts++; 2301 2302 return 0; 2303 } 2304 2305 /* 2306 * NOTE: 2307 * If this function fails, then txd will be freed, but the mbuf 2308 * associated w/ the txd will _not_ be freed. 2309 */ 2310 static int 2311 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 2312 { 2313 int error, send_failed = 0, has_bpf; 2314 2315 again: 2316 has_bpf = bpf_peers_present(ifp->if_bpf); 2317 if (has_bpf) { 2318 /* 2319 * Make sure that this txd and any aggregated txds are not 2320 * freed before ETHER_BPF_MTAP. 2321 */ 2322 hn_txdesc_hold(txd); 2323 } 2324 error = txr->hn_sendpkt(txr, txd); 2325 if (!error) { 2326 if (has_bpf) { 2327 const struct hn_txdesc *tmp_txd; 2328 2329 ETHER_BPF_MTAP(ifp, txd->m); 2330 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 2331 ETHER_BPF_MTAP(ifp, tmp_txd->m); 2332 } 2333 2334 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 2335 #ifdef HN_IFSTART_SUPPORT 2336 if (!hn_use_if_start) 2337 #endif 2338 { 2339 if_inc_counter(ifp, IFCOUNTER_OBYTES, 2340 txr->hn_stat_size); 2341 if (txr->hn_stat_mcasts != 0) { 2342 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 2343 txr->hn_stat_mcasts); 2344 } 2345 } 2346 txr->hn_pkts += txr->hn_stat_pkts; 2347 txr->hn_sends++; 2348 } 2349 if (has_bpf) 2350 hn_txdesc_put(txr, txd); 2351 2352 if (__predict_false(error)) { 2353 int freed; 2354 2355 /* 2356 * This should "really rarely" happen. 2357 * 2358 * XXX Too many RX to be acked or too many sideband 2359 * commands to run? Ask netvsc_channel_rollup() 2360 * to kick start later. 2361 */ 2362 txr->hn_has_txeof = 1; 2363 if (!send_failed) { 2364 txr->hn_send_failed++; 2365 send_failed = 1; 2366 /* 2367 * Try sending again after set hn_has_txeof; 2368 * in case that we missed the last 2369 * netvsc_channel_rollup(). 2370 */ 2371 goto again; 2372 } 2373 if_printf(ifp, "send failed\n"); 2374 2375 /* 2376 * Caller will perform further processing on the 2377 * associated mbuf, so don't free it in hn_txdesc_put(); 2378 * only unload it from the DMA map in hn_txdesc_put(), 2379 * if it was loaded. 
2380 */ 2381 txd->m = NULL; 2382 freed = hn_txdesc_put(txr, txd); 2383 KASSERT(freed != 0, 2384 ("fail to free txd upon send error")); 2385 2386 txr->hn_send_failed++; 2387 } 2388 2389 /* Reset temporary stats, after this sending is done. */ 2390 txr->hn_stat_size = 0; 2391 txr->hn_stat_pkts = 0; 2392 txr->hn_stat_mcasts = 0; 2393 2394 return (error); 2395 } 2396 2397 /* 2398 * Append the specified data to the indicated mbuf chain, 2399 * Extend the mbuf chain if the new data does not fit in 2400 * existing space. 2401 * 2402 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 2403 * There should be an equivalent in the kernel mbuf code, 2404 * but there does not appear to be one yet. 2405 * 2406 * Differs from m_append() in that additional mbufs are 2407 * allocated with cluster size MJUMPAGESIZE, and filled 2408 * accordingly. 2409 * 2410 * Return 1 if able to complete the job; otherwise 0. 2411 */ 2412 static int 2413 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 2414 { 2415 struct mbuf *m, *n; 2416 int remainder, space; 2417 2418 for (m = m0; m->m_next != NULL; m = m->m_next) 2419 ; 2420 remainder = len; 2421 space = M_TRAILINGSPACE(m); 2422 if (space > 0) { 2423 /* 2424 * Copy into available space. 2425 */ 2426 if (space > remainder) 2427 space = remainder; 2428 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 2429 m->m_len += space; 2430 cp += space; 2431 remainder -= space; 2432 } 2433 while (remainder > 0) { 2434 /* 2435 * Allocate a new mbuf; could check space 2436 * and allocate a cluster instead. 2437 */ 2438 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 2439 if (n == NULL) 2440 break; 2441 n->m_len = min(MJUMPAGESIZE, remainder); 2442 bcopy(cp, mtod(n, caddr_t), n->m_len); 2443 cp += n->m_len; 2444 remainder -= n->m_len; 2445 m->m_next = n; 2446 m = n; 2447 } 2448 if (m0->m_flags & M_PKTHDR) 2449 m0->m_pkthdr.len += len - remainder; 2450 2451 return (remainder == 0); 2452 } 2453 2454 #if defined(INET) || defined(INET6) 2455 static __inline int 2456 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 2457 { 2458 #if __FreeBSD_version >= 1100095 2459 if (hn_lro_mbufq_depth) { 2460 tcp_lro_queue_mbuf(lc, m); 2461 return 0; 2462 } 2463 #endif 2464 return tcp_lro_rx(lc, m, 0); 2465 } 2466 #endif 2467 2468 static int 2469 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 2470 const struct hn_rxinfo *info) 2471 { 2472 struct ifnet *ifp; 2473 struct mbuf *m_new; 2474 int size, do_lro = 0, do_csum = 1; 2475 int hash_type; 2476 2477 /* If the VF is active, inject the packet through the VF */ 2478 ifp = rxr->hn_rxvf_ifp ? rxr->hn_rxvf_ifp : rxr->hn_ifp; 2479 2480 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2481 /* 2482 * NOTE: 2483 * See the NOTE of hn_rndis_init_fixat(). This 2484 * function can be reached, immediately after the 2485 * RNDIS is initialized but before the ifnet is 2486 * setup on the hn_attach() path; drop the unexpected 2487 * packets. 2488 */ 2489 return (0); 2490 } 2491 2492 if (dlen <= MHLEN) { 2493 m_new = m_gethdr(M_NOWAIT, MT_DATA); 2494 if (m_new == NULL) { 2495 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 2496 return (0); 2497 } 2498 memcpy(mtod(m_new, void *), data, dlen); 2499 m_new->m_pkthdr.len = m_new->m_len = dlen; 2500 rxr->hn_small_pkts++; 2501 } else { 2502 /* 2503 * Get an mbuf with a cluster. For packets 2K or less, 2504 * get a standard 2K cluster. For anything larger, get a 2505 * 4K cluster. Any buffers larger than 4K can cause problems 2506 * if looped around to the Hyper-V TX channel, so avoid them. 
2507 */ 2508 size = MCLBYTES; 2509 if (dlen > MCLBYTES) { 2510 /* 4096 */ 2511 size = MJUMPAGESIZE; 2512 } 2513 2514 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 2515 if (m_new == NULL) { 2516 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 2517 return (0); 2518 } 2519 2520 hv_m_append(m_new, dlen, data); 2521 } 2522 m_new->m_pkthdr.rcvif = ifp; 2523 2524 if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) 2525 do_csum = 0; 2526 2527 /* receive side checksum offload */ 2528 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 2529 /* IP csum offload */ 2530 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 2531 m_new->m_pkthdr.csum_flags |= 2532 (CSUM_IP_CHECKED | CSUM_IP_VALID); 2533 rxr->hn_csum_ip++; 2534 } 2535 2536 /* TCP/UDP csum offload */ 2537 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 2538 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 2539 m_new->m_pkthdr.csum_flags |= 2540 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2541 m_new->m_pkthdr.csum_data = 0xffff; 2542 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 2543 rxr->hn_csum_tcp++; 2544 else 2545 rxr->hn_csum_udp++; 2546 } 2547 2548 /* 2549 * XXX 2550 * As of this write (Oct 28th, 2016), host side will turn 2551 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 2552 * the do_lro setting here is actually _not_ accurate. We 2553 * depend on the RSS hash type check to reset do_lro. 2554 */ 2555 if ((info->csum_info & 2556 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 2557 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 2558 do_lro = 1; 2559 } else { 2560 const struct ether_header *eh; 2561 uint16_t etype; 2562 int hoff; 2563 2564 hoff = sizeof(*eh); 2565 if (m_new->m_len < hoff) 2566 goto skip; 2567 eh = mtod(m_new, struct ether_header *); 2568 etype = ntohs(eh->ether_type); 2569 if (etype == ETHERTYPE_VLAN) { 2570 const struct ether_vlan_header *evl; 2571 2572 hoff = sizeof(*evl); 2573 if (m_new->m_len < hoff) 2574 goto skip; 2575 evl = mtod(m_new, struct ether_vlan_header *); 2576 etype = ntohs(evl->evl_proto); 2577 } 2578 2579 if (etype == ETHERTYPE_IP) { 2580 int pr; 2581 2582 pr = hn_check_iplen(m_new, hoff); 2583 if (pr == IPPROTO_TCP) { 2584 if (do_csum && 2585 (rxr->hn_trust_hcsum & 2586 HN_TRUST_HCSUM_TCP)) { 2587 rxr->hn_csum_trusted++; 2588 m_new->m_pkthdr.csum_flags |= 2589 (CSUM_IP_CHECKED | CSUM_IP_VALID | 2590 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2591 m_new->m_pkthdr.csum_data = 0xffff; 2592 } 2593 do_lro = 1; 2594 } else if (pr == IPPROTO_UDP) { 2595 if (do_csum && 2596 (rxr->hn_trust_hcsum & 2597 HN_TRUST_HCSUM_UDP)) { 2598 rxr->hn_csum_trusted++; 2599 m_new->m_pkthdr.csum_flags |= 2600 (CSUM_IP_CHECKED | CSUM_IP_VALID | 2601 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2602 m_new->m_pkthdr.csum_data = 0xffff; 2603 } 2604 } else if (pr != IPPROTO_DONE && do_csum && 2605 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 2606 rxr->hn_csum_trusted++; 2607 m_new->m_pkthdr.csum_flags |= 2608 (CSUM_IP_CHECKED | CSUM_IP_VALID); 2609 } 2610 } 2611 } 2612 skip: 2613 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 2614 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 2615 NDIS_VLAN_INFO_ID(info->vlan_info), 2616 NDIS_VLAN_INFO_PRI(info->vlan_info), 2617 NDIS_VLAN_INFO_CFI(info->vlan_info)); 2618 m_new->m_flags |= M_VLANTAG; 2619 } 2620 2621 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 2622 rxr->hn_rss_pkts++; 2623 m_new->m_pkthdr.flowid = info->hash_value; 2624 hash_type = M_HASHTYPE_OPAQUE_HASH; 2625 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 2626 
NDIS_HASH_FUNCTION_TOEPLITZ) { 2627 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK); 2628 2629 /* 2630 * NOTE: 2631 * do_lro is reset if the hash types are not TCP 2632 * related. See the comment in the above csum_flags 2633 * setup section. 2634 */ 2635 switch (type) { 2636 case NDIS_HASH_IPV4: 2637 hash_type = M_HASHTYPE_RSS_IPV4; 2638 do_lro = 0; 2639 break; 2640 2641 case NDIS_HASH_TCP_IPV4: 2642 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 2643 break; 2644 2645 case NDIS_HASH_IPV6: 2646 hash_type = M_HASHTYPE_RSS_IPV6; 2647 do_lro = 0; 2648 break; 2649 2650 case NDIS_HASH_IPV6_EX: 2651 hash_type = M_HASHTYPE_RSS_IPV6_EX; 2652 do_lro = 0; 2653 break; 2654 2655 case NDIS_HASH_TCP_IPV6: 2656 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 2657 break; 2658 2659 case NDIS_HASH_TCP_IPV6_EX: 2660 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 2661 break; 2662 } 2663 } 2664 } else { 2665 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 2666 hash_type = M_HASHTYPE_OPAQUE; 2667 } 2668 M_HASHTYPE_SET(m_new, hash_type); 2669 2670 /* 2671 * Note: Moved RX completion back to hv_nv_on_receive() so all 2672 * messages (not just data messages) will trigger a response. 2673 */ 2674 2675 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 2676 rxr->hn_pkts++; 2677 2678 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { 2679 #if defined(INET) || defined(INET6) 2680 struct lro_ctrl *lro = &rxr->hn_lro; 2681 2682 if (lro->lro_cnt) { 2683 rxr->hn_lro_tried++; 2684 if (hn_lro_rx(lro, m_new) == 0) { 2685 /* DONE! */ 2686 return 0; 2687 } 2688 } 2689 #endif 2690 } 2691 2692 /* We're not holding the lock here, so don't release it */ 2693 (*ifp->if_input)(ifp, m_new); 2694 2695 return (0); 2696 } 2697 2698 static int 2699 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 2700 { 2701 struct hn_softc *sc = ifp->if_softc; 2702 struct ifreq *ifr = (struct ifreq *)data; 2703 int mask, error = 0; 2704 2705 switch (cmd) { 2706 case SIOCSIFMTU: 2707 if (ifr->ifr_mtu > HN_MTU_MAX) { 2708 error = EINVAL; 2709 break; 2710 } 2711 2712 HN_LOCK(sc); 2713 2714 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2715 HN_UNLOCK(sc); 2716 break; 2717 } 2718 2719 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 2720 /* Can't change MTU */ 2721 HN_UNLOCK(sc); 2722 error = EOPNOTSUPP; 2723 break; 2724 } 2725 2726 if (ifp->if_mtu == ifr->ifr_mtu) { 2727 HN_UNLOCK(sc); 2728 break; 2729 } 2730 2731 /* 2732 * Suspend this interface before the synthetic parts 2733 * are ripped. 2734 */ 2735 hn_suspend(sc); 2736 2737 /* 2738 * Detach the synthetic parts, i.e. NVS and RNDIS. 2739 */ 2740 hn_synth_detach(sc); 2741 2742 /* 2743 * Reattach the synthetic parts, i.e. NVS and RNDIS, 2744 * with the new MTU setting. 2745 */ 2746 error = hn_synth_attach(sc, ifr->ifr_mtu); 2747 if (error) { 2748 HN_UNLOCK(sc); 2749 break; 2750 } 2751 2752 /* 2753 * Commit the requested MTU, after the synthetic parts 2754 * have been successfully attached. 2755 */ 2756 ifp->if_mtu = ifr->ifr_mtu; 2757 2758 /* 2759 * Make sure that various parameters based on MTU are 2760 * still valid, after the MTU change. 2761 */ 2762 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 2763 hn_set_chim_size(sc, sc->hn_chim_szmax); 2764 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 2765 #if __FreeBSD_version >= 1100099 2766 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < 2767 HN_LRO_LENLIM_MIN(ifp)) 2768 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 2769 #endif 2770 2771 /* 2772 * All done! Resume the interface now. 
2773 */ 2774 hn_resume(sc); 2775 2776 HN_UNLOCK(sc); 2777 break; 2778 2779 case SIOCSIFFLAGS: 2780 HN_LOCK(sc); 2781 2782 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2783 HN_UNLOCK(sc); 2784 break; 2785 } 2786 2787 if (ifp->if_flags & IFF_UP) { 2788 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 2789 /* 2790 * Caller meight hold mutex, e.g. 2791 * bpf; use busy-wait for the RNDIS 2792 * reply. 2793 */ 2794 HN_NO_SLEEPING(sc); 2795 hn_rxfilter_config(sc); 2796 HN_SLEEPING_OK(sc); 2797 } else { 2798 hn_init_locked(sc); 2799 } 2800 } else { 2801 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2802 hn_stop(sc, false); 2803 } 2804 sc->hn_if_flags = ifp->if_flags; 2805 2806 HN_UNLOCK(sc); 2807 break; 2808 2809 case SIOCSIFCAP: 2810 HN_LOCK(sc); 2811 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 2812 2813 if (mask & IFCAP_TXCSUM) { 2814 ifp->if_capenable ^= IFCAP_TXCSUM; 2815 if (ifp->if_capenable & IFCAP_TXCSUM) 2816 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 2817 else 2818 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 2819 } 2820 if (mask & IFCAP_TXCSUM_IPV6) { 2821 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 2822 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 2823 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 2824 else 2825 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 2826 } 2827 2828 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 2829 if (mask & IFCAP_RXCSUM) 2830 ifp->if_capenable ^= IFCAP_RXCSUM; 2831 #ifdef foo 2832 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2833 if (mask & IFCAP_RXCSUM_IPV6) 2834 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 2835 #endif 2836 2837 if (mask & IFCAP_LRO) 2838 ifp->if_capenable ^= IFCAP_LRO; 2839 2840 if (mask & IFCAP_TSO4) { 2841 ifp->if_capenable ^= IFCAP_TSO4; 2842 if (ifp->if_capenable & IFCAP_TSO4) 2843 ifp->if_hwassist |= CSUM_IP_TSO; 2844 else 2845 ifp->if_hwassist &= ~CSUM_IP_TSO; 2846 } 2847 if (mask & IFCAP_TSO6) { 2848 ifp->if_capenable ^= IFCAP_TSO6; 2849 if (ifp->if_capenable & IFCAP_TSO6) 2850 ifp->if_hwassist |= CSUM_IP6_TSO; 2851 else 2852 ifp->if_hwassist &= ~CSUM_IP6_TSO; 2853 } 2854 2855 HN_UNLOCK(sc); 2856 break; 2857 2858 case SIOCADDMULTI: 2859 case SIOCDELMULTI: 2860 HN_LOCK(sc); 2861 2862 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2863 HN_UNLOCK(sc); 2864 break; 2865 } 2866 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 2867 /* 2868 * Multicast uses mutex; use busy-wait for 2869 * the RNDIS reply. 2870 */ 2871 HN_NO_SLEEPING(sc); 2872 hn_rxfilter_config(sc); 2873 HN_SLEEPING_OK(sc); 2874 } 2875 2876 HN_UNLOCK(sc); 2877 break; 2878 2879 case SIOCSIFMEDIA: 2880 case SIOCGIFMEDIA: 2881 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 2882 break; 2883 2884 default: 2885 error = ether_ioctl(ifp, cmd, data); 2886 break; 2887 } 2888 return (error); 2889 } 2890 2891 static void 2892 hn_stop(struct hn_softc *sc, bool detaching) 2893 { 2894 struct ifnet *ifp = sc->hn_ifp; 2895 int i; 2896 2897 HN_LOCK_ASSERT(sc); 2898 2899 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 2900 ("synthetic parts were not attached")); 2901 2902 /* Disable polling. */ 2903 hn_polling(sc, 0); 2904 2905 /* Clear RUNNING bit _before_ hn_suspend_data() */ 2906 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 2907 hn_suspend_data(sc); 2908 2909 /* Clear OACTIVE bit. */ 2910 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2911 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 2912 sc->hn_tx_ring[i].hn_oactive = 0; 2913 2914 /* 2915 * If the VF is active, make sure the filter is not 0, even if 2916 * the synthetic NIC is down. 
2917 */ 2918 if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) 2919 hn_rxfilter_config(sc); 2920 } 2921 2922 static void 2923 hn_init_locked(struct hn_softc *sc) 2924 { 2925 struct ifnet *ifp = sc->hn_ifp; 2926 int i; 2927 2928 HN_LOCK_ASSERT(sc); 2929 2930 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 2931 return; 2932 2933 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2934 return; 2935 2936 /* Configure RX filter */ 2937 hn_rxfilter_config(sc); 2938 2939 /* Clear OACTIVE bit. */ 2940 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2941 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 2942 sc->hn_tx_ring[i].hn_oactive = 0; 2943 2944 /* Clear TX 'suspended' bit. */ 2945 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 2946 2947 /* Everything is ready; unleash! */ 2948 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 2949 2950 /* Re-enable polling if requested. */ 2951 if (sc->hn_pollhz > 0) 2952 hn_polling(sc, sc->hn_pollhz); 2953 } 2954 2955 static void 2956 hn_init(void *xsc) 2957 { 2958 struct hn_softc *sc = xsc; 2959 2960 HN_LOCK(sc); 2961 hn_init_locked(sc); 2962 HN_UNLOCK(sc); 2963 } 2964 2965 #if __FreeBSD_version >= 1100099 2966 2967 static int 2968 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 2969 { 2970 struct hn_softc *sc = arg1; 2971 unsigned int lenlim; 2972 int error; 2973 2974 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 2975 error = sysctl_handle_int(oidp, &lenlim, 0, req); 2976 if (error || req->newptr == NULL) 2977 return error; 2978 2979 HN_LOCK(sc); 2980 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 2981 lenlim > TCP_LRO_LENGTH_MAX) { 2982 HN_UNLOCK(sc); 2983 return EINVAL; 2984 } 2985 hn_set_lro_lenlim(sc, lenlim); 2986 HN_UNLOCK(sc); 2987 2988 return 0; 2989 } 2990 2991 static int 2992 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 2993 { 2994 struct hn_softc *sc = arg1; 2995 int ackcnt, error, i; 2996 2997 /* 2998 * lro_ackcnt_lim is append count limit, 2999 * +1 to turn it into aggregation limit. 3000 */ 3001 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 3002 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 3003 if (error || req->newptr == NULL) 3004 return error; 3005 3006 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 3007 return EINVAL; 3008 3009 /* 3010 * Convert aggregation limit back to append 3011 * count limit. 
3012 */ 3013 --ackcnt; 3014 HN_LOCK(sc); 3015 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 3016 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 3017 HN_UNLOCK(sc); 3018 return 0; 3019 } 3020 3021 #endif 3022 3023 static int 3024 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 3025 { 3026 struct hn_softc *sc = arg1; 3027 int hcsum = arg2; 3028 int on, error, i; 3029 3030 on = 0; 3031 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 3032 on = 1; 3033 3034 error = sysctl_handle_int(oidp, &on, 0, req); 3035 if (error || req->newptr == NULL) 3036 return error; 3037 3038 HN_LOCK(sc); 3039 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3040 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 3041 3042 if (on) 3043 rxr->hn_trust_hcsum |= hcsum; 3044 else 3045 rxr->hn_trust_hcsum &= ~hcsum; 3046 } 3047 HN_UNLOCK(sc); 3048 return 0; 3049 } 3050 3051 static int 3052 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 3053 { 3054 struct hn_softc *sc = arg1; 3055 int chim_size, error; 3056 3057 chim_size = sc->hn_tx_ring[0].hn_chim_size; 3058 error = sysctl_handle_int(oidp, &chim_size, 0, req); 3059 if (error || req->newptr == NULL) 3060 return error; 3061 3062 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 3063 return EINVAL; 3064 3065 HN_LOCK(sc); 3066 hn_set_chim_size(sc, chim_size); 3067 HN_UNLOCK(sc); 3068 return 0; 3069 } 3070 3071 #if __FreeBSD_version < 1100095 3072 static int 3073 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 3074 { 3075 struct hn_softc *sc = arg1; 3076 int ofs = arg2, i, error; 3077 struct hn_rx_ring *rxr; 3078 uint64_t stat; 3079 3080 stat = 0; 3081 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3082 rxr = &sc->hn_rx_ring[i]; 3083 stat += *((int *)((uint8_t *)rxr + ofs)); 3084 } 3085 3086 error = sysctl_handle_64(oidp, &stat, 0, req); 3087 if (error || req->newptr == NULL) 3088 return error; 3089 3090 /* Zero out this stat. */ 3091 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3092 rxr = &sc->hn_rx_ring[i]; 3093 *((int *)((uint8_t *)rxr + ofs)) = 0; 3094 } 3095 return 0; 3096 } 3097 #else 3098 static int 3099 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 3100 { 3101 struct hn_softc *sc = arg1; 3102 int ofs = arg2, i, error; 3103 struct hn_rx_ring *rxr; 3104 uint64_t stat; 3105 3106 stat = 0; 3107 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3108 rxr = &sc->hn_rx_ring[i]; 3109 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 3110 } 3111 3112 error = sysctl_handle_64(oidp, &stat, 0, req); 3113 if (error || req->newptr == NULL) 3114 return error; 3115 3116 /* Zero out this stat. */ 3117 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3118 rxr = &sc->hn_rx_ring[i]; 3119 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 3120 } 3121 return 0; 3122 } 3123 3124 #endif 3125 3126 static int 3127 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 3128 { 3129 struct hn_softc *sc = arg1; 3130 int ofs = arg2, i, error; 3131 struct hn_rx_ring *rxr; 3132 u_long stat; 3133 3134 stat = 0; 3135 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3136 rxr = &sc->hn_rx_ring[i]; 3137 stat += *((u_long *)((uint8_t *)rxr + ofs)); 3138 } 3139 3140 error = sysctl_handle_long(oidp, &stat, 0, req); 3141 if (error || req->newptr == NULL) 3142 return error; 3143 3144 /* Zero out this stat. 
*/ 3145 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3146 rxr = &sc->hn_rx_ring[i]; 3147 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 3148 } 3149 return 0; 3150 } 3151 3152 static int 3153 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 3154 { 3155 struct hn_softc *sc = arg1; 3156 int ofs = arg2, i, error; 3157 struct hn_tx_ring *txr; 3158 u_long stat; 3159 3160 stat = 0; 3161 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3162 txr = &sc->hn_tx_ring[i]; 3163 stat += *((u_long *)((uint8_t *)txr + ofs)); 3164 } 3165 3166 error = sysctl_handle_long(oidp, &stat, 0, req); 3167 if (error || req->newptr == NULL) 3168 return error; 3169 3170 /* Zero out this stat. */ 3171 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3172 txr = &sc->hn_tx_ring[i]; 3173 *((u_long *)((uint8_t *)txr + ofs)) = 0; 3174 } 3175 return 0; 3176 } 3177 3178 static int 3179 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 3180 { 3181 struct hn_softc *sc = arg1; 3182 int ofs = arg2, i, error, conf; 3183 struct hn_tx_ring *txr; 3184 3185 txr = &sc->hn_tx_ring[0]; 3186 conf = *((int *)((uint8_t *)txr + ofs)); 3187 3188 error = sysctl_handle_int(oidp, &conf, 0, req); 3189 if (error || req->newptr == NULL) 3190 return error; 3191 3192 HN_LOCK(sc); 3193 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3194 txr = &sc->hn_tx_ring[i]; 3195 *((int *)((uint8_t *)txr + ofs)) = conf; 3196 } 3197 HN_UNLOCK(sc); 3198 3199 return 0; 3200 } 3201 3202 static int 3203 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 3204 { 3205 struct hn_softc *sc = arg1; 3206 int error, size; 3207 3208 size = sc->hn_agg_size; 3209 error = sysctl_handle_int(oidp, &size, 0, req); 3210 if (error || req->newptr == NULL) 3211 return (error); 3212 3213 HN_LOCK(sc); 3214 sc->hn_agg_size = size; 3215 hn_set_txagg(sc); 3216 HN_UNLOCK(sc); 3217 3218 return (0); 3219 } 3220 3221 static int 3222 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 3223 { 3224 struct hn_softc *sc = arg1; 3225 int error, pkts; 3226 3227 pkts = sc->hn_agg_pkts; 3228 error = sysctl_handle_int(oidp, &pkts, 0, req); 3229 if (error || req->newptr == NULL) 3230 return (error); 3231 3232 HN_LOCK(sc); 3233 sc->hn_agg_pkts = pkts; 3234 hn_set_txagg(sc); 3235 HN_UNLOCK(sc); 3236 3237 return (0); 3238 } 3239 3240 static int 3241 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 3242 { 3243 struct hn_softc *sc = arg1; 3244 int pkts; 3245 3246 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 3247 return (sysctl_handle_int(oidp, &pkts, 0, req)); 3248 } 3249 3250 static int 3251 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 3252 { 3253 struct hn_softc *sc = arg1; 3254 int align; 3255 3256 align = sc->hn_tx_ring[0].hn_agg_align; 3257 return (sysctl_handle_int(oidp, &align, 0, req)); 3258 } 3259 3260 static void 3261 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) 3262 { 3263 if (pollhz == 0) 3264 vmbus_chan_poll_disable(chan); 3265 else 3266 vmbus_chan_poll_enable(chan, pollhz); 3267 } 3268 3269 static void 3270 hn_polling(struct hn_softc *sc, u_int pollhz) 3271 { 3272 int nsubch = sc->hn_rx_ring_inuse - 1; 3273 3274 HN_LOCK_ASSERT(sc); 3275 3276 if (nsubch > 0) { 3277 struct vmbus_channel **subch; 3278 int i; 3279 3280 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 3281 for (i = 0; i < nsubch; ++i) 3282 hn_chan_polling(subch[i], pollhz); 3283 vmbus_subchan_rel(subch, nsubch); 3284 } 3285 hn_chan_polling(sc->hn_prichan, pollhz); 3286 } 3287 3288 static int 3289 hn_polling_sysctl(SYSCTL_HANDLER_ARGS) 3290 { 3291 struct hn_softc *sc = arg1; 3292 int pollhz, error; 3293 3294 pollhz = sc->hn_pollhz; 3295 error = sysctl_handle_int(oidp, &pollhz, 0, req); 3296 
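/* A read request (or an error) stops here; a newly written value falls through and is range-checked against VMBUS_CHAN_POLLHZ_MIN/MAX below before the polling frequency is applied. */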
if (error || req->newptr == NULL) 3297 return (error); 3298 3299 if (pollhz != 0 && 3300 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) 3301 return (EINVAL); 3302 3303 HN_LOCK(sc); 3304 if (sc->hn_pollhz != pollhz) { 3305 sc->hn_pollhz = pollhz; 3306 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && 3307 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 3308 hn_polling(sc, sc->hn_pollhz); 3309 } 3310 HN_UNLOCK(sc); 3311 3312 return (0); 3313 } 3314 3315 static int 3316 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 3317 { 3318 struct hn_softc *sc = arg1; 3319 char verstr[16]; 3320 3321 snprintf(verstr, sizeof(verstr), "%u.%u", 3322 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 3323 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 3324 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 3325 } 3326 3327 static int 3328 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 3329 { 3330 struct hn_softc *sc = arg1; 3331 char caps_str[128]; 3332 uint32_t caps; 3333 3334 HN_LOCK(sc); 3335 caps = sc->hn_caps; 3336 HN_UNLOCK(sc); 3337 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 3338 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 3339 } 3340 3341 static int 3342 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 3343 { 3344 struct hn_softc *sc = arg1; 3345 char assist_str[128]; 3346 uint32_t hwassist; 3347 3348 HN_LOCK(sc); 3349 hwassist = sc->hn_ifp->if_hwassist; 3350 HN_UNLOCK(sc); 3351 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 3352 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 3353 } 3354 3355 static int 3356 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 3357 { 3358 struct hn_softc *sc = arg1; 3359 char filter_str[128]; 3360 uint32_t filter; 3361 3362 HN_LOCK(sc); 3363 filter = sc->hn_rx_filter; 3364 HN_UNLOCK(sc); 3365 snprintf(filter_str, sizeof(filter_str), "%b", filter, 3366 NDIS_PACKET_TYPES); 3367 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 3368 } 3369 3370 #ifndef RSS 3371 3372 static int 3373 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 3374 { 3375 struct hn_softc *sc = arg1; 3376 int error; 3377 3378 HN_LOCK(sc); 3379 3380 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 3381 if (error || req->newptr == NULL) 3382 goto back; 3383 3384 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 3385 if (error) 3386 goto back; 3387 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 3388 3389 if (sc->hn_rx_ring_inuse > 1) { 3390 error = hn_rss_reconfig(sc); 3391 } else { 3392 /* Not RSS capable, at least for now; just save the RSS key. */ 3393 error = 0; 3394 } 3395 back: 3396 HN_UNLOCK(sc); 3397 return (error); 3398 } 3399 3400 static int 3401 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 3402 { 3403 struct hn_softc *sc = arg1; 3404 int error; 3405 3406 HN_LOCK(sc); 3407 3408 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 3409 if (error || req->newptr == NULL) 3410 goto back; 3411 3412 /* 3413 * Don't allow RSS indirect table change, if this interface is not 3414 * RSS capable currently. 
3415 */ 3416 if (sc->hn_rx_ring_inuse == 1) { 3417 error = EOPNOTSUPP; 3418 goto back; 3419 } 3420 3421 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 3422 if (error) 3423 goto back; 3424 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 3425 3426 hn_rss_ind_fixup(sc); 3427 error = hn_rss_reconfig(sc); 3428 back: 3429 HN_UNLOCK(sc); 3430 return (error); 3431 } 3432 3433 #endif /* !RSS */ 3434 3435 static int 3436 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 3437 { 3438 struct hn_softc *sc = arg1; 3439 char hash_str[128]; 3440 uint32_t hash; 3441 3442 HN_LOCK(sc); 3443 hash = sc->hn_rss_hash; 3444 HN_UNLOCK(sc); 3445 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 3446 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 3447 } 3448 3449 static int 3450 hn_vf_sysctl(SYSCTL_HANDLER_ARGS) 3451 { 3452 struct hn_softc *sc = arg1; 3453 char vf_name[IFNAMSIZ + 1]; 3454 struct ifnet *vf_ifp; 3455 3456 HN_LOCK(sc); 3457 vf_name[0] = '\0'; 3458 vf_ifp = sc->hn_vf_ifp; 3459 if (vf_ifp != NULL) 3460 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 3461 HN_UNLOCK(sc); 3462 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 3463 } 3464 3465 static int 3466 hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS) 3467 { 3468 struct hn_softc *sc = arg1; 3469 char vf_name[IFNAMSIZ + 1]; 3470 struct ifnet *vf_ifp; 3471 3472 HN_LOCK(sc); 3473 vf_name[0] = '\0'; 3474 vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp; 3475 if (vf_ifp != NULL) 3476 snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname); 3477 HN_UNLOCK(sc); 3478 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 3479 } 3480 3481 static int 3482 hn_vflist_sysctl(SYSCTL_HANDLER_ARGS) 3483 { 3484 struct rm_priotracker pt; 3485 struct sbuf *sb; 3486 int error, i; 3487 bool first; 3488 3489 error = sysctl_wire_old_buffer(req, 0); 3490 if (error != 0) 3491 return (error); 3492 3493 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 3494 if (sb == NULL) 3495 return (ENOMEM); 3496 3497 rm_rlock(&hn_vfmap_lock, &pt); 3498 3499 first = true; 3500 for (i = 0; i < hn_vfmap_size; ++i) { 3501 struct ifnet *ifp; 3502 3503 if (hn_vfmap[i] == NULL) 3504 continue; 3505 3506 ifp = ifnet_byindex(i); 3507 if (ifp != NULL) { 3508 if (first) 3509 sbuf_printf(sb, "%s", ifp->if_xname); 3510 else 3511 sbuf_printf(sb, " %s", ifp->if_xname); 3512 first = false; 3513 } 3514 } 3515 3516 rm_runlock(&hn_vfmap_lock, &pt); 3517 3518 error = sbuf_finish(sb); 3519 sbuf_delete(sb); 3520 return (error); 3521 } 3522 3523 static int 3524 hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS) 3525 { 3526 struct rm_priotracker pt; 3527 struct sbuf *sb; 3528 int error, i; 3529 bool first; 3530 3531 error = sysctl_wire_old_buffer(req, 0); 3532 if (error != 0) 3533 return (error); 3534 3535 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); 3536 if (sb == NULL) 3537 return (ENOMEM); 3538 3539 rm_rlock(&hn_vfmap_lock, &pt); 3540 3541 first = true; 3542 for (i = 0; i < hn_vfmap_size; ++i) { 3543 struct ifnet *ifp, *hn_ifp; 3544 3545 hn_ifp = hn_vfmap[i]; 3546 if (hn_ifp == NULL) 3547 continue; 3548 3549 ifp = ifnet_byindex(i); 3550 if (ifp != NULL) { 3551 if (first) { 3552 sbuf_printf(sb, "%s:%s", ifp->if_xname, 3553 hn_ifp->if_xname); 3554 } else { 3555 sbuf_printf(sb, " %s:%s", ifp->if_xname, 3556 hn_ifp->if_xname); 3557 } 3558 first = false; 3559 } 3560 } 3561 3562 rm_runlock(&hn_vfmap_lock, &pt); 3563 3564 error = sbuf_finish(sb); 3565 sbuf_delete(sb); 3566 return (error); 3567 } 3568 3569 static int 3570 hn_check_iplen(const struct mbuf *m, int hoff) 3571 { 
3572 const struct ip *ip; 3573 int len, iphlen, iplen; 3574 const struct tcphdr *th; 3575 int thoff; /* TCP data offset */ 3576 3577 len = hoff + sizeof(struct ip); 3578 3579 /* The packet must be at least the size of an IP header. */ 3580 if (m->m_pkthdr.len < len) 3581 return IPPROTO_DONE; 3582 3583 /* The fixed IP header must reside completely in the first mbuf. */ 3584 if (m->m_len < len) 3585 return IPPROTO_DONE; 3586 3587 ip = mtodo(m, hoff); 3588 3589 /* Bound check the packet's stated IP header length. */ 3590 iphlen = ip->ip_hl << 2; 3591 if (iphlen < sizeof(struct ip)) /* minimum header length */ 3592 return IPPROTO_DONE; 3593 3594 /* The full IP header must reside completely in the one mbuf. */ 3595 if (m->m_len < hoff + iphlen) 3596 return IPPROTO_DONE; 3597 3598 iplen = ntohs(ip->ip_len); 3599 3600 /* 3601 * Check that the amount of data in the buffers is at 3602 * least as much as the IP header would have us expect. 3603 */ 3604 if (m->m_pkthdr.len < hoff + iplen) 3605 return IPPROTO_DONE; 3606 3607 /* 3608 * Ignore IP fragments. 3609 */ 3610 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 3611 return IPPROTO_DONE; 3612 3613 /* 3614 * The TCP/IP or UDP/IP header must be entirely contained within 3615 * the first fragment of a packet. 3616 */ 3617 switch (ip->ip_p) { 3618 case IPPROTO_TCP: 3619 if (iplen < iphlen + sizeof(struct tcphdr)) 3620 return IPPROTO_DONE; 3621 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 3622 return IPPROTO_DONE; 3623 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 3624 thoff = th->th_off << 2; 3625 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 3626 return IPPROTO_DONE; 3627 if (m->m_len < hoff + iphlen + thoff) 3628 return IPPROTO_DONE; 3629 break; 3630 case IPPROTO_UDP: 3631 if (iplen < iphlen + sizeof(struct udphdr)) 3632 return IPPROTO_DONE; 3633 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 3634 return IPPROTO_DONE; 3635 break; 3636 default: 3637 if (iplen < iphlen) 3638 return IPPROTO_DONE; 3639 break; 3640 } 3641 return ip->ip_p; 3642 } 3643 3644 static int 3645 hn_create_rx_data(struct hn_softc *sc, int ring_cnt) 3646 { 3647 struct sysctl_oid_list *child; 3648 struct sysctl_ctx_list *ctx; 3649 device_t dev = sc->hn_dev; 3650 #if defined(INET) || defined(INET6) 3651 #if __FreeBSD_version >= 1100095 3652 int lroent_cnt; 3653 #endif 3654 #endif 3655 int i; 3656 3657 /* 3658 * Create RXBUF for reception. 3659 * 3660 * NOTE: 3661 * - It is shared by all channels. 3662 * - A large enough buffer is allocated, certain versions of NVS 3663 * may further limit the usable space. 
3664 */ 3665 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 3666 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 3667 BUS_DMA_WAITOK | BUS_DMA_ZERO); 3668 if (sc->hn_rxbuf == NULL) { 3669 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 3670 return (ENOMEM); 3671 } 3672 3673 sc->hn_rx_ring_cnt = ring_cnt; 3674 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 3675 3676 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 3677 M_DEVBUF, M_WAITOK | M_ZERO); 3678 3679 #if defined(INET) || defined(INET6) 3680 #if __FreeBSD_version >= 1100095 3681 lroent_cnt = hn_lro_entry_count; 3682 if (lroent_cnt < TCP_LRO_ENTRIES) 3683 lroent_cnt = TCP_LRO_ENTRIES; 3684 if (bootverbose) 3685 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 3686 #endif 3687 #endif /* INET || INET6 */ 3688 3689 ctx = device_get_sysctl_ctx(dev); 3690 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 3691 3692 /* Create dev.hn.UNIT.rx sysctl tree */ 3693 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 3694 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3695 3696 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3697 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 3698 3699 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 3700 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 3701 &rxr->hn_br_dma, BUS_DMA_WAITOK); 3702 if (rxr->hn_br == NULL) { 3703 device_printf(dev, "allocate bufring failed\n"); 3704 return (ENOMEM); 3705 } 3706 3707 if (hn_trust_hosttcp) 3708 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 3709 if (hn_trust_hostudp) 3710 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 3711 if (hn_trust_hostip) 3712 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 3713 rxr->hn_ifp = sc->hn_ifp; 3714 if (i < sc->hn_tx_ring_cnt) 3715 rxr->hn_txr = &sc->hn_tx_ring[i]; 3716 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 3717 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 3718 rxr->hn_rx_idx = i; 3719 rxr->hn_rxbuf = sc->hn_rxbuf; 3720 3721 /* 3722 * Initialize LRO. 
3723 */ 3724 #if defined(INET) || defined(INET6) 3725 #if __FreeBSD_version >= 1100095 3726 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 3727 hn_lro_mbufq_depth); 3728 #else 3729 tcp_lro_init(&rxr->hn_lro); 3730 rxr->hn_lro.ifp = sc->hn_ifp; 3731 #endif 3732 #if __FreeBSD_version >= 1100099 3733 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 3734 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 3735 #endif 3736 #endif /* INET || INET6 */ 3737 3738 if (sc->hn_rx_sysctl_tree != NULL) { 3739 char name[16]; 3740 3741 /* 3742 * Create per RX ring sysctl tree: 3743 * dev.hn.UNIT.rx.RINGID 3744 */ 3745 snprintf(name, sizeof(name), "%d", i); 3746 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 3747 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 3748 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3749 3750 if (rxr->hn_rx_sysctl_tree != NULL) { 3751 SYSCTL_ADD_ULONG(ctx, 3752 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 3753 OID_AUTO, "packets", CTLFLAG_RW, 3754 &rxr->hn_pkts, "# of packets received"); 3755 SYSCTL_ADD_ULONG(ctx, 3756 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 3757 OID_AUTO, "rss_pkts", CTLFLAG_RW, 3758 &rxr->hn_rss_pkts, 3759 "# of packets w/ RSS info received"); 3760 SYSCTL_ADD_INT(ctx, 3761 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 3762 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 3763 &rxr->hn_pktbuf_len, 0, 3764 "Temporary channel packet buffer length"); 3765 } 3766 } 3767 } 3768 3769 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 3770 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3771 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 3772 #if __FreeBSD_version < 1100095 3773 hn_rx_stat_int_sysctl, 3774 #else 3775 hn_rx_stat_u64_sysctl, 3776 #endif 3777 "LU", "LRO queued"); 3778 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 3779 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3780 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 3781 #if __FreeBSD_version < 1100095 3782 hn_rx_stat_int_sysctl, 3783 #else 3784 hn_rx_stat_u64_sysctl, 3785 #endif 3786 "LU", "LRO flushed"); 3787 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 3788 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3789 __offsetof(struct hn_rx_ring, hn_lro_tried), 3790 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 3791 #if __FreeBSD_version >= 1100099 3792 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 3793 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 3794 hn_lro_lenlim_sysctl, "IU", 3795 "Max # of data bytes to be aggregated by LRO"); 3796 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 3797 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 3798 hn_lro_ackcnt_sysctl, "I", 3799 "Max # of ACKs to be aggregated by LRO"); 3800 #endif 3801 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 3802 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 3803 hn_trust_hcsum_sysctl, "I", 3804 "Trust tcp segement verification on host side, " 3805 "when csum info is missing"); 3806 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 3807 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 3808 hn_trust_hcsum_sysctl, "I", 3809 "Trust udp datagram verification on host side, " 3810 "when csum info is missing"); 3811 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 3812 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 3813 hn_trust_hcsum_sysctl, "I", 3814 "Trust ip packet verification on host side, " 3815 "when csum info is missing"); 3816 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 3817 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3818 
__offsetof(struct hn_rx_ring, hn_csum_ip), 3819 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 3820 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 3821 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3822 __offsetof(struct hn_rx_ring, hn_csum_tcp), 3823 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 3824 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 3825 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3826 __offsetof(struct hn_rx_ring, hn_csum_udp), 3827 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 3828 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 3829 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3830 __offsetof(struct hn_rx_ring, hn_csum_trusted), 3831 hn_rx_stat_ulong_sysctl, "LU", 3832 "# of packets that we trust host's csum verification"); 3833 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 3834 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3835 __offsetof(struct hn_rx_ring, hn_small_pkts), 3836 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 3837 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 3838 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3839 __offsetof(struct hn_rx_ring, hn_ack_failed), 3840 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 3841 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 3842 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 3843 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 3844 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 3845 3846 return (0); 3847 } 3848 3849 static void 3850 hn_destroy_rx_data(struct hn_softc *sc) 3851 { 3852 int i; 3853 3854 if (sc->hn_rxbuf != NULL) { 3855 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) 3856 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 3857 else 3858 device_printf(sc->hn_dev, "RXBUF is referenced\n"); 3859 sc->hn_rxbuf = NULL; 3860 } 3861 3862 if (sc->hn_rx_ring_cnt == 0) 3863 return; 3864 3865 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3866 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 3867 3868 if (rxr->hn_br == NULL) 3869 continue; 3870 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { 3871 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 3872 } else { 3873 device_printf(sc->hn_dev, 3874 "%dth channel bufring is referenced", i); 3875 } 3876 rxr->hn_br = NULL; 3877 3878 #if defined(INET) || defined(INET6) 3879 tcp_lro_free(&rxr->hn_lro); 3880 #endif 3881 free(rxr->hn_pktbuf, M_DEVBUF); 3882 } 3883 free(sc->hn_rx_ring, M_DEVBUF); 3884 sc->hn_rx_ring = NULL; 3885 3886 sc->hn_rx_ring_cnt = 0; 3887 sc->hn_rx_ring_inuse = 0; 3888 } 3889 3890 static int 3891 hn_tx_ring_create(struct hn_softc *sc, int id) 3892 { 3893 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 3894 device_t dev = sc->hn_dev; 3895 bus_dma_tag_t parent_dtag; 3896 int error, i; 3897 3898 txr->hn_sc = sc; 3899 txr->hn_tx_idx = id; 3900 3901 #ifndef HN_USE_TXDESC_BUFRING 3902 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 3903 #endif 3904 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 3905 3906 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 3907 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 3908 M_DEVBUF, M_WAITOK | M_ZERO); 3909 #ifndef HN_USE_TXDESC_BUFRING 3910 SLIST_INIT(&txr->hn_txlist); 3911 #else 3912 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 3913 M_WAITOK, &txr->hn_tx_lock); 3914 #endif 3915 3916 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { 3917 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( 3918 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); 3919 } else { 3920 txr->hn_tx_taskq = 
sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; 3921 } 3922 3923 #ifdef HN_IFSTART_SUPPORT 3924 if (hn_use_if_start) { 3925 txr->hn_txeof = hn_start_txeof; 3926 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 3927 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 3928 } else 3929 #endif 3930 { 3931 int br_depth; 3932 3933 txr->hn_txeof = hn_xmit_txeof; 3934 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 3935 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 3936 3937 br_depth = hn_get_txswq_depth(txr); 3938 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 3939 M_WAITOK, &txr->hn_tx_lock); 3940 } 3941 3942 txr->hn_direct_tx_size = hn_direct_tx_size; 3943 3944 /* 3945 * Always schedule transmission instead of trying to do direct 3946 * transmission. This one gives the best performance so far. 3947 */ 3948 txr->hn_sched_tx = 1; 3949 3950 parent_dtag = bus_get_dma_tag(dev); 3951 3952 /* DMA tag for RNDIS packet messages. */ 3953 error = bus_dma_tag_create(parent_dtag, /* parent */ 3954 HN_RNDIS_PKT_ALIGN, /* alignment */ 3955 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 3956 BUS_SPACE_MAXADDR, /* lowaddr */ 3957 BUS_SPACE_MAXADDR, /* highaddr */ 3958 NULL, NULL, /* filter, filterarg */ 3959 HN_RNDIS_PKT_LEN, /* maxsize */ 3960 1, /* nsegments */ 3961 HN_RNDIS_PKT_LEN, /* maxsegsize */ 3962 0, /* flags */ 3963 NULL, /* lockfunc */ 3964 NULL, /* lockfuncarg */ 3965 &txr->hn_tx_rndis_dtag); 3966 if (error) { 3967 device_printf(dev, "failed to create rndis dmatag\n"); 3968 return error; 3969 } 3970 3971 /* DMA tag for data. */ 3972 error = bus_dma_tag_create(parent_dtag, /* parent */ 3973 1, /* alignment */ 3974 HN_TX_DATA_BOUNDARY, /* boundary */ 3975 BUS_SPACE_MAXADDR, /* lowaddr */ 3976 BUS_SPACE_MAXADDR, /* highaddr */ 3977 NULL, NULL, /* filter, filterarg */ 3978 HN_TX_DATA_MAXSIZE, /* maxsize */ 3979 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 3980 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 3981 0, /* flags */ 3982 NULL, /* lockfunc */ 3983 NULL, /* lockfuncarg */ 3984 &txr->hn_tx_data_dtag); 3985 if (error) { 3986 device_printf(dev, "failed to create data dmatag\n"); 3987 return error; 3988 } 3989 3990 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 3991 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 3992 3993 txd->txr = txr; 3994 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 3995 STAILQ_INIT(&txd->agg_list); 3996 3997 /* 3998 * Allocate and load RNDIS packet message. 3999 */ 4000 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 4001 (void **)&txd->rndis_pkt, 4002 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 4003 &txd->rndis_pkt_dmap); 4004 if (error) { 4005 device_printf(dev, 4006 "failed to allocate rndis_packet_msg, %d\n", i); 4007 return error; 4008 } 4009 4010 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 4011 txd->rndis_pkt_dmap, 4012 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 4013 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 4014 BUS_DMA_NOWAIT); 4015 if (error) { 4016 device_printf(dev, 4017 "failed to load rndis_packet_msg, %d\n", i); 4018 bus_dmamem_free(txr->hn_tx_rndis_dtag, 4019 txd->rndis_pkt, txd->rndis_pkt_dmap); 4020 return error; 4021 } 4022 4023 /* DMA map for TX data. 
*/ 4024 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 4025 &txd->data_dmap); 4026 if (error) { 4027 device_printf(dev, 4028 "failed to allocate tx data dmamap\n"); 4029 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 4030 txd->rndis_pkt_dmap); 4031 bus_dmamem_free(txr->hn_tx_rndis_dtag, 4032 txd->rndis_pkt, txd->rndis_pkt_dmap); 4033 return error; 4034 } 4035 4036 /* All set, put it to list */ 4037 txd->flags |= HN_TXD_FLAG_ONLIST; 4038 #ifndef HN_USE_TXDESC_BUFRING 4039 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 4040 #else 4041 buf_ring_enqueue(txr->hn_txdesc_br, txd); 4042 #endif 4043 } 4044 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 4045 4046 if (sc->hn_tx_sysctl_tree != NULL) { 4047 struct sysctl_oid_list *child; 4048 struct sysctl_ctx_list *ctx; 4049 char name[16]; 4050 4051 /* 4052 * Create per TX ring sysctl tree: 4053 * dev.hn.UNIT.tx.RINGID 4054 */ 4055 ctx = device_get_sysctl_ctx(dev); 4056 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 4057 4058 snprintf(name, sizeof(name), "%d", id); 4059 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 4060 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4061 4062 if (txr->hn_tx_sysctl_tree != NULL) { 4063 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 4064 4065 #ifdef HN_DEBUG 4066 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 4067 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 4068 "# of available TX descs"); 4069 #endif 4070 #ifdef HN_IFSTART_SUPPORT 4071 if (!hn_use_if_start) 4072 #endif 4073 { 4074 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 4075 CTLFLAG_RD, &txr->hn_oactive, 0, 4076 "over active"); 4077 } 4078 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 4079 CTLFLAG_RW, &txr->hn_pkts, 4080 "# of packets transmitted"); 4081 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 4082 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 4083 } 4084 } 4085 4086 return 0; 4087 } 4088 4089 static void 4090 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 4091 { 4092 struct hn_tx_ring *txr = txd->txr; 4093 4094 KASSERT(txd->m == NULL, ("still has mbuf installed")); 4095 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 4096 4097 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 4098 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 4099 txd->rndis_pkt_dmap); 4100 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 4101 } 4102 4103 static void 4104 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) 4105 { 4106 4107 KASSERT(txd->refs == 0 || txd->refs == 1, 4108 ("invalid txd refs %d", txd->refs)); 4109 4110 /* Aggregated txds will be freed by their aggregating txd. */ 4111 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { 4112 int freed; 4113 4114 freed = hn_txdesc_put(txr, txd); 4115 KASSERT(freed, ("can't free txdesc")); 4116 } 4117 } 4118 4119 static void 4120 hn_tx_ring_destroy(struct hn_tx_ring *txr) 4121 { 4122 int i; 4123 4124 if (txr->hn_txdesc == NULL) 4125 return; 4126 4127 /* 4128 * NOTE: 4129 * Because the freeing of aggregated txds will be deferred 4130 * to the aggregating txd, two passes are used here: 4131 * - The first pass GCes any pending txds. This GC is necessary, 4132 * since if the channels are revoked, hypervisor will not 4133 * deliver send-done for all pending txds. 4134 * - The second pass frees the busdma stuffs, i.e. after all txds 4135 * were freed. 
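* (hn_txdesc_gc() releases the reference a still-pending txd holds, so the KASSERTs in hn_txdesc_dmamap_destroy() hold during the second pass.)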
4136 */ 4137 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 4138 hn_txdesc_gc(txr, &txr->hn_txdesc[i]); 4139 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 4140 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); 4141 4142 if (txr->hn_tx_data_dtag != NULL) 4143 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 4144 if (txr->hn_tx_rndis_dtag != NULL) 4145 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 4146 4147 #ifdef HN_USE_TXDESC_BUFRING 4148 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 4149 #endif 4150 4151 free(txr->hn_txdesc, M_DEVBUF); 4152 txr->hn_txdesc = NULL; 4153 4154 if (txr->hn_mbuf_br != NULL) 4155 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 4156 4157 #ifndef HN_USE_TXDESC_BUFRING 4158 mtx_destroy(&txr->hn_txlist_spin); 4159 #endif 4160 mtx_destroy(&txr->hn_tx_lock); 4161 } 4162 4163 static int 4164 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 4165 { 4166 struct sysctl_oid_list *child; 4167 struct sysctl_ctx_list *ctx; 4168 int i; 4169 4170 /* 4171 * Create TXBUF for chimney sending. 4172 * 4173 * NOTE: It is shared by all channels. 4174 */ 4175 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 4176 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 4177 BUS_DMA_WAITOK | BUS_DMA_ZERO); 4178 if (sc->hn_chim == NULL) { 4179 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 4180 return (ENOMEM); 4181 } 4182 4183 sc->hn_tx_ring_cnt = ring_cnt; 4184 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 4185 4186 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 4187 M_DEVBUF, M_WAITOK | M_ZERO); 4188 4189 ctx = device_get_sysctl_ctx(sc->hn_dev); 4190 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 4191 4192 /* Create dev.hn.UNIT.tx sysctl tree */ 4193 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 4194 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 4195 4196 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4197 int error; 4198 4199 error = hn_tx_ring_create(sc, i); 4200 if (error) 4201 return error; 4202 } 4203 4204 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 4205 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4206 __offsetof(struct hn_tx_ring, hn_no_txdescs), 4207 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 4208 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 4209 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4210 __offsetof(struct hn_tx_ring, hn_send_failed), 4211 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 4212 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 4213 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4214 __offsetof(struct hn_tx_ring, hn_txdma_failed), 4215 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 4216 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 4217 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4218 __offsetof(struct hn_tx_ring, hn_flush_failed), 4219 hn_tx_stat_ulong_sysctl, "LU", 4220 "# of packet transmission aggregation flush failure"); 4221 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 4222 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4223 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 4224 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 4225 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 4226 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4227 __offsetof(struct hn_tx_ring, hn_tx_chimney), 4228 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 4229 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 4230 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4231 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 4232 
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 4233 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 4234 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 4235 "# of total TX descs"); 4236 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 4237 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 4238 "Chimney send packet size upper boundary"); 4239 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 4240 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 4241 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 4242 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 4243 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4244 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 4245 hn_tx_conf_int_sysctl, "I", 4246 "Size of the packet for direct transmission"); 4247 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 4248 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 4249 __offsetof(struct hn_tx_ring, hn_sched_tx), 4250 hn_tx_conf_int_sysctl, "I", 4251 "Always schedule transmission " 4252 "instead of doing direct transmission"); 4253 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 4254 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 4255 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 4256 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 4257 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 4258 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 4259 "Applied packet transmission aggregation size"); 4260 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 4261 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 4262 hn_txagg_pktmax_sysctl, "I", 4263 "Applied packet transmission aggregation packets"); 4264 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 4265 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 4266 hn_txagg_align_sysctl, "I", 4267 "Applied packet transmission aggregation alignment"); 4268 4269 return 0; 4270 } 4271 4272 static void 4273 hn_set_chim_size(struct hn_softc *sc, int chim_size) 4274 { 4275 int i; 4276 4277 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 4278 sc->hn_tx_ring[i].hn_chim_size = chim_size; 4279 } 4280 4281 static void 4282 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 4283 { 4284 struct ifnet *ifp = sc->hn_ifp; 4285 int tso_minlen; 4286 4287 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 4288 return; 4289 4290 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 4291 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 4292 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 4293 4294 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 4295 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 4296 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 4297 4298 if (tso_maxlen < tso_minlen) 4299 tso_maxlen = tso_minlen; 4300 else if (tso_maxlen > IP_MAXPACKET) 4301 tso_maxlen = IP_MAXPACKET; 4302 if (tso_maxlen > sc->hn_ndis_tso_szmax) 4303 tso_maxlen = sc->hn_ndis_tso_szmax; 4304 ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 4305 if (bootverbose) 4306 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 4307 } 4308 4309 static void 4310 hn_fixup_tx_data(struct hn_softc *sc) 4311 { 4312 uint64_t csum_assist; 4313 int i; 4314 4315 hn_set_chim_size(sc, sc->hn_chim_szmax); 4316 if (hn_tx_chimney_size > 0 && 4317 hn_tx_chimney_size < sc->hn_chim_szmax) 4318 hn_set_chim_size(sc, hn_tx_chimney_size); 4319 4320 csum_assist = 0; 4321 if (sc->hn_caps & HN_CAP_IPCS) 4322 csum_assist |= CSUM_IP; 4323 if (sc->hn_caps & HN_CAP_TCP4CS) 4324 csum_assist |= CSUM_IP_TCP; 4325 if (sc->hn_caps & HN_CAP_UDP4CS) 4326 
csum_assist |= CSUM_IP_UDP; 4327 if (sc->hn_caps & HN_CAP_TCP6CS) 4328 csum_assist |= CSUM_IP6_TCP; 4329 if (sc->hn_caps & HN_CAP_UDP6CS) 4330 csum_assist |= CSUM_IP6_UDP; 4331 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 4332 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 4333 4334 if (sc->hn_caps & HN_CAP_HASHVAL) { 4335 /* 4336 * Support HASHVAL pktinfo on TX path. 4337 */ 4338 if (bootverbose) 4339 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 4340 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 4341 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 4342 } 4343 } 4344 4345 static void 4346 hn_destroy_tx_data(struct hn_softc *sc) 4347 { 4348 int i; 4349 4350 if (sc->hn_chim != NULL) { 4351 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { 4352 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 4353 } else { 4354 device_printf(sc->hn_dev, 4355 "chimney sending buffer is referenced"); 4356 } 4357 sc->hn_chim = NULL; 4358 } 4359 4360 if (sc->hn_tx_ring_cnt == 0) 4361 return; 4362 4363 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 4364 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 4365 4366 free(sc->hn_tx_ring, M_DEVBUF); 4367 sc->hn_tx_ring = NULL; 4368 4369 sc->hn_tx_ring_cnt = 0; 4370 sc->hn_tx_ring_inuse = 0; 4371 } 4372 4373 #ifdef HN_IFSTART_SUPPORT 4374 4375 static void 4376 hn_start_taskfunc(void *xtxr, int pending __unused) 4377 { 4378 struct hn_tx_ring *txr = xtxr; 4379 4380 mtx_lock(&txr->hn_tx_lock); 4381 hn_start_locked(txr, 0); 4382 mtx_unlock(&txr->hn_tx_lock); 4383 } 4384 4385 static int 4386 hn_start_locked(struct hn_tx_ring *txr, int len) 4387 { 4388 struct hn_softc *sc = txr->hn_sc; 4389 struct ifnet *ifp = sc->hn_ifp; 4390 int sched = 0; 4391 4392 KASSERT(hn_use_if_start, 4393 ("hn_start_locked is called, when if_start is disabled")); 4394 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 4395 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 4396 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 4397 4398 if (__predict_false(txr->hn_suspended)) 4399 return (0); 4400 4401 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 4402 IFF_DRV_RUNNING) 4403 return (0); 4404 4405 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 4406 struct hn_txdesc *txd; 4407 struct mbuf *m_head; 4408 int error; 4409 4410 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 4411 if (m_head == NULL) 4412 break; 4413 4414 if (len > 0 && m_head->m_pkthdr.len > len) { 4415 /* 4416 * This sending could be time consuming; let callers 4417 * dispatch this packet sending (and sending of any 4418 * following up packets) to tx taskqueue. 
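* (The mbuf is prepended back to if_snd and 'sched' is returned, so the caller can hand the rest off to hn_tx_task; see hn_start() below.)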
4419 */ 4420 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 4421 sched = 1; 4422 break; 4423 } 4424 4425 #if defined(INET6) || defined(INET) 4426 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 4427 m_head = hn_tso_fixup(m_head); 4428 if (__predict_false(m_head == NULL)) { 4429 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 4430 continue; 4431 } 4432 } 4433 #endif 4434 4435 txd = hn_txdesc_get(txr); 4436 if (txd == NULL) { 4437 txr->hn_no_txdescs++; 4438 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 4439 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4440 break; 4441 } 4442 4443 error = hn_encap(ifp, txr, txd, &m_head); 4444 if (error) { 4445 /* Both txd and m_head are freed */ 4446 KASSERT(txr->hn_agg_txd == NULL, 4447 ("encap failed w/ pending aggregating txdesc")); 4448 continue; 4449 } 4450 4451 if (txr->hn_agg_pktleft == 0) { 4452 if (txr->hn_agg_txd != NULL) { 4453 KASSERT(m_head == NULL, 4454 ("pending mbuf for aggregating txdesc")); 4455 error = hn_flush_txagg(ifp, txr); 4456 if (__predict_false(error)) { 4457 atomic_set_int(&ifp->if_drv_flags, 4458 IFF_DRV_OACTIVE); 4459 break; 4460 } 4461 } else { 4462 KASSERT(m_head != NULL, ("mbuf was freed")); 4463 error = hn_txpkt(ifp, txr, txd); 4464 if (__predict_false(error)) { 4465 /* txd is freed, but m_head is not */ 4466 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 4467 atomic_set_int(&ifp->if_drv_flags, 4468 IFF_DRV_OACTIVE); 4469 break; 4470 } 4471 } 4472 } 4473 #ifdef INVARIANTS 4474 else { 4475 KASSERT(txr->hn_agg_txd != NULL, 4476 ("no aggregating txdesc")); 4477 KASSERT(m_head == NULL, 4478 ("pending mbuf for aggregating txdesc")); 4479 } 4480 #endif 4481 } 4482 4483 /* Flush pending aggregated transmission. */ 4484 if (txr->hn_agg_txd != NULL) 4485 hn_flush_txagg(ifp, txr); 4486 return (sched); 4487 } 4488 4489 static void 4490 hn_start(struct ifnet *ifp) 4491 { 4492 struct hn_softc *sc = ifp->if_softc; 4493 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 4494 4495 if (txr->hn_sched_tx) 4496 goto do_sched; 4497 4498 if (mtx_trylock(&txr->hn_tx_lock)) { 4499 int sched; 4500 4501 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 4502 mtx_unlock(&txr->hn_tx_lock); 4503 if (!sched) 4504 return; 4505 } 4506 do_sched: 4507 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 4508 } 4509 4510 static void 4511 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 4512 { 4513 struct hn_tx_ring *txr = xtxr; 4514 4515 mtx_lock(&txr->hn_tx_lock); 4516 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 4517 hn_start_locked(txr, 0); 4518 mtx_unlock(&txr->hn_tx_lock); 4519 } 4520 4521 static void 4522 hn_start_txeof(struct hn_tx_ring *txr) 4523 { 4524 struct hn_softc *sc = txr->hn_sc; 4525 struct ifnet *ifp = sc->hn_ifp; 4526 4527 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 4528 4529 if (txr->hn_sched_tx) 4530 goto do_sched; 4531 4532 if (mtx_trylock(&txr->hn_tx_lock)) { 4533 int sched; 4534 4535 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4536 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 4537 mtx_unlock(&txr->hn_tx_lock); 4538 if (sched) { 4539 taskqueue_enqueue(txr->hn_tx_taskq, 4540 &txr->hn_tx_task); 4541 } 4542 } else { 4543 do_sched: 4544 /* 4545 * Release the OACTIVE earlier, with the hope that 4546 * others could catch up. The task will clear the 4547 * flag again with the hn_tx_lock to avoid possible 4548 * races.
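* (hn_start_txeof_taskfunc() above re-clears IFF_DRV_OACTIVE while holding hn_tx_lock before restarting transmission.)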
4549 */ 4550 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4551 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 4552 } 4553 } 4554 4555 #endif /* HN_IFSTART_SUPPORT */ 4556 4557 static int 4558 hn_xmit(struct hn_tx_ring *txr, int len) 4559 { 4560 struct hn_softc *sc = txr->hn_sc; 4561 struct ifnet *ifp = sc->hn_ifp; 4562 struct mbuf *m_head; 4563 int sched = 0; 4564 4565 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 4566 #ifdef HN_IFSTART_SUPPORT 4567 KASSERT(hn_use_if_start == 0, 4568 ("hn_xmit is called, when if_start is enabled")); 4569 #endif 4570 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 4571 4572 if (__predict_false(txr->hn_suspended)) 4573 return (0); 4574 4575 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 4576 return (0); 4577 4578 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 4579 struct hn_txdesc *txd; 4580 int error; 4581 4582 if (len > 0 && m_head->m_pkthdr.len > len) { 4583 /* 4584 * This sending could be time consuming; let callers 4585 * dispatch this packet sending (and sending of any 4586 * following up packets) to tx taskqueue. 4587 */ 4588 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 4589 sched = 1; 4590 break; 4591 } 4592 4593 txd = hn_txdesc_get(txr); 4594 if (txd == NULL) { 4595 txr->hn_no_txdescs++; 4596 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 4597 txr->hn_oactive = 1; 4598 break; 4599 } 4600 4601 error = hn_encap(ifp, txr, txd, &m_head); 4602 if (error) { 4603 /* Both txd and m_head are freed; discard */ 4604 KASSERT(txr->hn_agg_txd == NULL, 4605 ("encap failed w/ pending aggregating txdesc")); 4606 drbr_advance(ifp, txr->hn_mbuf_br); 4607 continue; 4608 } 4609 4610 if (txr->hn_agg_pktleft == 0) { 4611 if (txr->hn_agg_txd != NULL) { 4612 KASSERT(m_head == NULL, 4613 ("pending mbuf for aggregating txdesc")); 4614 error = hn_flush_txagg(ifp, txr); 4615 if (__predict_false(error)) { 4616 txr->hn_oactive = 1; 4617 break; 4618 } 4619 } else { 4620 KASSERT(m_head != NULL, ("mbuf was freed")); 4621 error = hn_txpkt(ifp, txr, txd); 4622 if (__predict_false(error)) { 4623 /* txd is freed, but m_head is not */ 4624 drbr_putback(ifp, txr->hn_mbuf_br, 4625 m_head); 4626 txr->hn_oactive = 1; 4627 break; 4628 } 4629 } 4630 } 4631 #ifdef INVARIANTS 4632 else { 4633 KASSERT(txr->hn_agg_txd != NULL, 4634 ("no aggregating txdesc")); 4635 KASSERT(m_head == NULL, 4636 ("pending mbuf for aggregating txdesc")); 4637 } 4638 #endif 4639 4640 /* Sent */ 4641 drbr_advance(ifp, txr->hn_mbuf_br); 4642 } 4643 4644 /* Flush pending aggregated transmission. */ 4645 if (txr->hn_agg_txd != NULL) 4646 hn_flush_txagg(ifp, txr); 4647 return (sched); 4648 } 4649 4650 static int 4651 hn_transmit(struct ifnet *ifp, struct mbuf *m) 4652 { 4653 struct hn_softc *sc = ifp->if_softc; 4654 struct hn_tx_ring *txr; 4655 int error, idx = 0; 4656 4657 #if defined(INET6) || defined(INET) 4658 /* 4659 * Perform TSO packet header fixup now, since the TSO 4660 * packet header should be cache-hot.
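* (hn_tso_fixup() may replace the mbuf chain; on failure the packet is dropped and counted as an output error.)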
4661 */ 4662 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 4663 m = hn_tso_fixup(m); 4664 if (__predict_false(m == NULL)) { 4665 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 4666 return EIO; 4667 } 4668 } 4669 #endif 4670 4671 /* 4672 * Select the TX ring based on flowid 4673 */ 4674 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 4675 #ifdef RSS 4676 uint32_t bid; 4677 4678 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 4679 &bid) == 0) 4680 idx = bid % sc->hn_tx_ring_inuse; 4681 else 4682 #endif 4683 { 4684 #if defined(INET6) || defined(INET) 4685 int tcpsyn = 0; 4686 4687 if (m->m_pkthdr.len < 128 && 4688 (m->m_pkthdr.csum_flags & 4689 (CSUM_IP_TCP | CSUM_IP6_TCP)) && 4690 (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 4691 m = hn_check_tcpsyn(m, &tcpsyn); 4692 if (__predict_false(m == NULL)) { 4693 if_inc_counter(ifp, 4694 IFCOUNTER_OERRORS, 1); 4695 return (EIO); 4696 } 4697 } 4698 #else 4699 const int tcpsyn = 0; 4700 #endif 4701 if (tcpsyn) 4702 idx = 0; 4703 else 4704 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 4705 } 4706 } 4707 txr = &sc->hn_tx_ring[idx]; 4708 4709 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 4710 if (error) { 4711 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 4712 return error; 4713 } 4714 4715 if (txr->hn_oactive) 4716 return 0; 4717 4718 if (txr->hn_sched_tx) 4719 goto do_sched; 4720 4721 if (mtx_trylock(&txr->hn_tx_lock)) { 4722 int sched; 4723 4724 sched = hn_xmit(txr, txr->hn_direct_tx_size); 4725 mtx_unlock(&txr->hn_tx_lock); 4726 if (!sched) 4727 return 0; 4728 } 4729 do_sched: 4730 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 4731 return 0; 4732 } 4733 4734 static void 4735 hn_tx_ring_qflush(struct hn_tx_ring *txr) 4736 { 4737 struct mbuf *m; 4738 4739 mtx_lock(&txr->hn_tx_lock); 4740 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 4741 m_freem(m); 4742 mtx_unlock(&txr->hn_tx_lock); 4743 } 4744 4745 static void 4746 hn_xmit_qflush(struct ifnet *ifp) 4747 { 4748 struct hn_softc *sc = ifp->if_softc; 4749 int i; 4750 4751 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4752 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 4753 if_qflush(ifp); 4754 } 4755 4756 static void 4757 hn_xmit_txeof(struct hn_tx_ring *txr) 4758 { 4759 4760 if (txr->hn_sched_tx) 4761 goto do_sched; 4762 4763 if (mtx_trylock(&txr->hn_tx_lock)) { 4764 int sched; 4765 4766 txr->hn_oactive = 0; 4767 sched = hn_xmit(txr, txr->hn_direct_tx_size); 4768 mtx_unlock(&txr->hn_tx_lock); 4769 if (sched) { 4770 taskqueue_enqueue(txr->hn_tx_taskq, 4771 &txr->hn_tx_task); 4772 } 4773 } else { 4774 do_sched: 4775 /* 4776 * Release the oactive earlier, with the hope, that 4777 * others could catch up. The task will clear the 4778 * oactive again with the hn_tx_lock to avoid possible 4779 * races. 
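* (hn_xmit_txeof_taskfunc() below clears hn_oactive again under hn_tx_lock before calling hn_xmit().)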
4780 */ 4781 txr->hn_oactive = 0; 4782 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 4783 } 4784 } 4785 4786 static void 4787 hn_xmit_taskfunc(void *xtxr, int pending __unused) 4788 { 4789 struct hn_tx_ring *txr = xtxr; 4790 4791 mtx_lock(&txr->hn_tx_lock); 4792 hn_xmit(txr, 0); 4793 mtx_unlock(&txr->hn_tx_lock); 4794 } 4795 4796 static void 4797 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) 4798 { 4799 struct hn_tx_ring *txr = xtxr; 4800 4801 mtx_lock(&txr->hn_tx_lock); 4802 txr->hn_oactive = 0; 4803 hn_xmit(txr, 0); 4804 mtx_unlock(&txr->hn_tx_lock); 4805 } 4806 4807 static int 4808 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) 4809 { 4810 struct vmbus_chan_br cbr; 4811 struct hn_rx_ring *rxr; 4812 struct hn_tx_ring *txr = NULL; 4813 int idx, error; 4814 4815 idx = vmbus_chan_subidx(chan); 4816 4817 /* 4818 * Link this channel to RX/TX ring. 4819 */ 4820 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 4821 ("invalid channel index %d, should >= 0 && < %d", 4822 idx, sc->hn_rx_ring_inuse)); 4823 rxr = &sc->hn_rx_ring[idx]; 4824 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, 4825 ("RX ring %d already attached", idx)); 4826 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; 4827 rxr->hn_chan = chan; 4828 4829 if (bootverbose) { 4830 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", 4831 idx, vmbus_chan_id(chan)); 4832 } 4833 4834 if (idx < sc->hn_tx_ring_inuse) { 4835 txr = &sc->hn_tx_ring[idx]; 4836 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, 4837 ("TX ring %d already attached", idx)); 4838 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; 4839 4840 txr->hn_chan = chan; 4841 if (bootverbose) { 4842 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", 4843 idx, vmbus_chan_id(chan)); 4844 } 4845 } 4846 4847 /* Bind this channel to a proper CPU. */ 4848 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx)); 4849 4850 /* 4851 * Open this channel. 4852 */ 4853 cbr.cbr = rxr->hn_br; 4854 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; 4855 cbr.cbr_txsz = HN_TXBR_SIZE; 4856 cbr.cbr_rxsz = HN_RXBR_SIZE; 4857 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); 4858 if (error) { 4859 if (error == EISCONN) { 4860 if_printf(sc->hn_ifp, "bufring is connected after " 4861 "chan%u open failure\n", vmbus_chan_id(chan)); 4862 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 4863 } else { 4864 if_printf(sc->hn_ifp, "open chan%u failed: %d\n", 4865 vmbus_chan_id(chan), error); 4866 } 4867 } 4868 return (error); 4869 } 4870 4871 static void 4872 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) 4873 { 4874 struct hn_rx_ring *rxr; 4875 int idx, error; 4876 4877 idx = vmbus_chan_subidx(chan); 4878 4879 /* 4880 * Unlink this channel from the RX/TX ring. 4881 */ 4882 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 4883 ("invalid channel index %d, should >= 0 && < %d", 4884 idx, sc->hn_rx_ring_inuse)); 4885 rxr = &sc->hn_rx_ring[idx]; 4886 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), 4887 ("RX ring %d is not attached", idx)); 4888 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; 4889 4890 if (idx < sc->hn_tx_ring_inuse) { 4891 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; 4892 4893 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), 4894 ("TX ring %d is not attached", idx)); 4895 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; 4896 } 4897 4898 /* 4899 * Close this channel. 4900 * 4901 * NOTE: 4902 * Channel closing does _not_ destroy the target channel.
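* Sub-channels are destroyed only later; hn_detach_allchans() waits for this through vmbus_subchan_drain().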
4903 */ 4904 error = vmbus_chan_close_direct(chan); 4905 if (error == EISCONN) { 4906 if_printf(sc->hn_ifp, "chan%u bufring is connected " 4907 "after being closed\n", vmbus_chan_id(chan)); 4908 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 4909 } else if (error) { 4910 if_printf(sc->hn_ifp, "chan%u close failed: %d\n", 4911 vmbus_chan_id(chan), error); 4912 } 4913 } 4914 4915 static int 4916 hn_attach_subchans(struct hn_softc *sc) 4917 { 4918 struct vmbus_channel **subchans; 4919 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 4920 int i, error = 0; 4921 4922 KASSERT(subchan_cnt > 0, ("no sub-channels")); 4923 4924 /* Attach the sub-channels. */ 4925 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 4926 for (i = 0; i < subchan_cnt; ++i) { 4927 int error1; 4928 4929 error1 = hn_chan_attach(sc, subchans[i]); 4930 if (error1) { 4931 error = error1; 4932 /* Move on; all channels will be detached later. */ 4933 } 4934 } 4935 vmbus_subchan_rel(subchans, subchan_cnt); 4936 4937 if (error) { 4938 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); 4939 } else { 4940 if (bootverbose) { 4941 if_printf(sc->hn_ifp, "%d sub-channels attached\n", 4942 subchan_cnt); 4943 } 4944 } 4945 return (error); 4946 } 4947 4948 static void 4949 hn_detach_allchans(struct hn_softc *sc) 4950 { 4951 struct vmbus_channel **subchans; 4952 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 4953 int i; 4954 4955 if (subchan_cnt == 0) 4956 goto back; 4957 4958 /* Detach the sub-channels. */ 4959 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 4960 for (i = 0; i < subchan_cnt; ++i) 4961 hn_chan_detach(sc, subchans[i]); 4962 vmbus_subchan_rel(subchans, subchan_cnt); 4963 4964 back: 4965 /* 4966 * Detach the primary channel, _after_ all sub-channels 4967 * are detached. 4968 */ 4969 hn_chan_detach(sc, sc->hn_prichan); 4970 4971 /* Wait for sub-channels to be destroyed, if any. */ 4972 vmbus_subchan_drain(sc->hn_prichan); 4973 4974 #ifdef INVARIANTS 4975 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4976 KASSERT((sc->hn_rx_ring[i].hn_rx_flags & 4977 HN_RX_FLAG_ATTACHED) == 0, 4978 ("%dth RX ring is still attached", i)); 4979 } 4980 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4981 KASSERT((sc->hn_tx_ring[i].hn_tx_flags & 4982 HN_TX_FLAG_ATTACHED) == 0, 4983 ("%dth TX ring is still attached", i)); 4984 } 4985 #endif 4986 } 4987 4988 static int 4989 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) 4990 { 4991 struct vmbus_channel **subchans; 4992 int nchan, rxr_cnt, error; 4993 4994 nchan = *nsubch + 1; 4995 if (nchan == 1) { 4996 /* 4997 * Multiple RX/TX rings are not requested. 4998 */ 4999 *nsubch = 0; 5000 return (0); 5001 } 5002 5003 /* 5004 * Query RSS capabilities, e.g. # of RX rings, and # of indirect 5005 * table entries. 5006 */ 5007 error = hn_rndis_query_rsscaps(sc, &rxr_cnt); 5008 if (error) { 5009 /* No RSS; this is benign. */ 5010 *nsubch = 0; 5011 return (0); 5012 } 5013 if (bootverbose) { 5014 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", 5015 rxr_cnt, nchan); 5016 } 5017 5018 if (nchan > rxr_cnt) 5019 nchan = rxr_cnt; 5020 if (nchan == 1) { 5021 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); 5022 *nsubch = 0; 5023 return (0); 5024 } 5025 5026 /* 5027 * Allocate sub-channels from NVS. 5028 */ 5029 *nsubch = nchan - 1; 5030 error = hn_nvs_alloc_subchans(sc, nsubch); 5031 if (error || *nsubch == 0) { 5032 /* Failed to allocate sub-channels. 
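Fall back to using the primary channel only.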
*/ 5033 *nsubch = 0; 5034 return (0); 5035 } 5036 5037 /* 5038 * Wait for all sub-channels to become ready before moving on. 5039 */ 5040 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); 5041 vmbus_subchan_rel(subchans, *nsubch); 5042 return (0); 5043 } 5044 5045 static bool 5046 hn_synth_attachable(const struct hn_softc *sc) 5047 { 5048 int i; 5049 5050 if (sc->hn_flags & HN_FLAG_ERRORS) 5051 return (false); 5052 5053 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 5054 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 5055 5056 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) 5057 return (false); 5058 } 5059 return (true); 5060 } 5061 5062 /* 5063 * Make sure that the RX filter is zero after the successful 5064 * RNDIS initialization. 5065 * 5066 * NOTE: 5067 * Under certain conditions on certain versions of Hyper-V, 5068 * the RNDIS rxfilter is _not_ zero on the hypervisor side 5069 * after the successful RNDIS initialization, which breaks 5070 * the assumption of any following code (well, it breaks the 5071 * RNDIS API contract actually). Clear the RNDIS rxfilter 5072 * explicitly, drain packets sneaking through, and drain the 5073 * interrupt taskqueues scheduled due to the stealth packets. 5074 */ 5075 static void 5076 hn_rndis_init_fixat(struct hn_softc *sc, int nchan) 5077 { 5078 5079 hn_disable_rx(sc); 5080 hn_drain_rxtx(sc, nchan); 5081 } 5082 5083 static int 5084 hn_synth_attach(struct hn_softc *sc, int mtu) 5085 { 5086 #define ATTACHED_NVS 0x0002 5087 #define ATTACHED_RNDIS 0x0004 5088 5089 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 5090 int error, nsubch, nchan = 1, i, rndis_inited; 5091 uint32_t old_caps, attached = 0; 5092 5093 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, 5094 ("synthetic parts were attached")); 5095 5096 if (!hn_synth_attachable(sc)) 5097 return (ENXIO); 5098 5099 /* Save capabilities for later verification. */ 5100 old_caps = sc->hn_caps; 5101 sc->hn_caps = 0; 5102 5103 /* Clear RSS stuffs. */ 5104 sc->hn_rss_ind_size = 0; 5105 sc->hn_rss_hash = 0; 5106 5107 /* 5108 * Attach the primary channel _before_ attaching NVS and RNDIS. 5109 */ 5110 error = hn_chan_attach(sc, sc->hn_prichan); 5111 if (error) 5112 goto failed; 5113 5114 /* 5115 * Attach NVS. 5116 */ 5117 error = hn_nvs_attach(sc, mtu); 5118 if (error) 5119 goto failed; 5120 attached |= ATTACHED_NVS; 5121 5122 /* 5123 * Attach RNDIS _after_ NVS is attached. 5124 */ 5125 error = hn_rndis_attach(sc, mtu, &rndis_inited); 5126 if (rndis_inited) 5127 attached |= ATTACHED_RNDIS; 5128 if (error) 5129 goto failed; 5130 5131 /* 5132 * Make sure capabilities are not changed. 5133 */ 5134 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { 5135 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", 5136 old_caps, sc->hn_caps); 5137 error = ENXIO; 5138 goto failed; 5139 } 5140 5141 /* 5142 * Allocate sub-channels for multi-TX/RX rings. 5143 * 5144 * NOTE: 5145 * The # of RX rings that can be used is equivalent to the # of 5146 * channels to be requested. 5147 */ 5148 nsubch = sc->hn_rx_ring_cnt - 1; 5149 error = hn_synth_alloc_subchans(sc, &nsubch); 5150 if (error) 5151 goto failed; 5152 /* NOTE: _Full_ synthetic parts detach is required now. */ 5153 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; 5154 5155 /* 5156 * Set the # of TX/RX rings that could be used according to 5157 * the # of channels that NVS offered. 
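* hn_set_ring_inuse() below records the usable counts; any extra rings stay allocated but unused.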
5158 */ 5159 nchan = nsubch + 1; 5160 hn_set_ring_inuse(sc, nchan); 5161 if (nchan == 1) { 5162 /* Only the primary channel can be used; done */ 5163 goto back; 5164 } 5165 5166 /* 5167 * Attach the sub-channels. 5168 * 5169 * NOTE: hn_set_ring_inuse() _must_ have been called. 5170 */ 5171 error = hn_attach_subchans(sc); 5172 if (error) 5173 goto failed; 5174 5175 /* 5176 * Configure RSS key and indirect table _after_ all sub-channels 5177 * are attached. 5178 */ 5179 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { 5180 /* 5181 * RSS key is not set yet; set it to the default RSS key. 5182 */ 5183 if (bootverbose) 5184 if_printf(sc->hn_ifp, "setup default RSS key\n"); 5185 #ifdef RSS 5186 rss_getkey(rss->rss_key); 5187 #else 5188 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); 5189 #endif 5190 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 5191 } 5192 5193 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { 5194 /* 5195 * RSS indirect table is not set yet; set it up in round- 5196 * robin fashion. 5197 */ 5198 if (bootverbose) { 5199 if_printf(sc->hn_ifp, "setup default RSS indirect " 5200 "table\n"); 5201 } 5202 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 5203 uint32_t subidx; 5204 5205 #ifdef RSS 5206 subidx = rss_get_indirection_to_bucket(i); 5207 #else 5208 subidx = i; 5209 #endif 5210 rss->rss_ind[i] = subidx % nchan; 5211 } 5212 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 5213 } else { 5214 /* 5215 * # of usable channels may be changed, so we have to 5216 * make sure that all entries in RSS indirect table 5217 * are valid. 5218 * 5219 * NOTE: hn_set_ring_inuse() _must_ have been called. 5220 */ 5221 hn_rss_ind_fixup(sc); 5222 } 5223 5224 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 5225 if (error) 5226 goto failed; 5227 back: 5228 /* 5229 * Fixup transmission aggregation setup. 5230 */ 5231 hn_set_txagg(sc); 5232 hn_rndis_init_fixat(sc, nchan); 5233 return (0); 5234 5235 failed: 5236 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 5237 hn_rndis_init_fixat(sc, nchan); 5238 hn_synth_detach(sc); 5239 } else { 5240 if (attached & ATTACHED_RNDIS) { 5241 hn_rndis_init_fixat(sc, nchan); 5242 hn_rndis_detach(sc); 5243 } 5244 if (attached & ATTACHED_NVS) 5245 hn_nvs_detach(sc); 5246 hn_chan_detach(sc, sc->hn_prichan); 5247 /* Restore old capabilities. */ 5248 sc->hn_caps = old_caps; 5249 } 5250 return (error); 5251 5252 #undef ATTACHED_RNDIS 5253 #undef ATTACHED_NVS 5254 } 5255 5256 /* 5257 * NOTE: 5258 * The interface must have been suspended through hn_suspend(), before 5259 * this function gets called. 5260 */ 5261 static void 5262 hn_synth_detach(struct hn_softc *sc) 5263 { 5264 5265 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 5266 ("synthetic parts were not attached")); 5267 5268 /* Detach the RNDIS first. */ 5269 hn_rndis_detach(sc); 5270 5271 /* Detach NVS. */ 5272 hn_nvs_detach(sc); 5273 5274 /* Detach all of the channels.
*/ 5275 hn_detach_allchans(sc); 5276 5277 sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED; 5278 } 5279 5280 static void 5281 hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt) 5282 { 5283 KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt, 5284 ("invalid ring count %d", ring_cnt)); 5285 5286 if (sc->hn_tx_ring_cnt > ring_cnt) 5287 sc->hn_tx_ring_inuse = ring_cnt; 5288 else 5289 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 5290 sc->hn_rx_ring_inuse = ring_cnt; 5291 5292 #ifdef RSS 5293 if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) { 5294 if_printf(sc->hn_ifp, "# of RX rings (%d) does not match " 5295 "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse, 5296 rss_getnumbuckets()); 5297 } 5298 #endif 5299 5300 if (bootverbose) { 5301 if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n", 5302 sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); 5303 } 5304 } 5305 5306 static void 5307 hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan) 5308 { 5309 5310 /* 5311 * NOTE: 5312 * The TX bufring will not be drained by the hypervisor, 5313 * if the primary channel is revoked. 5314 */ 5315 while (!vmbus_chan_rx_empty(chan) || 5316 (!vmbus_chan_is_revoked(sc->hn_prichan) && 5317 !vmbus_chan_tx_empty(chan))) 5318 pause("waitch", 1); 5319 vmbus_chan_intr_drain(chan); 5320 } 5321 5322 static void 5323 hn_disable_rx(struct hn_softc *sc) 5324 { 5325 5326 /* 5327 * Disable RX by clearing RX filter forcefully. 5328 */ 5329 sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE; 5330 hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */ 5331 5332 /* 5333 * Give RNDIS enough time to flush all pending data packets. 5334 */ 5335 pause("waitrx", (200 * hz) / 1000); 5336 } 5337 5338 /* 5339 * NOTE: 5340 * RX/TX _must_ have been suspended/disabled, before this function 5341 * is called. 5342 */ 5343 static void 5344 hn_drain_rxtx(struct hn_softc *sc, int nchan) 5345 { 5346 struct vmbus_channel **subch = NULL; 5347 int nsubch; 5348 5349 /* 5350 * Drain RX/TX bufrings and interrupts. 5351 */ 5352 nsubch = nchan - 1; 5353 if (nsubch > 0) 5354 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 5355 5356 if (subch != NULL) { 5357 int i; 5358 5359 for (i = 0; i < nsubch; ++i) 5360 hn_chan_drain(sc, subch[i]); 5361 } 5362 hn_chan_drain(sc, sc->hn_prichan); 5363 5364 if (subch != NULL) 5365 vmbus_subchan_rel(subch, nsubch); 5366 } 5367 5368 static void 5369 hn_suspend_data(struct hn_softc *sc) 5370 { 5371 struct hn_tx_ring *txr; 5372 int i; 5373 5374 HN_LOCK_ASSERT(sc); 5375 5376 /* 5377 * Suspend TX. 5378 */ 5379 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 5380 txr = &sc->hn_tx_ring[i]; 5381 5382 mtx_lock(&txr->hn_tx_lock); 5383 txr->hn_suspended = 1; 5384 mtx_unlock(&txr->hn_tx_lock); 5385 /* No one is able send more packets now. */ 5386 5387 /* 5388 * Wait for all pending sends to finish. 5389 * 5390 * NOTE: 5391 * We will _not_ receive all pending send-done, if the 5392 * primary channel is revoked. 5393 */ 5394 while (hn_tx_ring_pending(txr) && 5395 !vmbus_chan_is_revoked(sc->hn_prichan)) 5396 pause("hnwtx", 1 /* 1 tick */); 5397 } 5398 5399 /* 5400 * Disable RX. 5401 */ 5402 hn_disable_rx(sc); 5403 5404 /* 5405 * Drain RX/TX. 5406 */ 5407 hn_drain_rxtx(sc, sc->hn_rx_ring_inuse); 5408 5409 /* 5410 * Drain any pending TX tasks. 5411 * 5412 * NOTE: 5413 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX 5414 * tasks will have to be drained _after_ the above hn_drain_rxtx(). 
5415 */ 5416 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 5417 txr = &sc->hn_tx_ring[i]; 5418 5419 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); 5420 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); 5421 } 5422 } 5423 5424 static void 5425 hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused) 5426 { 5427 5428 ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL; 5429 } 5430 5431 static void 5432 hn_suspend_mgmt(struct hn_softc *sc) 5433 { 5434 struct task task; 5435 5436 HN_LOCK_ASSERT(sc); 5437 5438 /* 5439 * Make sure that hn_mgmt_taskq0 can no longer be accessed 5440 * through hn_mgmt_taskq. 5441 */ 5442 TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc); 5443 vmbus_chan_run_task(sc->hn_prichan, &task); 5444 5445 /* 5446 * Make sure that all pending management tasks are completed. 5447 */ 5448 taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init); 5449 taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status); 5450 taskqueue_drain_all(sc->hn_mgmt_taskq0); 5451 } 5452 5453 static void 5454 hn_suspend(struct hn_softc *sc) 5455 { 5456 5457 /* Disable polling. */ 5458 hn_polling(sc, 0); 5459 5460 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || 5461 (sc->hn_flags & HN_FLAG_RXVF)) 5462 hn_suspend_data(sc); 5463 hn_suspend_mgmt(sc); 5464 } 5465 5466 static void 5467 hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt) 5468 { 5469 int i; 5470 5471 KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt, 5472 ("invalid TX ring count %d", tx_ring_cnt)); 5473 5474 for (i = 0; i < tx_ring_cnt; ++i) { 5475 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 5476 5477 mtx_lock(&txr->hn_tx_lock); 5478 txr->hn_suspended = 0; 5479 mtx_unlock(&txr->hn_tx_lock); 5480 } 5481 } 5482 5483 static void 5484 hn_resume_data(struct hn_softc *sc) 5485 { 5486 int i; 5487 5488 HN_LOCK_ASSERT(sc); 5489 5490 /* 5491 * Re-enable RX. 5492 */ 5493 hn_rxfilter_config(sc); 5494 5495 /* 5496 * Make sure to clear suspend status on "all" TX rings, 5497 * since hn_tx_ring_inuse can be changed after 5498 * hn_suspend_data(). 5499 */ 5500 hn_resume_tx(sc, sc->hn_tx_ring_cnt); 5501 5502 #ifdef HN_IFSTART_SUPPORT 5503 if (!hn_use_if_start) 5504 #endif 5505 { 5506 /* 5507 * Flush unused drbrs, since hn_tx_ring_inuse may be 5508 * reduced. 5509 */ 5510 for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i) 5511 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 5512 } 5513 5514 /* 5515 * Kick start TX. 5516 */ 5517 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 5518 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 5519 5520 /* 5521 * Use txeof task, so that any pending oactive can be 5522 * cleared properly. 5523 */ 5524 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 5525 } 5526 } 5527 5528 static void 5529 hn_resume_mgmt(struct hn_softc *sc) 5530 { 5531 5532 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 5533 5534 /* 5535 * Kick off network change detection, if it was pending. 5536 * If no network change was pending, start link status 5537 * checks, which are more lightweight than network change 5538 * detection. 5539 */ 5540 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 5541 hn_change_network(sc); 5542 else 5543 hn_update_link_status(sc); 5544 } 5545 5546 static void 5547 hn_resume(struct hn_softc *sc) 5548 { 5549 5550 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || 5551 (sc->hn_flags & HN_FLAG_RXVF)) 5552 hn_resume_data(sc); 5553 5554 /* 5555 * When the VF is activated, the synthetic interface is changed 5556 * to DOWN in hn_rxvf_change().
Here, if the VF is still active, 5557 * we don't call hn_resume_mgmt() until the VF is deactivated in 5558 * hn_rxvf_change(). 5559 */ 5560 if (!(sc->hn_flags & HN_FLAG_RXVF)) 5561 hn_resume_mgmt(sc); 5562 5563 /* 5564 * Re-enable polling if this interface is running and 5565 * the polling is requested. 5566 */ 5567 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0) 5568 hn_polling(sc, sc->hn_pollhz); 5569 } 5570 5571 static void 5572 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) 5573 { 5574 const struct rndis_status_msg *msg; 5575 int ofs; 5576 5577 if (dlen < sizeof(*msg)) { 5578 if_printf(sc->hn_ifp, "invalid RNDIS status\n"); 5579 return; 5580 } 5581 msg = data; 5582 5583 switch (msg->rm_status) { 5584 case RNDIS_STATUS_MEDIA_CONNECT: 5585 case RNDIS_STATUS_MEDIA_DISCONNECT: 5586 hn_update_link_status(sc); 5587 break; 5588 5589 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: 5590 case RNDIS_STATUS_LINK_SPEED_CHANGE: 5591 /* Not really useful; ignore. */ 5592 break; 5593 5594 case RNDIS_STATUS_NETWORK_CHANGE: 5595 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); 5596 if (dlen < ofs + msg->rm_stbuflen || 5597 msg->rm_stbuflen < sizeof(uint32_t)) { 5598 if_printf(sc->hn_ifp, "network changed\n"); 5599 } else { 5600 uint32_t change; 5601 5602 memcpy(&change, ((const uint8_t *)msg) + ofs, 5603 sizeof(change)); 5604 if_printf(sc->hn_ifp, "network changed, change %u\n", 5605 change); 5606 } 5607 hn_change_network(sc); 5608 break; 5609 5610 default: 5611 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", 5612 msg->rm_status); 5613 break; 5614 } 5615 } 5616 5617 static int 5618 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) 5619 { 5620 const struct rndis_pktinfo *pi = info_data; 5621 uint32_t mask = 0; 5622 5623 while (info_dlen != 0) { 5624 const void *data; 5625 uint32_t dlen; 5626 5627 if (__predict_false(info_dlen < sizeof(*pi))) 5628 return (EINVAL); 5629 if (__predict_false(info_dlen < pi->rm_size)) 5630 return (EINVAL); 5631 info_dlen -= pi->rm_size; 5632 5633 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) 5634 return (EINVAL); 5635 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) 5636 return (EINVAL); 5637 dlen = pi->rm_size - pi->rm_pktinfooffset; 5638 data = pi->rm_data; 5639 5640 switch (pi->rm_type) { 5641 case NDIS_PKTINFO_TYPE_VLAN: 5642 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) 5643 return (EINVAL); 5644 info->vlan_info = *((const uint32_t *)data); 5645 mask |= HN_RXINFO_VLAN; 5646 break; 5647 5648 case NDIS_PKTINFO_TYPE_CSUM: 5649 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) 5650 return (EINVAL); 5651 info->csum_info = *((const uint32_t *)data); 5652 mask |= HN_RXINFO_CSUM; 5653 break; 5654 5655 case HN_NDIS_PKTINFO_TYPE_HASHVAL: 5656 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) 5657 return (EINVAL); 5658 info->hash_value = *((const uint32_t *)data); 5659 mask |= HN_RXINFO_HASHVAL; 5660 break; 5661 5662 case HN_NDIS_PKTINFO_TYPE_HASHINF: 5663 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) 5664 return (EINVAL); 5665 info->hash_info = *((const uint32_t *)data); 5666 mask |= HN_RXINFO_HASHINF; 5667 break; 5668 5669 default: 5670 goto next; 5671 } 5672 5673 if (mask == HN_RXINFO_ALL) { 5674 /* All found; done */ 5675 break; 5676 } 5677 next: 5678 pi = (const struct rndis_pktinfo *) 5679 ((const uint8_t *)pi + pi->rm_size); 5680 } 5681 5682 /* 5683 * Final fixup. 5684 * - If there is no hash value, invalidate the hash info. 
5685 */ 5686 if ((mask & HN_RXINFO_HASHVAL) == 0) 5687 info->hash_info = HN_NDIS_HASH_INFO_INVALID; 5688 return (0); 5689 } 5690 5691 static __inline bool 5692 hn_rndis_check_overlap(int off, int len, int check_off, int check_len) 5693 { 5694 5695 if (off < check_off) { 5696 if (__predict_true(off + len <= check_off)) 5697 return (false); 5698 } else if (off > check_off) { 5699 if (__predict_true(check_off + check_len <= off)) 5700 return (false); 5701 } 5702 return (true); 5703 } 5704 5705 static void 5706 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) 5707 { 5708 const struct rndis_packet_msg *pkt; 5709 struct hn_rxinfo info; 5710 int data_off, pktinfo_off, data_len, pktinfo_len; 5711 5712 /* 5713 * Check length. 5714 */ 5715 if (__predict_false(dlen < sizeof(*pkt))) { 5716 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n"); 5717 return; 5718 } 5719 pkt = data; 5720 5721 if (__predict_false(dlen < pkt->rm_len)) { 5722 if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, " 5723 "dlen %d, msglen %u\n", dlen, pkt->rm_len); 5724 return; 5725 } 5726 if (__predict_false(pkt->rm_len < 5727 pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) { 5728 if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, " 5729 "msglen %u, data %u, oob %u, pktinfo %u\n", 5730 pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen, 5731 pkt->rm_pktinfolen); 5732 return; 5733 } 5734 if (__predict_false(pkt->rm_datalen == 0)) { 5735 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n"); 5736 return; 5737 } 5738 5739 /* 5740 * Check offsets. 5741 */ 5742 #define IS_OFFSET_INVALID(ofs) \ 5743 ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \ 5744 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)) 5745 5746 /* XXX Hyper-V does not meet data offset alignment requirement */ 5747 if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) { 5748 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5749 "data offset %u\n", pkt->rm_dataoffset); 5750 return; 5751 } 5752 if (__predict_false(pkt->rm_oobdataoffset > 0 && 5753 IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) { 5754 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5755 "oob offset %u\n", pkt->rm_oobdataoffset); 5756 return; 5757 } 5758 if (__predict_true(pkt->rm_pktinfooffset > 0) && 5759 __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) { 5760 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5761 "pktinfo offset %u\n", pkt->rm_pktinfooffset); 5762 return; 5763 } 5764 5765 #undef IS_OFFSET_INVALID 5766 5767 data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset); 5768 data_len = pkt->rm_datalen; 5769 pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset); 5770 pktinfo_len = pkt->rm_pktinfolen; 5771 5772 /* 5773 * Check OOB coverage. 5774 */ 5775 if (__predict_false(pkt->rm_oobdatalen != 0)) { 5776 int oob_off, oob_len; 5777 5778 if_printf(rxr->hn_ifp, "got oobdata\n"); 5779 oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); 5780 oob_len = pkt->rm_oobdatalen; 5781 5782 if (__predict_false(oob_off + oob_len > pkt->rm_len)) { 5783 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5784 "oob overflow, msglen %u, oob abs %d len %d\n", 5785 pkt->rm_len, oob_off, oob_len); 5786 return; 5787 } 5788 5789 /* 5790 * Check against data.
5791 */ 5792 if (hn_rndis_check_overlap(oob_off, oob_len, 5793 data_off, data_len)) { 5794 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5795 "oob overlaps data, oob abs %d len %d, " 5796 "data abs %d len %d\n", 5797 oob_off, oob_len, data_off, data_len); 5798 return; 5799 } 5800 5801 /* 5802 * Check against pktinfo. 5803 */ 5804 if (pktinfo_len != 0 && 5805 hn_rndis_check_overlap(oob_off, oob_len, 5806 pktinfo_off, pktinfo_len)) { 5807 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5808 "oob overlaps pktinfo, oob abs %d len %d, " 5809 "pktinfo abs %d len %d\n", 5810 oob_off, oob_len, pktinfo_off, pktinfo_len); 5811 return; 5812 } 5813 } 5814 5815 /* 5816 * Check per-packet-info coverage and find useful per-packet-info. 5817 */ 5818 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; 5819 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; 5820 info.hash_info = HN_NDIS_HASH_INFO_INVALID; 5821 if (__predict_true(pktinfo_len != 0)) { 5822 bool overlap; 5823 int error; 5824 5825 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { 5826 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5827 "pktinfo overflow, msglen %u, " 5828 "pktinfo abs %d len %d\n", 5829 pkt->rm_len, pktinfo_off, pktinfo_len); 5830 return; 5831 } 5832 5833 /* 5834 * Check packet info coverage. 5835 */ 5836 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, 5837 data_off, data_len); 5838 if (__predict_false(overlap)) { 5839 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5840 "pktinfo overlap data, pktinfo abs %d len %d, " 5841 "data abs %d len %d\n", 5842 pktinfo_off, pktinfo_len, data_off, data_len); 5843 return; 5844 } 5845 5846 /* 5847 * Find useful per-packet-info. 5848 */ 5849 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, 5850 pktinfo_len, &info); 5851 if (__predict_false(error)) { 5852 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " 5853 "pktinfo\n"); 5854 return; 5855 } 5856 } 5857 5858 if (__predict_false(data_off + data_len > pkt->rm_len)) { 5859 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5860 "data overflow, msglen %u, data abs %d len %d\n", 5861 pkt->rm_len, data_off, data_len); 5862 return; 5863 } 5864 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); 5865 } 5866 5867 static __inline void 5868 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) 5869 { 5870 const struct rndis_msghdr *hdr; 5871 5872 if (__predict_false(dlen < sizeof(*hdr))) { 5873 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); 5874 return; 5875 } 5876 hdr = data; 5877 5878 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { 5879 /* Hot data path. */ 5880 hn_rndis_rx_data(rxr, data, dlen); 5881 /* Done! 
*/ 5882 return; 5883 } 5884 5885 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) 5886 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); 5887 else 5888 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); 5889 } 5890 5891 static void 5892 hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) 5893 { 5894 const struct hn_nvs_hdr *hdr; 5895 5896 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { 5897 if_printf(sc->hn_ifp, "invalid nvs notify\n"); 5898 return; 5899 } 5900 hdr = VMBUS_CHANPKT_CONST_DATA(pkt); 5901 5902 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { 5903 /* Useless; ignore */ 5904 return; 5905 } 5906 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); 5907 } 5908 5909 static void 5910 hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, 5911 const struct vmbus_chanpkt_hdr *pkt) 5912 { 5913 struct hn_nvs_sendctx *sndc; 5914 5915 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; 5916 sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), 5917 VMBUS_CHANPKT_DATALEN(pkt)); 5918 /* 5919 * NOTE: 5920 * 'sndc' CAN NOT be accessed anymore, since it can be freed by 5921 * its callback. 5922 */ 5923 } 5924 5925 static void 5926 hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 5927 const struct vmbus_chanpkt_hdr *pkthdr) 5928 { 5929 const struct vmbus_chanpkt_rxbuf *pkt; 5930 const struct hn_nvs_hdr *nvs_hdr; 5931 int count, i, hlen; 5932 5933 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { 5934 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); 5935 return; 5936 } 5937 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); 5938 5939 /* Make sure that this is a RNDIS message. */ 5940 if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { 5941 if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", 5942 nvs_hdr->nvs_type); 5943 return; 5944 } 5945 5946 hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); 5947 if (__predict_false(hlen < sizeof(*pkt))) { 5948 if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); 5949 return; 5950 } 5951 pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; 5952 5953 if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { 5954 if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", 5955 pkt->cp_rxbuf_id); 5956 return; 5957 } 5958 5959 count = pkt->cp_rxbuf_cnt; 5960 if (__predict_false(hlen < 5961 __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { 5962 if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); 5963 return; 5964 } 5965 5966 /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ 5967 for (i = 0; i < count; ++i) { 5968 int ofs, len; 5969 5970 ofs = pkt->cp_rxbuf[i].rb_ofs; 5971 len = pkt->cp_rxbuf[i].rb_len; 5972 if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { 5973 if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " 5974 "ofs %d, len %d\n", i, ofs, len); 5975 continue; 5976 } 5977 hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); 5978 } 5979 5980 /* 5981 * Ack the consumed RXBUF associated w/ this channel packet, 5982 * so that this RXBUF can be recycled by the hypervisor. 
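* The ack is a completion packet carrying the original transaction id; see hn_nvs_ack_rxbuf() below.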
5983 */ 5984 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); 5985 } 5986 5987 static void 5988 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 5989 uint64_t tid) 5990 { 5991 struct hn_nvs_rndis_ack ack; 5992 int retries, error; 5993 5994 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; 5995 ack.nvs_status = HN_NVS_STATUS_OK; 5996 5997 retries = 0; 5998 again: 5999 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, 6000 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); 6001 if (__predict_false(error == EAGAIN)) { 6002 /* 6003 * NOTE: 6004 * This should _not_ happen in real world, since the 6005 * consumption of the TX bufring from the TX path is 6006 * controlled. 6007 */ 6008 if (rxr->hn_ack_failed == 0) 6009 if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); 6010 rxr->hn_ack_failed++; 6011 retries++; 6012 if (retries < 10) { 6013 DELAY(100); 6014 goto again; 6015 } 6016 /* RXBUF leaks! */ 6017 if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); 6018 } 6019 } 6020 6021 static void 6022 hn_chan_callback(struct vmbus_channel *chan, void *xrxr) 6023 { 6024 struct hn_rx_ring *rxr = xrxr; 6025 struct hn_softc *sc = rxr->hn_ifp->if_softc; 6026 6027 for (;;) { 6028 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; 6029 int error, pktlen; 6030 6031 pktlen = rxr->hn_pktbuf_len; 6032 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); 6033 if (__predict_false(error == ENOBUFS)) { 6034 void *nbuf; 6035 int nlen; 6036 6037 /* 6038 * Expand channel packet buffer. 6039 * 6040 * XXX 6041 * Use M_WAITOK here, since allocation failure 6042 * is fatal. 6043 */ 6044 nlen = rxr->hn_pktbuf_len * 2; 6045 while (nlen < pktlen) 6046 nlen *= 2; 6047 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); 6048 6049 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", 6050 rxr->hn_pktbuf_len, nlen); 6051 6052 free(rxr->hn_pktbuf, M_DEVBUF); 6053 rxr->hn_pktbuf = nbuf; 6054 rxr->hn_pktbuf_len = nlen; 6055 /* Retry! */ 6056 continue; 6057 } else if (__predict_false(error == EAGAIN)) { 6058 /* No more channel packets; done! */ 6059 break; 6060 } 6061 KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error)); 6062 6063 switch (pkt->cph_type) { 6064 case VMBUS_CHANPKT_TYPE_COMP: 6065 hn_nvs_handle_comp(sc, chan, pkt); 6066 break; 6067 6068 case VMBUS_CHANPKT_TYPE_RXBUF: 6069 hn_nvs_handle_rxbuf(rxr, chan, pkt); 6070 break; 6071 6072 case VMBUS_CHANPKT_TYPE_INBAND: 6073 hn_nvs_handle_notify(sc, pkt); 6074 break; 6075 6076 default: 6077 if_printf(rxr->hn_ifp, "unknown chan pkt %u\n", 6078 pkt->cph_type); 6079 break; 6080 } 6081 } 6082 hn_chan_rollup(rxr, rxr->hn_txr); 6083 } 6084 6085 static void 6086 hn_sysinit(void *arg __unused) 6087 { 6088 int i; 6089 6090 /* 6091 * Initialize VF map. 6092 */ 6093 rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE); 6094 hn_vfmap_size = HN_VFMAP_SIZE_DEF; 6095 hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF, 6096 M_WAITOK | M_ZERO); 6097 6098 /* 6099 * Fix the # of TX taskqueues. 6100 */ 6101 if (hn_tx_taskq_cnt <= 0) 6102 hn_tx_taskq_cnt = 1; 6103 else if (hn_tx_taskq_cnt > mp_ncpus) 6104 hn_tx_taskq_cnt = mp_ncpus; 6105 6106 /* 6107 * Fix the TX taskqueue mode. 
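* Unrecognized modes fall back to HN_TX_TASKQ_M_INDEP.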
6108 */ 6109 switch (hn_tx_taskq_mode) { 6110 case HN_TX_TASKQ_M_INDEP: 6111 case HN_TX_TASKQ_M_GLOBAL: 6112 case HN_TX_TASKQ_M_EVTTQ: 6113 break; 6114 default: 6115 hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP; 6116 break; 6117 } 6118 6119 if (vm_guest != VM_GUEST_HV) 6120 return; 6121 6122 if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL) 6123 return; 6124 6125 hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 6126 M_DEVBUF, M_WAITOK); 6127 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 6128 hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK, 6129 taskqueue_thread_enqueue, &hn_tx_taskque[i]); 6130 taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET, 6131 "hn tx%d", i); 6132 } 6133 } 6134 SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL); 6135 6136 static void 6137 hn_sysuninit(void *arg __unused) 6138 { 6139 6140 if (hn_tx_taskque != NULL) { 6141 int i; 6142 6143 for (i = 0; i < hn_tx_taskq_cnt; ++i) 6144 taskqueue_free(hn_tx_taskque[i]); 6145 free(hn_tx_taskque, M_DEVBUF); 6146 } 6147 6148 if (hn_vfmap != NULL) 6149 free(hn_vfmap, M_DEVBUF); 6150 rm_destroy(&hn_vfmap_lock); 6151 } 6152 SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL); 6153