/*-
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hn.h"
#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/buf_ring.h>
#include <sys/eventhandler.h>

#include <machine/atomic.h>
#include <machine/in_cksum.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/rndis.h>
#ifdef RSS
#include <net/rss_config.h>
#endif

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus.h>
#include <dev/hyperv/include/vmbus_xact.h>

#include <dev/hyperv/netvsc/ndis.h>
#include <dev/hyperv/netvsc/if_hnreg.h>
#include <dev/hyperv/netvsc/if_hnvar.h>
#include <dev/hyperv/netvsc/hn_nvs.h>
#include <dev/hyperv/netvsc/hn_rndis.h>

#include "vmbus_if.h"

#define HN_IFSTART_SUPPORT

#define HN_RING_CNT_DEF_MAX		8

/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT			512

#define HN_RNDIS_PKT_LEN					\
	(sizeof(struct rndis_packet_msg) +			\
	 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) +	\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
#define HN_RNDIS_PKT_BOUNDARY		PAGE_SIZE
#define HN_RNDIS_PKT_ALIGN		CACHE_LINE_SIZE

#define HN_TX_DATA_BOUNDARY		PAGE_SIZE
#define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE		PAGE_SIZE
/* -1 for RNDIS packet message */
#define HN_TX_DATA_SEGCNT_MAX		(HN_GPACNT_MAX - 1)

#define HN_DIRECT_TX_SIZE_DEF		128

#define HN_EARLY_TXEOF_THRESH		8

#define HN_PKTBUF_LEN_DEF		(16 * 1024)

#define HN_LROENT_CNT_DEF		128

#define HN_LRO_LENLIM_MULTIRX_DEF	(12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF		(25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp)		(2 * (ifp)->if_mtu)

#define HN_LRO_ACKCNT_DEF		1

#define HN_LOCK_INIT(sc)		\
	sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc)		sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc)		sx_assert(&(sc)->hn_lock, SA_XLOCKED)
#define HN_LOCK(sc)					\
do {							\
	while (sx_try_xlock(&(sc)->hn_lock) == 0)	\
		DELAY(1000);				\
} while (0)
#define HN_UNLOCK(sc)			sx_xunlock(&(sc)->hn_lock)
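
/*
 * NOTE:
 * HN_LOCK() above polls sx_try_xlock() with a 1ms DELAY() between
 * attempts instead of sleeping in sx_xlock(), so acquisition of the
 * softc lock busy-waits rather than blocking on the sx(9) lock.
 */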

#define HN_CSUM_IP_MASK			(CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
#define HN_CSUM_IP6_MASK		(CSUM_IP6_TCP | CSUM_IP6_UDP)
#define HN_CSUM_IP_HWASSIST(sc)		\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
#define HN_CSUM_IP6_HWASSIST(sc)	\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)

#define HN_PKTSIZE_MIN(align)		\
	roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
	    HN_RNDIS_PKT_LEN, (align))
#define HN_PKTSIZE(m, align)		\
	roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))

#ifdef RSS
#define HN_RING_IDX2CPU(sc, idx)	rss_getcpu((idx) % rss_getnumbuckets())
#else
#define HN_RING_IDX2CPU(sc, idx)	(((sc)->hn_cpu + (idx)) % mp_ncpus)
#endif

struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
	SLIST_ENTRY(hn_txdesc)		link;
#endif
	STAILQ_ENTRY(hn_txdesc)		agg_link;

	/* Aggregated txdescs, in sending order. */
	STAILQ_HEAD(, hn_txdesc)	agg_list;

	/* The oldest packet, if transmission aggregation happens. */
	struct mbuf			*m;
	struct hn_tx_ring		*txr;
	int				refs;
	uint32_t			flags;	/* HN_TXD_FLAG_ */
	struct hn_nvs_sendctx		send_ctx;
	uint32_t			chim_index;
	int				chim_size;

	bus_dmamap_t			data_dmap;

	bus_addr_t			rndis_pkt_paddr;
	struct rndis_packet_msg		*rndis_pkt;
	bus_dmamap_t			rndis_pkt_dmap;
};

#define HN_TXD_FLAG_ONLIST		0x0001
#define HN_TXD_FLAG_DMAMAP		0x0002
#define HN_TXD_FLAG_ONAGG		0x0004

struct hn_rxinfo {
	uint32_t			vlan_info;
	uint32_t			csum_info;
	uint32_t			hash_info;
	uint32_t			hash_value;
};

struct hn_update_vf {
	struct hn_rx_ring	*rxr;
	struct ifnet		*vf;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
	(HN_RXINFO_VLAN |		\
	 HN_RXINFO_CSUM |		\
	 HN_RXINFO_HASHINF |		\
	 HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0

static int			hn_probe(device_t);
static int			hn_attach(device_t);
static int			hn_detach(device_t);
static int			hn_shutdown(device_t);
static void			hn_chan_callback(struct vmbus_channel *,
				    void *);

static void			hn_init(void *);
static int			hn_ioctl(struct ifnet *, u_long, caddr_t);
#ifdef HN_IFSTART_SUPPORT
static void			hn_start(struct ifnet *);
#endif
static int			hn_transmit(struct ifnet *, struct mbuf *);
static void			hn_xmit_qflush(struct ifnet *);
static int			hn_ifmedia_upd(struct ifnet *);
static void			hn_ifmedia_sts(struct ifnet *,
				    struct ifmediareq *);

static int			hn_rndis_rxinfo(const void *, int,
				    struct hn_rxinfo *);
static void			hn_rndis_rx_data(struct hn_rx_ring *,
				    const void *, int);
static void			hn_rndis_rx_status(struct hn_softc *,
				    const void *, int);
static void			hn_rndis_init_fixat(struct hn_softc *, int);

static void			hn_nvs_handle_notify(struct hn_softc *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_handle_comp(struct hn_softc *,
				    struct vmbus_channel *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_handle_rxbuf(struct hn_rx_ring *,
				    struct vmbus_channel *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_ack_rxbuf(struct hn_rx_ring *,
				    struct vmbus_channel *, uint64_t);

#if __FreeBSD_version >= 1100099
static int			hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int			hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
#if __FreeBSD_version < 1100095
static int			hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
#else
static int			hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int			hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
#ifndef RSS
static int			hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int			hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_polling_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_vf_sysctl(SYSCTL_HANDLER_ARGS);

static void			hn_stop(struct hn_softc *, bool);
static void			hn_init_locked(struct hn_softc *);
static int			hn_chan_attach(struct hn_softc *,
				    struct vmbus_channel *);
static void			hn_chan_detach(struct hn_softc *,
				    struct vmbus_channel *);
static int			hn_attach_subchans(struct hn_softc *);
static void			hn_detach_allchans(struct hn_softc *);
static void			hn_chan_rollup(struct hn_rx_ring *,
				    struct hn_tx_ring *);
static void			hn_set_ring_inuse(struct hn_softc *, int);
static int			hn_synth_attach(struct hn_softc *, int);
static void			hn_synth_detach(struct hn_softc *);
static int			hn_synth_alloc_subchans(struct hn_softc *,
				    int *);
static bool			hn_synth_attachable(const struct hn_softc *);
static void			hn_suspend(struct hn_softc *);
static void			hn_suspend_data(struct hn_softc *);
static void			hn_suspend_mgmt(struct hn_softc *);
static void			hn_resume(struct hn_softc *);
static void			hn_resume_data(struct hn_softc *);
static void			hn_resume_mgmt(struct hn_softc *);
static void			hn_suspend_mgmt_taskfunc(void *, int);
static void			hn_chan_drain(struct hn_softc *,
				    struct vmbus_channel *);
static void			hn_disable_rx(struct hn_softc *);
static void			hn_drain_rxtx(struct hn_softc *, int);
static void			hn_polling(struct hn_softc *, u_int);
static void			hn_chan_polling(struct vmbus_channel *, u_int);

static void			hn_update_link_status(struct hn_softc *);
static void			hn_change_network(struct hn_softc *);
static void			hn_link_taskfunc(void *, int);
static void			hn_netchg_init_taskfunc(void *, int);
static void			hn_netchg_status_taskfunc(void *, int);
static void			hn_link_status(struct hn_softc *);

static int			hn_create_rx_data(struct hn_softc *, int);
static void			hn_destroy_rx_data(struct hn_softc *);
static int			hn_check_iplen(const struct mbuf *, int);
static int			hn_set_rxfilter(struct hn_softc *, uint32_t);
static int			hn_rxfilter_config(struct hn_softc *);
#ifndef RSS
static int			hn_rss_reconfig(struct hn_softc *);
#endif
static void			hn_rss_ind_fixup(struct hn_softc *);
static int			hn_rxpkt(struct hn_rx_ring *, const void *,
				    int, const struct hn_rxinfo *);

static int			hn_tx_ring_create(struct hn_softc *, int);
static void			hn_tx_ring_destroy(struct hn_tx_ring *);
static int			hn_create_tx_data(struct hn_softc *, int);
static void			hn_fixup_tx_data(struct hn_softc *);
static void			hn_destroy_tx_data(struct hn_softc *);
static void			hn_txdesc_dmamap_destroy(struct hn_txdesc *);
static void			hn_txdesc_gc(struct hn_tx_ring *,
				    struct hn_txdesc *);
static int			hn_encap(struct ifnet *, struct hn_tx_ring *,
				    struct hn_txdesc *, struct mbuf **);
static int			hn_txpkt(struct ifnet *, struct hn_tx_ring *,
				    struct hn_txdesc *);
static void			hn_set_chim_size(struct hn_softc *, int);
static void			hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool			hn_tx_ring_pending(struct hn_tx_ring *);
static void			hn_tx_ring_qflush(struct hn_tx_ring *);
static void			hn_resume_tx(struct hn_softc *, int);
static void			hn_set_txagg(struct hn_softc *);
static void			*hn_try_txagg(struct ifnet *,
				    struct hn_tx_ring *, struct hn_txdesc *,
				    int);
static int			hn_get_txswq_depth(const struct hn_tx_ring *);
static void			hn_txpkt_done(struct hn_nvs_sendctx *,
				    struct hn_softc *, struct vmbus_channel *,
				    const void *, int);
static int			hn_txpkt_sglist(struct hn_tx_ring *,
				    struct hn_txdesc *);
static int			hn_txpkt_chim(struct hn_tx_ring *,
				    struct hn_txdesc *);
static int			hn_xmit(struct hn_tx_ring *, int);
static void			hn_xmit_taskfunc(void *, int);
static void			hn_xmit_txeof(struct hn_tx_ring *);
static void			hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int			hn_start_locked(struct hn_tx_ring *, int);
static void			hn_start_taskfunc(void *, int);
static void			hn_start_txeof(struct hn_tx_ring *);
static void			hn_start_txeof_taskfunc(void *, int);
#endif

SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust tcp segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust tcp segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust udp datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust udp datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust ip packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust ip packet verification on host side, "
    "when csum info is missing (global setting)");

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");

#define HN_TX_TASKQ_M_INDEP	0
#define HN_TX_TASKQ_M_GLOBAL	1
#define HN_TX_TASKQ_M_EVTTQ	2

static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");
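
/*
 * The CTLFLAG_RDTUN knobs above are loader tunables under hw.hn.
 * Illustrative /boot/loader.conf settings (example values only, not
 * recommendations):
 *
 *	hw.hn.chan_cnt=4
 *	hw.hn.tx_agg_size=0
 *	hw.hn.trust_hosttcp=0
 */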

static u_int			hn_cpu_index;	/* next CPU for channel */
static struct taskqueue		**hn_tx_taskque;/* shared TX taskqueues */

#ifndef RSS
static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
#endif	/* !RSS */

static device_method_t hn_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		hn_probe),
	DEVMETHOD(device_attach,	hn_attach),
	DEVMETHOD(device_detach,	hn_detach),
	DEVMETHOD(device_shutdown,	hn_shutdown),
	DEVMETHOD_END
};

static driver_t hn_driver = {
	"hn",
	hn_methods,
	sizeof(struct hn_softc)
};

static devclass_t hn_devclass;

DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);

#if __FreeBSD_version >= 1100099
static void
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
	int i;

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
		sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif

static int
hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size == 0, ("invalid rndis sglist txd"));
	return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
	    &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
}

static int
hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
	struct hn_nvs_rndis rndis;

	KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size > 0, ("invalid rndis chim txd"));

	rndis.nvs_type = HN_NVS_TYPE_RNDIS;
	rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
	rndis.nvs_chim_idx = txd->chim_index;
	rndis.nvs_chim_sz = txd->chim_size;

	return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
	    &rndis, sizeof(rndis), &txd->send_ctx));
}

static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
{
	int i, bmap_cnt = sc->hn_chim_bmap_cnt;
	u_long *bmap = sc->hn_chim_bmap;
	uint32_t ret = HN_NVS_CHIM_IDX_INVALID;

	for (i = 0; i < bmap_cnt; ++i) {
		int idx;

		idx = ffsl(~bmap[i]);
		if (idx == 0)
			continue;

		--idx; /* ffsl is 1-based */
		KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
		    ("invalid i %d and idx %d", i, idx));

		if (atomic_testandset_long(&bmap[i], idx))
			continue;

		ret = i * LONG_BIT + idx;
		break;
	}
	return (ret);
}

static __inline void
hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
{
	u_long mask;
	uint32_t idx;

	idx = chim_idx / LONG_BIT;
	KASSERT(idx < sc->hn_chim_bmap_cnt,
	    ("invalid chimney index 0x%x", chim_idx));

	mask = 1UL << (chim_idx % LONG_BIT);
	KASSERT(sc->hn_chim_bmap[idx] & mask,
	    ("index bitmap 0x%lx, chimney index %u, "
	     "bitmap idx %d, bitmask 0x%lx",
	     sc->hn_chim_bmap[idx], chim_idx, idx, mask));

	atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
}

#if defined(INET6) || defined(INET)

#define PULLUP_HDR(m, len)				\
do {							\
	if (__predict_false((m)->m_len < (len))) {	\
		(m) = m_pullup((m), (len));		\
		if ((m) == NULL)			\
			return (NULL);			\
	}						\
} while (0)
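
/*
 * PULLUP_HDR() ensures that at least 'len' bytes sit contiguously in the
 * first mbuf via m_pullup(9); on failure the mbuf chain has already been
 * freed by m_pullup() and the macro returns NULL from the calling
 * function.  It is only used by the header-fixup helpers below and is
 * #undef'd afterwards.
 */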

/*
 * NOTE: If this function fails, the m_head will be freed.
 */
static __inline struct mbuf *
hn_tso_fixup(struct mbuf *m_head)
{
	struct ether_vlan_header *evl;
	struct tcphdr *th;
	int ehlen;

	KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));

	PULLUP_HDR(m_head, sizeof(*evl));
	evl = mtod(m_head, struct ether_vlan_header *);
	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehlen = ETHER_HDR_LEN;

#ifdef INET
	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
		struct ip *ip;
		int iphlen;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
		ip = mtodo(m_head, ehlen);
		iphlen = ip->ip_hl << 2;

		PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
		th = mtodo(m_head, ehlen + iphlen);

		ip->ip_len = 0;
		ip->ip_sum = 0;
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	}
#endif
#if defined(INET6) && defined(INET)
	else
#endif
#ifdef INET6
	{
		struct ip6_hdr *ip6;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
		ip6 = mtodo(m_head, ehlen);
		if (ip6->ip6_nxt != IPPROTO_TCP) {
			m_freem(m_head);
			return (NULL);
		}

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
		th = mtodo(m_head, ehlen + sizeof(*ip6));

		ip6->ip6_plen = 0;
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
	}
#endif
	return (m_head);

}

/*
 * NOTE: If this function fails, the m_head will be freed.
 */
static __inline struct mbuf *
hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn)
{
	const struct ether_vlan_header *evl;
	const struct tcphdr *th;
	int ehlen;

	*tcpsyn = 0;

	PULLUP_HDR(m_head, sizeof(*evl));
	evl = mtod(m_head, const struct ether_vlan_header *);
	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehlen = ETHER_HDR_LEN;

#ifdef INET
	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TCP) {
		const struct ip *ip;
		int iphlen;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
		ip = mtodo(m_head, ehlen);
		iphlen = ip->ip_hl << 2;

		PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
		th = mtodo(m_head, ehlen + iphlen);
		if (th->th_flags & TH_SYN)
			*tcpsyn = 1;
	}
#endif
#if defined(INET6) && defined(INET)
	else
#endif
#ifdef INET6
	{
		const struct ip6_hdr *ip6;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
		ip6 = mtodo(m_head, ehlen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (m_head);

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
		th = mtodo(m_head, ehlen + sizeof(*ip6));
		if (th->th_flags & TH_SYN)
			*tcpsyn = 1;
	}
#endif
	return (m_head);
}

#undef PULLUP_HDR

#endif	/* INET6 || INET */

static int
hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
{
	int error = 0;

	HN_LOCK_ASSERT(sc);

	if (sc->hn_rx_filter != filter) {
		error = hn_rndis_set_rxfilter(sc, filter);
		if (!error)
			sc->hn_rx_filter = filter;
	}
	return (error);
}

static int
hn_rxfilter_config(struct hn_softc *sc)
{
	struct ifnet *ifp = sc->hn_ifp;
	uint32_t filter;

	HN_LOCK_ASSERT(sc);

	if ((ifp->if_flags & IFF_PROMISC) ||
	    (sc->hn_flags & HN_FLAG_VF)) {
		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
	} else {
		filter = NDIS_PACKET_TYPE_DIRECTED;
		if (ifp->if_flags & IFF_BROADCAST)
			filter |= NDIS_PACKET_TYPE_BROADCAST;
		/* TODO: support multicast list */
		if ((ifp->if_flags & IFF_ALLMULTI) ||
		    !TAILQ_EMPTY(&ifp->if_multiaddrs))
			filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
	}
	return (hn_set_rxfilter(sc, filter));
}
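
/*
 * hn_set_txagg() below derives the per-ring TX aggregation limits from
 * three sources: the hw.hn.tx_agg_size/tx_agg_pkts tunables, the limits
 * offered by the host through RNDIS (hn_rndis_agg_size/pkts/align), and
 * the chimney buffer size (hn_chim_szmax).  The smallest value wins, and
 * aggregation is disabled outright when the result is too small to hold
 * at least two minimally sized packets.
 */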

static void
hn_set_txagg(struct hn_softc *sc)
{
	uint32_t size, pkts;
	int i;

	/*
	 * Setup aggregation size.
	 */
	if (sc->hn_agg_size < 0)
		size = UINT32_MAX;
	else
		size = sc->hn_agg_size;

	if (sc->hn_rndis_agg_size < size)
		size = sc->hn_rndis_agg_size;

	/* NOTE: We only aggregate packets using chimney sending buffers. */
	if (size > (uint32_t)sc->hn_chim_szmax)
		size = sc->hn_chim_szmax;

	if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
		/* Disable */
		size = 0;
		pkts = 0;
		goto done;
	}

	/* NOTE: Type of the per TX ring setting is 'int'. */
	if (size > INT_MAX)
		size = INT_MAX;

	/*
	 * Setup aggregation packet count.
	 */
	if (sc->hn_agg_pkts < 0)
		pkts = UINT32_MAX;
	else
		pkts = sc->hn_agg_pkts;

	if (sc->hn_rndis_agg_pkts < pkts)
		pkts = sc->hn_rndis_agg_pkts;

	if (pkts <= 1) {
		/* Disable */
		size = 0;
		pkts = 0;
		goto done;
	}

	/* NOTE: Type of the per TX ring setting is 'short'. */
	if (pkts > SHRT_MAX)
		pkts = SHRT_MAX;

done:
	/* NOTE: Type of the per TX ring setting is 'short'. */
	if (sc->hn_rndis_agg_align > SHRT_MAX) {
		/* Disable */
		size = 0;
		pkts = 0;
	}

	if (bootverbose) {
		if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
		    size, pkts, sc->hn_rndis_agg_align);
	}

	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_agg_szmax = size;
		txr->hn_agg_pktmax = pkts;
		txr->hn_agg_align = sc->hn_rndis_agg_align;
		mtx_unlock(&txr->hn_tx_lock);
	}
}

static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{

	KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
	if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
		return txr->hn_txdesc_cnt;
	return hn_tx_swq_depth;
}

#ifndef RSS
static int
hn_rss_reconfig(struct hn_softc *sc)
{
	int error;

	HN_LOCK_ASSERT(sc);

	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
		return (ENXIO);

	/*
	 * Disable RSS first.
	 *
	 * NOTE:
	 * Direct reconfiguration by setting the UNCHG flags does
	 * _not_ work properly.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "disable RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS disable failed\n");
		return (error);
	}

	/*
	 * Reenable the RSS w/ the updated RSS key or indirect
	 * table.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "reconfig RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS reconfig failed\n");
		return (error);
	}
	return (0);
}
#endif	/* !RSS */

static void
hn_rss_ind_fixup(struct hn_softc *sc)
{
	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int i, nchan;

	nchan = sc->hn_rx_ring_inuse;
	KASSERT(nchan > 1, ("invalid # of channels %d", nchan));

	/*
	 * Check indirect table to make sure that all channels in it
	 * can be used.
	 */
	for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
		if (rss->rss_ind[i] >= nchan) {
			if_printf(sc->hn_ifp,
			    "RSS indirect table %d fixup: %u -> %d\n",
			    i, rss->rss_ind[i], nchan - 1);
			rss->rss_ind[i] = nchan - 1;
		}
	}
}

static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
{

	return EOPNOTSUPP;
}

static void
hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct hn_softc *sc = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
		ifmr->ifm_active |= IFM_NONE;
		return;
	}
	ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}

static void
hn_update_vf_task(void *arg, int pending __unused)
{
	struct hn_update_vf *uv = arg;

	uv->rxr->hn_vf = uv->vf;
}

static void
hn_update_vf(struct hn_softc *sc, struct ifnet *vf)
{
	struct hn_rx_ring *rxr;
	struct hn_update_vf uv;
	struct task task;
	int i;

	HN_LOCK_ASSERT(sc);

	TASK_INIT(&task, 0, hn_update_vf_task, &uv);

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		rxr = &sc->hn_rx_ring[i];

		if (i < sc->hn_rx_ring_inuse) {
			uv.rxr = rxr;
			uv.vf = vf;
			vmbus_chan_run_task(rxr->hn_chan, &task);
		} else {
			rxr->hn_vf = vf;
		}
	}
}

static void
hn_set_vf(struct hn_softc *sc, struct ifnet *ifp, bool vf)
{
	struct ifnet *hn_ifp;

	HN_LOCK(sc);

	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
		goto out;

	hn_ifp = sc->hn_ifp;

	if (ifp == hn_ifp)
		goto out;

	if (ifp->if_alloctype != IFT_ETHER)
		goto out;

	/* Ignore lagg/vlan interfaces */
	if (strcmp(ifp->if_dname, "lagg") == 0 ||
	    strcmp(ifp->if_dname, "vlan") == 0)
		goto out;

	if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0)
		goto out;

	/* Now we're sure 'ifp' is a real VF device. */
	if (vf) {
		if (sc->hn_flags & HN_FLAG_VF)
			goto out;

		sc->hn_flags |= HN_FLAG_VF;
		hn_rxfilter_config(sc);
	} else {
		if (!(sc->hn_flags & HN_FLAG_VF))
			goto out;

		sc->hn_flags &= ~HN_FLAG_VF;
		if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
			hn_rxfilter_config(sc);
		else
			hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
	}

	hn_nvs_set_datapath(sc,
	    vf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTHETIC);

	hn_update_vf(sc, vf ? ifp : NULL);

	if (vf) {
		hn_suspend_mgmt(sc);
		sc->hn_link_flags &=
		    ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
		if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
	} else {
		hn_resume_mgmt(sc);
	}

	devctl_notify("HYPERV_NIC_VF", if_name(hn_ifp),
	    vf ? "VF_UP" : "VF_DOWN", NULL);

	if (bootverbose)
		if_printf(hn_ifp, "Data path is switched %s %s\n",
		    vf ? "to" : "from", if_name(ifp));
out:
	HN_UNLOCK(sc);
}
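
/*
 * The devctl_notify() above surfaces VF data path switches to userland as
 * devd(8) events with system "HYPERV_NIC_VF", subsystem set to the hn(4)
 * interface name and type "VF_UP"/"VF_DOWN".  An illustrative (untested)
 * devd.conf match:
 *
 *	notify 10 {
 *		match "system"	"HYPERV_NIC_VF";
 *		match "type"	"VF_UP";
 *		action "logger hn VF came up";
 *	};
 */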
"VF_UP" : "VF_DOWN", NULL); 1062 1063 if (bootverbose) 1064 if_printf(hn_ifp, "Data path is switched %s %s\n", 1065 vf ? "to" : "from", if_name(ifp)); 1066 out: 1067 HN_UNLOCK(sc); 1068 } 1069 1070 static void 1071 hn_ifnet_event(void *arg, struct ifnet *ifp, int event) 1072 { 1073 if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN) 1074 return; 1075 1076 hn_set_vf(arg, ifp, event == IFNET_EVENT_UP); 1077 } 1078 1079 static void 1080 hn_ifaddr_event(void *arg, struct ifnet *ifp) 1081 { 1082 hn_set_vf(arg, ifp, ifp->if_flags & IFF_UP); 1083 } 1084 1085 /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ 1086 static const struct hyperv_guid g_net_vsc_device_type = { 1087 .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 1088 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} 1089 }; 1090 1091 static int 1092 hn_probe(device_t dev) 1093 { 1094 1095 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, 1096 &g_net_vsc_device_type) == 0) { 1097 device_set_desc(dev, "Hyper-V Network Interface"); 1098 return BUS_PROBE_DEFAULT; 1099 } 1100 return ENXIO; 1101 } 1102 1103 static int 1104 hn_attach(device_t dev) 1105 { 1106 struct hn_softc *sc = device_get_softc(dev); 1107 struct sysctl_oid_list *child; 1108 struct sysctl_ctx_list *ctx; 1109 uint8_t eaddr[ETHER_ADDR_LEN]; 1110 struct ifnet *ifp = NULL; 1111 int error, ring_cnt, tx_ring_cnt; 1112 1113 sc->hn_dev = dev; 1114 sc->hn_prichan = vmbus_get_channel(dev); 1115 HN_LOCK_INIT(sc); 1116 1117 /* 1118 * Initialize these tunables once. 1119 */ 1120 sc->hn_agg_size = hn_tx_agg_size; 1121 sc->hn_agg_pkts = hn_tx_agg_pkts; 1122 1123 /* 1124 * Setup taskqueue for transmission. 1125 */ 1126 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) { 1127 int i; 1128 1129 sc->hn_tx_taskqs = 1130 malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *), 1131 M_DEVBUF, M_WAITOK); 1132 for (i = 0; i < hn_tx_taskq_cnt; ++i) { 1133 sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx", 1134 M_WAITOK, taskqueue_thread_enqueue, 1135 &sc->hn_tx_taskqs[i]); 1136 taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET, 1137 "%s tx%d", device_get_nameunit(dev), i); 1138 } 1139 } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) { 1140 sc->hn_tx_taskqs = hn_tx_taskque; 1141 } 1142 1143 /* 1144 * Setup taskqueue for mangement tasks, e.g. link status. 1145 */ 1146 sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, 1147 taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); 1148 taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", 1149 device_get_nameunit(dev)); 1150 TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); 1151 TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); 1152 TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, 1153 hn_netchg_status_taskfunc, sc); 1154 1155 /* 1156 * Allocate ifnet and setup its name earlier, so that if_printf 1157 * can be used by functions, which will be called after 1158 * ether_ifattach(). 1159 */ 1160 ifp = sc->hn_ifp = if_alloc(IFT_ETHER); 1161 ifp->if_softc = sc; 1162 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 1163 1164 /* 1165 * Initialize ifmedia earlier so that it can be unconditionally 1166 * destroyed, if error happened later on. 1167 */ 1168 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 1169 1170 /* 1171 * Figure out the # of RX rings (ring_cnt) and the # of TX rings 1172 * to use (tx_ring_cnt). 1173 * 1174 * NOTE: 1175 * The # of RX rings to use is same as the # of channels to use. 
	ring_cnt = hn_chan_cnt;
	if (ring_cnt <= 0) {
		/* Default */
		ring_cnt = mp_ncpus;
		if (ring_cnt > HN_RING_CNT_DEF_MAX)
			ring_cnt = HN_RING_CNT_DEF_MAX;
	} else if (ring_cnt > mp_ncpus) {
		ring_cnt = mp_ncpus;
	}
#ifdef RSS
	if (ring_cnt > rss_getnumbuckets())
		ring_cnt = rss_getnumbuckets();
#endif

	tx_ring_cnt = hn_tx_ring_cnt;
	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
		tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		/* ifnet.if_start only needs one TX ring. */
		tx_ring_cnt = 1;
	}
#endif

	/*
	 * Set the leader CPU for channels.
	 */
	sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;

	/*
	 * Create enough TX/RX rings, even if only limited number of
	 * channels can be allocated.
	 */
	error = hn_create_tx_data(sc, tx_ring_cnt);
	if (error)
		goto failed;
	error = hn_create_rx_data(sc, ring_cnt);
	if (error)
		goto failed;

	/*
	 * Create transaction context for NVS and RNDIS transactions.
	 */
	sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
	    HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
	if (sc->hn_xact == NULL) {
		error = ENXIO;
		goto failed;
	}

	/*
	 * Install orphan handler for the revocation of this device's
	 * primary channel.
	 *
	 * NOTE:
	 * The processing order is critical here:
	 * Install the orphan handler, _before_ testing whether this
	 * device's primary channel has been revoked or not.
	 */
	vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
	if (vmbus_chan_is_revoked(sc->hn_prichan)) {
		error = ENXIO;
		goto failed;
	}

	/*
	 * Attach the synthetic parts, i.e. NVS and RNDIS.
	 */
	error = hn_synth_attach(sc, ETHERMTU);
	if (error)
		goto failed;

	error = hn_rndis_get_eaddr(sc, eaddr);
	if (error)
		goto failed;

#if __FreeBSD_version >= 1100099
	if (sc->hn_rx_ring_inuse > 1) {
		/*
		 * Reduce TCP segment aggregation limit for multiple
		 * RX rings to increase ACK timeliness.
		 */
		hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
	}
#endif

	/*
	 * Fixup TX stuffs after synthetic parts are attached.
	 */
	hn_fixup_tx_data(sc);

	ctx = device_get_sysctl_ctx(dev);
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
	    &sc->hn_nvs_ver, 0, "NVS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_ndis_version_sysctl, "A", "NDIS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_caps_sysctl, "A", "capabilities");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_hwassist_sysctl, "A", "hwassist");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rxfilter_sysctl, "A", "rxfilter");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_hash_sysctl, "A", "RSS hash");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
	    CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
#ifndef RSS
	/*
	 * Don't allow RSS key/indirect table changes, if RSS is defined.
	 */
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_key_sysctl, "IU", "RSS key");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_ind_sysctl, "IU", "RSS indirect table");
#endif
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
	    CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
	    "RNDIS offered packet transmission aggregation size limit");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
	    CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
	    "RNDIS offered packet transmission aggregation count limit");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
	    CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
	    "RNDIS packet transmission aggregation alignment");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_txagg_size_sysctl, "I",
	    "Packet transmission aggregation size, 0 -- disable, -1 -- auto");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_txagg_pkts_sysctl, "I",
	    "Packet transmission aggregation packets, "
	    "0 -- disable, -1 -- auto");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling",
	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_polling_sysctl, "I",
	    "Polling frequency: [100,1000000], 0 disable polling");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_vf_sysctl, "A", "Virtual Function's name");

	/*
	 * Setup the ifmedia, which has been initialized earlier.
	 */
	ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
	/* XXX ifmedia_set really should do this for us */
	sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;

	/*
	 * Setup the ifnet for this interface.
	 */

	ifp->if_baudrate = IF_Gbps(10);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = hn_ioctl;
	ifp->if_init = hn_init;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);

		ifp->if_start = hn_start;
		IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
		ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
		IFQ_SET_READY(&ifp->if_snd);
	} else
#endif
	{
		ifp->if_transmit = hn_transmit;
		ifp->if_qflush = hn_xmit_qflush;
	}

	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
#ifdef foo
	/* We can't diff IPv6 packets from IPv4 packets on RX path. */
	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
#endif
	if (sc->hn_caps & HN_CAP_VLAN) {
		/* XXX not sure about VLAN_MTU. */
		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	}

	ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
	if (ifp->if_hwassist & HN_CSUM_IP_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM;
	if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
	if (sc->hn_caps & HN_CAP_TSO4) {
		ifp->if_capabilities |= IFCAP_TSO4;
		ifp->if_hwassist |= CSUM_IP_TSO;
	}
	if (sc->hn_caps & HN_CAP_TSO6) {
		ifp->if_capabilities |= IFCAP_TSO6;
		ifp->if_hwassist |= CSUM_IP6_TSO;
	}

	/* Enable all available capabilities by default. */
	ifp->if_capenable = ifp->if_capabilities;

	/*
	 * Disable IPv6 TSO and TXCSUM by default, they still can
	 * be enabled through SIOCSIFCAP.
	 */
	ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
	ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);

	if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
		hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
		ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
		ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
	}

	ether_ifattach(ifp, eaddr);

	if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
		if_printf(ifp, "TSO segcnt %u segsz %u\n",
		    ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
	}

	/* Inform the upper layer about the long frame support. */
	ifp->if_hdrlen = sizeof(struct ether_vlan_header);

	/*
	 * Kick off link status check.
	 */
	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
	hn_update_link_status(sc);

	sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
	    hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);

	sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
	    hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);

	return (0);
failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
		hn_synth_detach(sc);
	hn_detach(dev);
	return (error);
}
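
/*
 * NOTE:
 * The failure path of hn_attach() above calls hn_detach(), so hn_detach()
 * below has to cope with a partially initialized softc (e.g. missing TX
 * taskqueues or xact context, device not yet marked attached).
 */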

static int
hn_detach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = sc->hn_ifp;

	if (sc->hn_ifaddr_evthand != NULL)
		EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
	if (sc->hn_ifnet_evthand != NULL)
		EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand);

	if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
		/*
		 * In case that the vmbus missed the orphan handler
		 * installation.
		 */
		vmbus_xact_ctx_orphan(sc->hn_xact);
	}

	if (device_is_attached(dev)) {
		HN_LOCK(sc);
		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				hn_stop(sc, true);
			/*
			 * NOTE:
			 * hn_stop() only suspends data, so management
			 * stuffs have to be suspended manually here.
			 */
			hn_suspend_mgmt(sc);
			hn_synth_detach(sc);
		}
		HN_UNLOCK(sc);
		ether_ifdetach(ifp);
	}

	ifmedia_removeall(&sc->hn_media);
	hn_destroy_rx_data(sc);
	hn_destroy_tx_data(sc);

	if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(sc->hn_tx_taskqs[i]);
		free(sc->hn_tx_taskqs, M_DEVBUF);
	}
	taskqueue_free(sc->hn_mgmt_taskq0);

	if (sc->hn_xact != NULL) {
		/*
		 * Uninstall the orphan handler _before_ the xact is
		 * destructed.
		 */
		vmbus_chan_unset_orphan(sc->hn_prichan);
		vmbus_xact_ctx_destroy(sc->hn_xact);
	}

	if_free(ifp);

	HN_LOCK_DESTROY(sc);
	return (0);
}

static int
hn_shutdown(device_t dev)
{

	return (0);
}

static void
hn_link_status(struct hn_softc *sc)
{
	uint32_t link_status;
	int error;

	error = hn_rndis_get_linkstatus(sc, &link_status);
	if (error) {
		/* XXX what to do? */
		return;
	}

	if (link_status == NDIS_MEDIA_STATE_CONNECTED)
		sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
	else
		sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp,
	    (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
	    LINK_STATE_UP : LINK_STATE_DOWN);
}

static void
hn_link_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		return;
	hn_link_status(sc);
}

static void
hn_netchg_init_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Prevent any link status checks from running. */
	sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;

	/*
	 * Fake up a [link down --> link up] state change; 5 seconds
	 * delay is used, which closely simulates miibus reaction
	 * upon link down event.
	 */
	sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
	taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
	    &sc->hn_netchg_status, 5 * hz);
}

static void
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Re-allow link status checks. */
	sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
	hn_link_status(sc);
}

static void
hn_update_link_status(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
}

static void
hn_change_network(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
}

static __inline int
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m = *m_head;
	int error;

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));

	error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
	    m, segs, nsegs, BUS_DMA_NOWAIT);
	if (error == EFBIG) {
		struct mbuf *m_new;

		m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
		if (m_new == NULL)
			return ENOBUFS;
		else
			*m_head = m = m_new;
		txr->hn_tx_collapsed++;

		error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
		    txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
	}
	if (!error) {
		bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
		    BUS_DMASYNC_PREWRITE);
		txd->flags |= HN_TXD_FLAG_DMAMAP;
	}
	return error;
}

static __inline int
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
	    ("put an onlist txd %#x", txd->flags));
	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("put an onagg txd %#x", txd->flags));

	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
		return 0;

	if (!STAILQ_EMPTY(&txd->agg_list)) {
		struct hn_txdesc *tmp_txd;

		while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
			int freed;

			KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
			    ("recursive aggregation on aggregated txdesc"));
			KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
			    ("not aggregated txdesc"));
			KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
			    ("aggregated txdesc uses dmamap"));
			KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
			    ("aggregated txdesc consumes "
			     "chimney sending buffer"));
			KASSERT(tmp_txd->chim_size == 0,
			    ("aggregated txdesc has non-zero "
			     "chimney sending size"));

			STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
			tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
			freed = hn_txdesc_put(txr, tmp_txd);
			KASSERT(freed, ("failed to free aggregated txdesc"));
		}
	}

	if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
		    ("chim txd uses dmamap"));
		hn_chim_free(txr->hn_sc, txd->chim_index);
		txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
		txd->chim_size = 0;
	} else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
		bus_dmamap_sync(txr->hn_tx_data_dtag,
		    txd->data_dmap, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(txr->hn_tx_data_dtag,
		    txd->data_dmap);
		txd->flags &= ~HN_TXD_FLAG_DMAMAP;
	}

	if (txd->m != NULL) {
		m_freem(txd->m);
		txd->m = NULL;
	}

	txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	KASSERT(txr->hn_txdesc_avail >= 0 &&
	    txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
	    ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
	txr->hn_txdesc_avail++;
	SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else	/* HN_USE_TXDESC_BUFRING */
#ifdef HN_DEBUG
	atomic_add_int(&txr->hn_txdesc_avail, 1);
#endif
	buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif	/* !HN_USE_TXDESC_BUFRING */

	return 1;
}
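
/*
 * NOTE (summary of the txdesc life cycle implemented above and below):
 * hn_txdesc_get() takes a descriptor off the free list (or buf_ring) with
 * refs == 1, hn_txdesc_hold() adds a reference while a send is in flight,
 * and hn_txdesc_put() drops a reference; the final put releases any
 * aggregated children, the chimney buffer or DMA map, frees the mbuf and
 * returns the descriptor to the free list.
 */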

static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_tx_ring *txr)
{
	struct hn_txdesc *txd;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	txd = SLIST_FIRST(&txr->hn_txlist);
	if (txd != NULL) {
		KASSERT(txr->hn_txdesc_avail > 0,
		    ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
		txr->hn_txdesc_avail--;
		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
	}
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
#endif

	if (txd != NULL) {
#ifdef HN_USE_TXDESC_BUFRING
#ifdef HN_DEBUG
		atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif
#endif	/* HN_USE_TXDESC_BUFRING */
		KASSERT(txd->m == NULL && txd->refs == 0 &&
		    STAILQ_EMPTY(&txd->agg_list) &&
		    txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
		    txd->chim_size == 0 &&
		    (txd->flags & HN_TXD_FLAG_ONLIST) &&
		    (txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
		    (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
		txd->flags &= ~HN_TXD_FLAG_ONLIST;
		txd->refs = 1;
	}
	return txd;
}

static __inline void
hn_txdesc_hold(struct hn_txdesc *txd)
{

	/* 0->1 transition will never work */
	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
	atomic_add_int(&txd->refs, 1);
}

static __inline void
hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
{

	KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("recursive aggregation on aggregating txdesc"));

	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("already aggregated"));
	KASSERT(STAILQ_EMPTY(&txd->agg_list),
	    ("recursive aggregation on to-be-aggregated txdesc"));

	txd->flags |= HN_TXD_FLAG_ONAGG;
	STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);
}

static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
	bool pending = false;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
		pending = true;
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	if (!buf_ring_full(txr->hn_txdesc_br))
		pending = true;
#endif
	return (pending);
}

static __inline void
hn_txeof(struct hn_tx_ring *txr)
{
	txr->hn_has_txeof = 0;
	txr->hn_txeof(txr);
}

static void
hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
    struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
{
	struct hn_txdesc *txd = sndc->hn_cbarg;
	struct hn_tx_ring *txr;

	txr = txd->txr;
	KASSERT(txr->hn_chan == chan,
	    ("channel mismatch, on chan%u, should be chan%u",
	     vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan)));

	txr->hn_has_txeof = 1;
	hn_txdesc_put(txr, txd);

	++txr->hn_txdone_cnt;
	if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
		txr->hn_txdone_cnt = 0;
		if (txr->hn_oactive)
			hn_txeof(txr);
	}
}
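
/*
 * NOTE:
 * In hn_txpkt_done() above, HN_EARLY_TXEOF_THRESH bounds how many send
 * completions may accumulate before hn_txeof() is run early while the
 * ring is marked oactive; otherwise completions are only rolled up from
 * hn_chan_rollup() below.
 */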

static void
hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
{
#if defined(INET) || defined(INET6)
	tcp_lro_flush_all(&rxr->hn_lro);
#endif

	/*
	 * NOTE:
	 * 'txr' could be NULL, if multiple channels and
	 * ifnet.if_start method are enabled.
	 */
	if (txr == NULL || !txr->hn_has_txeof)
		return;

	txr->hn_txdone_cnt = 0;
	hn_txeof(txr);
}

static __inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{

	KASSERT(ofs >= sizeof(struct rndis_packet_msg),
	    ("invalid RNDIS packet msg offset %u", ofs));
	return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));
}

static __inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
    size_t pi_dlen, uint32_t pi_type)
{
	const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
	struct rndis_pktinfo *pi;

	KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
	    ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));

	/*
	 * Per-packet-info does not move; it only grows.
	 *
	 * NOTE:
	 * rm_pktinfooffset in this phase counts from the beginning
	 * of rndis_packet_msg.
	 */
	KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
	    ("%u pktinfo overflows RNDIS packet msg", pi_type));
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset +
	    pkt->rm_pktinfolen);
	pkt->rm_pktinfolen += pi_size;

	pi->rm_size = pi_size;
	pi->rm_type = pi_type;
	pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;

	return (pi->rm_data);
}

static __inline int
hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr)
{
	struct hn_txdesc *txd;
	struct mbuf *m;
	int error, pkts;

	txd = txr->hn_agg_txd;
	KASSERT(txd != NULL, ("no aggregate txdesc"));

	/*
	 * Since hn_txpkt() will reset this temporary stat, save
	 * it now, so that oerrors can be updated properly, if
	 * hn_txpkt() ever fails.
	 */
	pkts = txr->hn_stat_pkts;

	/*
	 * Since txd's mbuf will _not_ be freed upon hn_txpkt()
	 * failure, save it for later freeing, if hn_txpkt() ever
	 * fails.
	 */
	m = txd->m;
	error = hn_txpkt(ifp, txr, txd);
	if (__predict_false(error)) {
		/* txd is freed, but m is not. */
		m_freem(m);

		txr->hn_flush_failed++;
		if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts);
	}

	/* Reset all aggregation states. */
	txr->hn_agg_txd = NULL;
	txr->hn_agg_szleft = 0;
	txr->hn_agg_pktleft = 0;
	txr->hn_agg_prevpkt = NULL;

	return (error);
}
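
/*
 * NOTE:
 * hn_try_txagg() below implements chimney-buffer aggregation: while an
 * aggregating txdesc is active and has both packet and size budget left
 * (hn_agg_pktleft/hn_agg_szleft), subsequent small packets are appended
 * into the same chimney sending buffer and their txdescs are linked to
 * the aggregating txdesc; otherwise the pending aggregation is flushed
 * via hn_flush_txagg() and a fresh chimney buffer is allocated.
 */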
1913 */ 1914 olen = pkt->rm_len; 1915 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 1916 agg_txd->chim_size += pkt->rm_len - olen; 1917 1918 /* Link this txdesc to the parent. */ 1919 hn_txdesc_agg(agg_txd, txd); 1920 1921 chim = (uint8_t *)pkt + pkt->rm_len; 1922 /* Save the current packet for later fixup. */ 1923 txr->hn_agg_prevpkt = chim; 1924 1925 txr->hn_agg_pktleft--; 1926 txr->hn_agg_szleft -= pktsize; 1927 if (txr->hn_agg_szleft <= 1928 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 1929 /* 1930 * Probably can't aggregate more packets, 1931 * flush this aggregating txdesc proactively. 1932 */ 1933 txr->hn_agg_pktleft = 0; 1934 } 1935 /* Done! */ 1936 return (chim); 1937 } 1938 hn_flush_txagg(ifp, txr); 1939 } 1940 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 1941 1942 txr->hn_tx_chimney_tried++; 1943 txd->chim_index = hn_chim_alloc(txr->hn_sc); 1944 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 1945 return (NULL); 1946 txr->hn_tx_chimney++; 1947 1948 chim = txr->hn_sc->hn_chim + 1949 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 1950 1951 if (txr->hn_agg_pktmax > 1 && 1952 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 1953 txr->hn_agg_txd = txd; 1954 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 1955 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 1956 txr->hn_agg_prevpkt = chim; 1957 } 1958 return (chim); 1959 } 1960 1961 /* 1962 * NOTE: 1963 * If this function fails, then both txd and m_head0 will be freed. 1964 */ 1965 static int 1966 hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 1967 struct mbuf **m_head0) 1968 { 1969 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 1970 int error, nsegs, i; 1971 struct mbuf *m_head = *m_head0; 1972 struct rndis_packet_msg *pkt; 1973 uint32_t *pi_data; 1974 void *chim = NULL; 1975 int pkt_hlen, pkt_size; 1976 1977 pkt = txd->rndis_pkt; 1978 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 1979 if (pkt_size < txr->hn_chim_size) { 1980 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 1981 if (chim != NULL) 1982 pkt = chim; 1983 } else { 1984 if (txr->hn_agg_txd != NULL) 1985 hn_flush_txagg(ifp, txr); 1986 } 1987 1988 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 1989 pkt->rm_len = m_head->m_pkthdr.len; 1990 pkt->rm_dataoffset = 0; 1991 pkt->rm_datalen = m_head->m_pkthdr.len; 1992 pkt->rm_oobdataoffset = 0; 1993 pkt->rm_oobdatalen = 0; 1994 pkt->rm_oobdataelements = 0; 1995 pkt->rm_pktinfooffset = sizeof(*pkt); 1996 pkt->rm_pktinfolen = 0; 1997 pkt->rm_vchandle = 0; 1998 pkt->rm_reserved = 0; 1999 2000 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 2001 /* 2002 * Set the hash value for this packet, so that the host could 2003 * dispatch the TX done event for this packet back to this TX 2004 * ring's channel. 
2005 */ 2006 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2007 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 2008 *pi_data = txr->hn_tx_idx; 2009 } 2010 2011 if (m_head->m_flags & M_VLANTAG) { 2012 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2013 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 2014 *pi_data = NDIS_VLAN_INFO_MAKE( 2015 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 2016 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 2017 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 2018 } 2019 2020 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 2021 #if defined(INET6) || defined(INET) 2022 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2023 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 2024 #ifdef INET 2025 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 2026 *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0, 2027 m_head->m_pkthdr.tso_segsz); 2028 } 2029 #endif 2030 #if defined(INET6) && defined(INET) 2031 else 2032 #endif 2033 #ifdef INET6 2034 { 2035 *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0, 2036 m_head->m_pkthdr.tso_segsz); 2037 } 2038 #endif 2039 #endif /* INET6 || INET */ 2040 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 2041 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 2042 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 2043 if (m_head->m_pkthdr.csum_flags & 2044 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 2045 *pi_data = NDIS_TXCSUM_INFO_IPV6; 2046 } else { 2047 *pi_data = NDIS_TXCSUM_INFO_IPV4; 2048 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 2049 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 2050 } 2051 2052 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) 2053 *pi_data |= NDIS_TXCSUM_INFO_TCPCS; 2054 else if (m_head->m_pkthdr.csum_flags & 2055 (CSUM_IP_UDP | CSUM_IP6_UDP)) 2056 *pi_data |= NDIS_TXCSUM_INFO_UDPCS; 2057 } 2058 2059 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 2060 /* Fixup RNDIS packet message total length */ 2061 pkt->rm_len += pkt_hlen; 2062 /* Convert RNDIS packet message offsets */ 2063 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); 2064 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 2065 2066 /* 2067 * Fast path: Chimney sending. 2068 */ 2069 if (chim != NULL) { 2070 struct hn_txdesc *tgt_txd = txd; 2071 2072 if (txr->hn_agg_txd != NULL) { 2073 tgt_txd = txr->hn_agg_txd; 2074 #ifdef INVARIANTS 2075 *m_head0 = NULL; 2076 #endif 2077 } 2078 2079 KASSERT(pkt == chim, 2080 ("RNDIS pkt not in chimney sending buffer")); 2081 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 2082 ("chimney sending buffer is not used")); 2083 tgt_txd->chim_size += pkt->rm_len; 2084 2085 m_copydata(m_head, 0, m_head->m_pkthdr.len, 2086 ((uint8_t *)chim) + pkt_hlen); 2087 2088 txr->hn_gpa_cnt = 0; 2089 txr->hn_sendpkt = hn_txpkt_chim; 2090 goto done; 2091 } 2092 2093 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 2094 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 2095 ("chimney buffer is used")); 2096 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 2097 2098 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 2099 if (__predict_false(error)) { 2100 int freed; 2101 2102 /* 2103 * This mbuf is not linked w/ the txd yet, so free it now. 
2104 */ 2105 m_freem(m_head); 2106 *m_head0 = NULL; 2107 2108 freed = hn_txdesc_put(txr, txd); 2109 KASSERT(freed != 0, 2110 ("fail to free txd upon txdma error")); 2111 2112 txr->hn_txdma_failed++; 2113 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2114 return error; 2115 } 2116 *m_head0 = m_head; 2117 2118 /* +1 RNDIS packet message */ 2119 txr->hn_gpa_cnt = nsegs + 1; 2120 2121 /* send packet with page buffer */ 2122 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 2123 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 2124 txr->hn_gpa[0].gpa_len = pkt_hlen; 2125 2126 /* 2127 * Fill the page buffers with mbuf info after the page 2128 * buffer for RNDIS packet message. 2129 */ 2130 for (i = 0; i < nsegs; ++i) { 2131 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 2132 2133 gpa->gpa_page = atop(segs[i].ds_addr); 2134 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 2135 gpa->gpa_len = segs[i].ds_len; 2136 } 2137 2138 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2139 txd->chim_size = 0; 2140 txr->hn_sendpkt = hn_txpkt_sglist; 2141 done: 2142 txd->m = m_head; 2143 2144 /* Set the completion routine */ 2145 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 2146 2147 /* Update temporary stats for later use. */ 2148 txr->hn_stat_pkts++; 2149 txr->hn_stat_size += m_head->m_pkthdr.len; 2150 if (m_head->m_flags & M_MCAST) 2151 txr->hn_stat_mcasts++; 2152 2153 return 0; 2154 } 2155 2156 /* 2157 * NOTE: 2158 * If this function fails, then txd will be freed, but the mbuf 2159 * associated w/ the txd will _not_ be freed. 2160 */ 2161 static int 2162 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 2163 { 2164 int error, send_failed = 0, has_bpf; 2165 2166 again: 2167 has_bpf = bpf_peers_present(ifp->if_bpf); 2168 if (has_bpf) { 2169 /* 2170 * Make sure that this txd and any aggregated txds are not 2171 * freed before ETHER_BPF_MTAP. 2172 */ 2173 hn_txdesc_hold(txd); 2174 } 2175 error = txr->hn_sendpkt(txr, txd); 2176 if (!error) { 2177 if (has_bpf) { 2178 const struct hn_txdesc *tmp_txd; 2179 2180 ETHER_BPF_MTAP(ifp, txd->m); 2181 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 2182 ETHER_BPF_MTAP(ifp, tmp_txd->m); 2183 } 2184 2185 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 2186 #ifdef HN_IFSTART_SUPPORT 2187 if (!hn_use_if_start) 2188 #endif 2189 { 2190 if_inc_counter(ifp, IFCOUNTER_OBYTES, 2191 txr->hn_stat_size); 2192 if (txr->hn_stat_mcasts != 0) { 2193 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 2194 txr->hn_stat_mcasts); 2195 } 2196 } 2197 txr->hn_pkts += txr->hn_stat_pkts; 2198 txr->hn_sends++; 2199 } 2200 if (has_bpf) 2201 hn_txdesc_put(txr, txd); 2202 2203 if (__predict_false(error)) { 2204 int freed; 2205 2206 /* 2207 * This should "really rarely" happen. 2208 * 2209 * XXX Too many RX to be acked or too many sideband 2210 * commands to run? Ask netvsc_channel_rollup() 2211 * to kick start later. 2212 */ 2213 txr->hn_has_txeof = 1; 2214 if (!send_failed) { 2215 txr->hn_send_failed++; 2216 send_failed = 1; 2217 /* 2218 * Try sending again after set hn_has_txeof; 2219 * in case that we missed the last 2220 * netvsc_channel_rollup(). 2221 */ 2222 goto again; 2223 } 2224 if_printf(ifp, "send failed\n"); 2225 2226 /* 2227 * Caller will perform further processing on the 2228 * associated mbuf, so don't free it in hn_txdesc_put(); 2229 * only unload it from the DMA map in hn_txdesc_put(), 2230 * if it was loaded. 
2231 */ 2232 txd->m = NULL; 2233 freed = hn_txdesc_put(txr, txd); 2234 KASSERT(freed != 0, 2235 ("fail to free txd upon send error")); 2236 2237 txr->hn_send_failed++; 2238 } 2239 2240 /* Reset temporary stats, after this sending is done. */ 2241 txr->hn_stat_size = 0; 2242 txr->hn_stat_pkts = 0; 2243 txr->hn_stat_mcasts = 0; 2244 2245 return (error); 2246 } 2247 2248 /* 2249 * Append the specified data to the indicated mbuf chain, 2250 * Extend the mbuf chain if the new data does not fit in 2251 * existing space. 2252 * 2253 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 2254 * There should be an equivalent in the kernel mbuf code, 2255 * but there does not appear to be one yet. 2256 * 2257 * Differs from m_append() in that additional mbufs are 2258 * allocated with cluster size MJUMPAGESIZE, and filled 2259 * accordingly. 2260 * 2261 * Return 1 if able to complete the job; otherwise 0. 2262 */ 2263 static int 2264 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 2265 { 2266 struct mbuf *m, *n; 2267 int remainder, space; 2268 2269 for (m = m0; m->m_next != NULL; m = m->m_next) 2270 ; 2271 remainder = len; 2272 space = M_TRAILINGSPACE(m); 2273 if (space > 0) { 2274 /* 2275 * Copy into available space. 2276 */ 2277 if (space > remainder) 2278 space = remainder; 2279 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 2280 m->m_len += space; 2281 cp += space; 2282 remainder -= space; 2283 } 2284 while (remainder > 0) { 2285 /* 2286 * Allocate a new mbuf; could check space 2287 * and allocate a cluster instead. 2288 */ 2289 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 2290 if (n == NULL) 2291 break; 2292 n->m_len = min(MJUMPAGESIZE, remainder); 2293 bcopy(cp, mtod(n, caddr_t), n->m_len); 2294 cp += n->m_len; 2295 remainder -= n->m_len; 2296 m->m_next = n; 2297 m = n; 2298 } 2299 if (m0->m_flags & M_PKTHDR) 2300 m0->m_pkthdr.len += len - remainder; 2301 2302 return (remainder == 0); 2303 } 2304 2305 #if defined(INET) || defined(INET6) 2306 static __inline int 2307 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 2308 { 2309 #if __FreeBSD_version >= 1100095 2310 if (hn_lro_mbufq_depth) { 2311 tcp_lro_queue_mbuf(lc, m); 2312 return 0; 2313 } 2314 #endif 2315 return tcp_lro_rx(lc, m, 0); 2316 } 2317 #endif 2318 2319 static int 2320 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 2321 const struct hn_rxinfo *info) 2322 { 2323 struct ifnet *ifp; 2324 struct mbuf *m_new; 2325 int size, do_lro = 0, do_csum = 1; 2326 int hash_type; 2327 2328 /* If the VF is active, inject the packet through the VF */ 2329 ifp = rxr->hn_vf ? rxr->hn_vf : rxr->hn_ifp; 2330 2331 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2332 /* 2333 * NOTE: 2334 * See the NOTE of hn_rndis_init_fixat(). This 2335 * function can be reached, immediately after the 2336 * RNDIS is initialized but before the ifnet is 2337 * setup on the hn_attach() path; drop the unexpected 2338 * packets. 2339 */ 2340 return (0); 2341 } 2342 2343 if (dlen <= MHLEN) { 2344 m_new = m_gethdr(M_NOWAIT, MT_DATA); 2345 if (m_new == NULL) { 2346 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 2347 return (0); 2348 } 2349 memcpy(mtod(m_new, void *), data, dlen); 2350 m_new->m_pkthdr.len = m_new->m_len = dlen; 2351 rxr->hn_small_pkts++; 2352 } else { 2353 /* 2354 * Get an mbuf with a cluster. For packets 2K or less, 2355 * get a standard 2K cluster. For anything larger, get a 2356 * 4K cluster. Any buffers larger than 4K can cause problems 2357 * if looped around to the Hyper-V TX channel, so avoid them. 
2358 */ 2359 size = MCLBYTES; 2360 if (dlen > MCLBYTES) { 2361 /* 4096 */ 2362 size = MJUMPAGESIZE; 2363 } 2364 2365 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 2366 if (m_new == NULL) { 2367 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 2368 return (0); 2369 } 2370 2371 hv_m_append(m_new, dlen, data); 2372 } 2373 m_new->m_pkthdr.rcvif = ifp; 2374 2375 if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) 2376 do_csum = 0; 2377 2378 /* receive side checksum offload */ 2379 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 2380 /* IP csum offload */ 2381 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 2382 m_new->m_pkthdr.csum_flags |= 2383 (CSUM_IP_CHECKED | CSUM_IP_VALID); 2384 rxr->hn_csum_ip++; 2385 } 2386 2387 /* TCP/UDP csum offload */ 2388 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 2389 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 2390 m_new->m_pkthdr.csum_flags |= 2391 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2392 m_new->m_pkthdr.csum_data = 0xffff; 2393 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 2394 rxr->hn_csum_tcp++; 2395 else 2396 rxr->hn_csum_udp++; 2397 } 2398 2399 /* 2400 * XXX 2401 * As of this write (Oct 28th, 2016), host side will turn 2402 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 2403 * the do_lro setting here is actually _not_ accurate. We 2404 * depend on the RSS hash type check to reset do_lro. 2405 */ 2406 if ((info->csum_info & 2407 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 2408 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 2409 do_lro = 1; 2410 } else { 2411 const struct ether_header *eh; 2412 uint16_t etype; 2413 int hoff; 2414 2415 hoff = sizeof(*eh); 2416 if (m_new->m_len < hoff) 2417 goto skip; 2418 eh = mtod(m_new, struct ether_header *); 2419 etype = ntohs(eh->ether_type); 2420 if (etype == ETHERTYPE_VLAN) { 2421 const struct ether_vlan_header *evl; 2422 2423 hoff = sizeof(*evl); 2424 if (m_new->m_len < hoff) 2425 goto skip; 2426 evl = mtod(m_new, struct ether_vlan_header *); 2427 etype = ntohs(evl->evl_proto); 2428 } 2429 2430 if (etype == ETHERTYPE_IP) { 2431 int pr; 2432 2433 pr = hn_check_iplen(m_new, hoff); 2434 if (pr == IPPROTO_TCP) { 2435 if (do_csum && 2436 (rxr->hn_trust_hcsum & 2437 HN_TRUST_HCSUM_TCP)) { 2438 rxr->hn_csum_trusted++; 2439 m_new->m_pkthdr.csum_flags |= 2440 (CSUM_IP_CHECKED | CSUM_IP_VALID | 2441 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2442 m_new->m_pkthdr.csum_data = 0xffff; 2443 } 2444 do_lro = 1; 2445 } else if (pr == IPPROTO_UDP) { 2446 if (do_csum && 2447 (rxr->hn_trust_hcsum & 2448 HN_TRUST_HCSUM_UDP)) { 2449 rxr->hn_csum_trusted++; 2450 m_new->m_pkthdr.csum_flags |= 2451 (CSUM_IP_CHECKED | CSUM_IP_VALID | 2452 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2453 m_new->m_pkthdr.csum_data = 0xffff; 2454 } 2455 } else if (pr != IPPROTO_DONE && do_csum && 2456 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 2457 rxr->hn_csum_trusted++; 2458 m_new->m_pkthdr.csum_flags |= 2459 (CSUM_IP_CHECKED | CSUM_IP_VALID); 2460 } 2461 } 2462 } 2463 skip: 2464 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 2465 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 2466 NDIS_VLAN_INFO_ID(info->vlan_info), 2467 NDIS_VLAN_INFO_PRI(info->vlan_info), 2468 NDIS_VLAN_INFO_CFI(info->vlan_info)); 2469 m_new->m_flags |= M_VLANTAG; 2470 } 2471 2472 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 2473 rxr->hn_rss_pkts++; 2474 m_new->m_pkthdr.flowid = info->hash_value; 2475 hash_type = M_HASHTYPE_OPAQUE_HASH; 2476 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 2477 
NDIS_HASH_FUNCTION_TOEPLITZ) { 2478 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK); 2479 2480 /* 2481 * NOTE: 2482 * do_lro is resetted, if the hash types are not TCP 2483 * related. See the comment in the above csum_flags 2484 * setup section. 2485 */ 2486 switch (type) { 2487 case NDIS_HASH_IPV4: 2488 hash_type = M_HASHTYPE_RSS_IPV4; 2489 do_lro = 0; 2490 break; 2491 2492 case NDIS_HASH_TCP_IPV4: 2493 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 2494 break; 2495 2496 case NDIS_HASH_IPV6: 2497 hash_type = M_HASHTYPE_RSS_IPV6; 2498 do_lro = 0; 2499 break; 2500 2501 case NDIS_HASH_IPV6_EX: 2502 hash_type = M_HASHTYPE_RSS_IPV6_EX; 2503 do_lro = 0; 2504 break; 2505 2506 case NDIS_HASH_TCP_IPV6: 2507 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 2508 break; 2509 2510 case NDIS_HASH_TCP_IPV6_EX: 2511 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 2512 break; 2513 } 2514 } 2515 } else { 2516 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 2517 hash_type = M_HASHTYPE_OPAQUE; 2518 } 2519 M_HASHTYPE_SET(m_new, hash_type); 2520 2521 /* 2522 * Note: Moved RX completion back to hv_nv_on_receive() so all 2523 * messages (not just data messages) will trigger a response. 2524 */ 2525 2526 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 2527 rxr->hn_pkts++; 2528 2529 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { 2530 #if defined(INET) || defined(INET6) 2531 struct lro_ctrl *lro = &rxr->hn_lro; 2532 2533 if (lro->lro_cnt) { 2534 rxr->hn_lro_tried++; 2535 if (hn_lro_rx(lro, m_new) == 0) { 2536 /* DONE! */ 2537 return 0; 2538 } 2539 } 2540 #endif 2541 } 2542 2543 /* We're not holding the lock here, so don't release it */ 2544 (*ifp->if_input)(ifp, m_new); 2545 2546 return (0); 2547 } 2548 2549 static int 2550 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 2551 { 2552 struct hn_softc *sc = ifp->if_softc; 2553 struct ifreq *ifr = (struct ifreq *)data; 2554 int mask, error = 0; 2555 2556 switch (cmd) { 2557 case SIOCSIFMTU: 2558 if (ifr->ifr_mtu > HN_MTU_MAX) { 2559 error = EINVAL; 2560 break; 2561 } 2562 2563 HN_LOCK(sc); 2564 2565 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2566 HN_UNLOCK(sc); 2567 break; 2568 } 2569 2570 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 2571 /* Can't change MTU */ 2572 HN_UNLOCK(sc); 2573 error = EOPNOTSUPP; 2574 break; 2575 } 2576 2577 if (ifp->if_mtu == ifr->ifr_mtu) { 2578 HN_UNLOCK(sc); 2579 break; 2580 } 2581 2582 /* 2583 * Suspend this interface before the synthetic parts 2584 * are ripped. 2585 */ 2586 hn_suspend(sc); 2587 2588 /* 2589 * Detach the synthetics parts, i.e. NVS and RNDIS. 2590 */ 2591 hn_synth_detach(sc); 2592 2593 /* 2594 * Reattach the synthetic parts, i.e. NVS and RNDIS, 2595 * with the new MTU setting. 2596 */ 2597 error = hn_synth_attach(sc, ifr->ifr_mtu); 2598 if (error) { 2599 HN_UNLOCK(sc); 2600 break; 2601 } 2602 2603 /* 2604 * Commit the requested MTU, after the synthetic parts 2605 * have been successfully attached. 2606 */ 2607 ifp->if_mtu = ifr->ifr_mtu; 2608 2609 /* 2610 * Make sure that various parameters based on MTU are 2611 * still valid, after the MTU change. 2612 */ 2613 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 2614 hn_set_chim_size(sc, sc->hn_chim_szmax); 2615 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 2616 #if __FreeBSD_version >= 1100099 2617 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < 2618 HN_LRO_LENLIM_MIN(ifp)) 2619 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 2620 #endif 2621 2622 /* 2623 * All done! Resume the interface now. 
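* (hn_resume() undoes the earlier hn_suspend(); the synthetic
* parts have already been re-attached with the new MTU at this
* point.)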
2624 */ 2625 hn_resume(sc); 2626 2627 HN_UNLOCK(sc); 2628 break; 2629 2630 case SIOCSIFFLAGS: 2631 HN_LOCK(sc); 2632 2633 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2634 HN_UNLOCK(sc); 2635 break; 2636 } 2637 2638 if (ifp->if_flags & IFF_UP) { 2639 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 2640 /* 2641 * Caller meight hold mutex, e.g. 2642 * bpf; use busy-wait for the RNDIS 2643 * reply. 2644 */ 2645 HN_NO_SLEEPING(sc); 2646 hn_rxfilter_config(sc); 2647 HN_SLEEPING_OK(sc); 2648 } else { 2649 hn_init_locked(sc); 2650 } 2651 } else { 2652 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2653 hn_stop(sc, false); 2654 } 2655 sc->hn_if_flags = ifp->if_flags; 2656 2657 HN_UNLOCK(sc); 2658 break; 2659 2660 case SIOCSIFCAP: 2661 HN_LOCK(sc); 2662 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 2663 2664 if (mask & IFCAP_TXCSUM) { 2665 ifp->if_capenable ^= IFCAP_TXCSUM; 2666 if (ifp->if_capenable & IFCAP_TXCSUM) 2667 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 2668 else 2669 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 2670 } 2671 if (mask & IFCAP_TXCSUM_IPV6) { 2672 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 2673 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 2674 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 2675 else 2676 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 2677 } 2678 2679 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 2680 if (mask & IFCAP_RXCSUM) 2681 ifp->if_capenable ^= IFCAP_RXCSUM; 2682 #ifdef foo 2683 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2684 if (mask & IFCAP_RXCSUM_IPV6) 2685 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 2686 #endif 2687 2688 if (mask & IFCAP_LRO) 2689 ifp->if_capenable ^= IFCAP_LRO; 2690 2691 if (mask & IFCAP_TSO4) { 2692 ifp->if_capenable ^= IFCAP_TSO4; 2693 if (ifp->if_capenable & IFCAP_TSO4) 2694 ifp->if_hwassist |= CSUM_IP_TSO; 2695 else 2696 ifp->if_hwassist &= ~CSUM_IP_TSO; 2697 } 2698 if (mask & IFCAP_TSO6) { 2699 ifp->if_capenable ^= IFCAP_TSO6; 2700 if (ifp->if_capenable & IFCAP_TSO6) 2701 ifp->if_hwassist |= CSUM_IP6_TSO; 2702 else 2703 ifp->if_hwassist &= ~CSUM_IP6_TSO; 2704 } 2705 2706 HN_UNLOCK(sc); 2707 break; 2708 2709 case SIOCADDMULTI: 2710 case SIOCDELMULTI: 2711 HN_LOCK(sc); 2712 2713 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2714 HN_UNLOCK(sc); 2715 break; 2716 } 2717 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 2718 /* 2719 * Multicast uses mutex; use busy-wait for 2720 * the RNDIS reply. 2721 */ 2722 HN_NO_SLEEPING(sc); 2723 hn_rxfilter_config(sc); 2724 HN_SLEEPING_OK(sc); 2725 } 2726 2727 HN_UNLOCK(sc); 2728 break; 2729 2730 case SIOCSIFMEDIA: 2731 case SIOCGIFMEDIA: 2732 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 2733 break; 2734 2735 default: 2736 error = ether_ioctl(ifp, cmd, data); 2737 break; 2738 } 2739 return (error); 2740 } 2741 2742 static void 2743 hn_stop(struct hn_softc *sc, bool detaching) 2744 { 2745 struct ifnet *ifp = sc->hn_ifp; 2746 int i; 2747 2748 HN_LOCK_ASSERT(sc); 2749 2750 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 2751 ("synthetic parts were not attached")); 2752 2753 /* Disable polling. */ 2754 hn_polling(sc, 0); 2755 2756 /* Clear RUNNING bit _before_ hn_suspend_data() */ 2757 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 2758 hn_suspend_data(sc); 2759 2760 /* Clear OACTIVE bit. */ 2761 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2762 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 2763 sc->hn_tx_ring[i].hn_oactive = 0; 2764 2765 /* 2766 * If the VF is active, make sure the filter is not 0, even if 2767 * the synthetic NIC is down. 
2768 */ 2769 if (!detaching && (sc->hn_flags & HN_FLAG_VF)) 2770 hn_rxfilter_config(sc); 2771 } 2772 2773 static void 2774 hn_init_locked(struct hn_softc *sc) 2775 { 2776 struct ifnet *ifp = sc->hn_ifp; 2777 int i; 2778 2779 HN_LOCK_ASSERT(sc); 2780 2781 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 2782 return; 2783 2784 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2785 return; 2786 2787 /* Configure RX filter */ 2788 hn_rxfilter_config(sc); 2789 2790 /* Clear OACTIVE bit. */ 2791 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2792 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 2793 sc->hn_tx_ring[i].hn_oactive = 0; 2794 2795 /* Clear TX 'suspended' bit. */ 2796 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 2797 2798 /* Everything is ready; unleash! */ 2799 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 2800 2801 /* Re-enable polling if requested. */ 2802 if (sc->hn_pollhz > 0) 2803 hn_polling(sc, sc->hn_pollhz); 2804 } 2805 2806 static void 2807 hn_init(void *xsc) 2808 { 2809 struct hn_softc *sc = xsc; 2810 2811 HN_LOCK(sc); 2812 hn_init_locked(sc); 2813 HN_UNLOCK(sc); 2814 } 2815 2816 #if __FreeBSD_version >= 1100099 2817 2818 static int 2819 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 2820 { 2821 struct hn_softc *sc = arg1; 2822 unsigned int lenlim; 2823 int error; 2824 2825 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 2826 error = sysctl_handle_int(oidp, &lenlim, 0, req); 2827 if (error || req->newptr == NULL) 2828 return error; 2829 2830 HN_LOCK(sc); 2831 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 2832 lenlim > TCP_LRO_LENGTH_MAX) { 2833 HN_UNLOCK(sc); 2834 return EINVAL; 2835 } 2836 hn_set_lro_lenlim(sc, lenlim); 2837 HN_UNLOCK(sc); 2838 2839 return 0; 2840 } 2841 2842 static int 2843 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 2844 { 2845 struct hn_softc *sc = arg1; 2846 int ackcnt, error, i; 2847 2848 /* 2849 * lro_ackcnt_lim is append count limit, 2850 * +1 to turn it into aggregation limit. 2851 */ 2852 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 2853 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 2854 if (error || req->newptr == NULL) 2855 return error; 2856 2857 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 2858 return EINVAL; 2859 2860 /* 2861 * Convert aggregation limit back to append 2862 * count limit. 
2863 */ 2864 --ackcnt; 2865 HN_LOCK(sc); 2866 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 2867 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 2868 HN_UNLOCK(sc); 2869 return 0; 2870 } 2871 2872 #endif 2873 2874 static int 2875 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 2876 { 2877 struct hn_softc *sc = arg1; 2878 int hcsum = arg2; 2879 int on, error, i; 2880 2881 on = 0; 2882 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 2883 on = 1; 2884 2885 error = sysctl_handle_int(oidp, &on, 0, req); 2886 if (error || req->newptr == NULL) 2887 return error; 2888 2889 HN_LOCK(sc); 2890 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2891 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 2892 2893 if (on) 2894 rxr->hn_trust_hcsum |= hcsum; 2895 else 2896 rxr->hn_trust_hcsum &= ~hcsum; 2897 } 2898 HN_UNLOCK(sc); 2899 return 0; 2900 } 2901 2902 static int 2903 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 2904 { 2905 struct hn_softc *sc = arg1; 2906 int chim_size, error; 2907 2908 chim_size = sc->hn_tx_ring[0].hn_chim_size; 2909 error = sysctl_handle_int(oidp, &chim_size, 0, req); 2910 if (error || req->newptr == NULL) 2911 return error; 2912 2913 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 2914 return EINVAL; 2915 2916 HN_LOCK(sc); 2917 hn_set_chim_size(sc, chim_size); 2918 HN_UNLOCK(sc); 2919 return 0; 2920 } 2921 2922 #if __FreeBSD_version < 1100095 2923 static int 2924 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 2925 { 2926 struct hn_softc *sc = arg1; 2927 int ofs = arg2, i, error; 2928 struct hn_rx_ring *rxr; 2929 uint64_t stat; 2930 2931 stat = 0; 2932 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2933 rxr = &sc->hn_rx_ring[i]; 2934 stat += *((int *)((uint8_t *)rxr + ofs)); 2935 } 2936 2937 error = sysctl_handle_64(oidp, &stat, 0, req); 2938 if (error || req->newptr == NULL) 2939 return error; 2940 2941 /* Zero out this stat. */ 2942 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2943 rxr = &sc->hn_rx_ring[i]; 2944 *((int *)((uint8_t *)rxr + ofs)) = 0; 2945 } 2946 return 0; 2947 } 2948 #else 2949 static int 2950 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 2951 { 2952 struct hn_softc *sc = arg1; 2953 int ofs = arg2, i, error; 2954 struct hn_rx_ring *rxr; 2955 uint64_t stat; 2956 2957 stat = 0; 2958 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2959 rxr = &sc->hn_rx_ring[i]; 2960 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 2961 } 2962 2963 error = sysctl_handle_64(oidp, &stat, 0, req); 2964 if (error || req->newptr == NULL) 2965 return error; 2966 2967 /* Zero out this stat. */ 2968 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2969 rxr = &sc->hn_rx_ring[i]; 2970 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 2971 } 2972 return 0; 2973 } 2974 2975 #endif 2976 2977 static int 2978 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 2979 { 2980 struct hn_softc *sc = arg1; 2981 int ofs = arg2, i, error; 2982 struct hn_rx_ring *rxr; 2983 u_long stat; 2984 2985 stat = 0; 2986 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2987 rxr = &sc->hn_rx_ring[i]; 2988 stat += *((u_long *)((uint8_t *)rxr + ofs)); 2989 } 2990 2991 error = sysctl_handle_long(oidp, &stat, 0, req); 2992 if (error || req->newptr == NULL) 2993 return error; 2994 2995 /* Zero out this stat. 
*/ 2996 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2997 rxr = &sc->hn_rx_ring[i]; 2998 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 2999 } 3000 return 0; 3001 } 3002 3003 static int 3004 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 3005 { 3006 struct hn_softc *sc = arg1; 3007 int ofs = arg2, i, error; 3008 struct hn_tx_ring *txr; 3009 u_long stat; 3010 3011 stat = 0; 3012 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3013 txr = &sc->hn_tx_ring[i]; 3014 stat += *((u_long *)((uint8_t *)txr + ofs)); 3015 } 3016 3017 error = sysctl_handle_long(oidp, &stat, 0, req); 3018 if (error || req->newptr == NULL) 3019 return error; 3020 3021 /* Zero out this stat. */ 3022 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3023 txr = &sc->hn_tx_ring[i]; 3024 *((u_long *)((uint8_t *)txr + ofs)) = 0; 3025 } 3026 return 0; 3027 } 3028 3029 static int 3030 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 3031 { 3032 struct hn_softc *sc = arg1; 3033 int ofs = arg2, i, error, conf; 3034 struct hn_tx_ring *txr; 3035 3036 txr = &sc->hn_tx_ring[0]; 3037 conf = *((int *)((uint8_t *)txr + ofs)); 3038 3039 error = sysctl_handle_int(oidp, &conf, 0, req); 3040 if (error || req->newptr == NULL) 3041 return error; 3042 3043 HN_LOCK(sc); 3044 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3045 txr = &sc->hn_tx_ring[i]; 3046 *((int *)((uint8_t *)txr + ofs)) = conf; 3047 } 3048 HN_UNLOCK(sc); 3049 3050 return 0; 3051 } 3052 3053 static int 3054 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 3055 { 3056 struct hn_softc *sc = arg1; 3057 int error, size; 3058 3059 size = sc->hn_agg_size; 3060 error = sysctl_handle_int(oidp, &size, 0, req); 3061 if (error || req->newptr == NULL) 3062 return (error); 3063 3064 HN_LOCK(sc); 3065 sc->hn_agg_size = size; 3066 hn_set_txagg(sc); 3067 HN_UNLOCK(sc); 3068 3069 return (0); 3070 } 3071 3072 static int 3073 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 3074 { 3075 struct hn_softc *sc = arg1; 3076 int error, pkts; 3077 3078 pkts = sc->hn_agg_pkts; 3079 error = sysctl_handle_int(oidp, &pkts, 0, req); 3080 if (error || req->newptr == NULL) 3081 return (error); 3082 3083 HN_LOCK(sc); 3084 sc->hn_agg_pkts = pkts; 3085 hn_set_txagg(sc); 3086 HN_UNLOCK(sc); 3087 3088 return (0); 3089 } 3090 3091 static int 3092 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 3093 { 3094 struct hn_softc *sc = arg1; 3095 int pkts; 3096 3097 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 3098 return (sysctl_handle_int(oidp, &pkts, 0, req)); 3099 } 3100 3101 static int 3102 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 3103 { 3104 struct hn_softc *sc = arg1; 3105 int align; 3106 3107 align = sc->hn_tx_ring[0].hn_agg_align; 3108 return (sysctl_handle_int(oidp, &align, 0, req)); 3109 } 3110 3111 static void 3112 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz) 3113 { 3114 if (pollhz == 0) 3115 vmbus_chan_poll_disable(chan); 3116 else 3117 vmbus_chan_poll_enable(chan, pollhz); 3118 } 3119 3120 static void 3121 hn_polling(struct hn_softc *sc, u_int pollhz) 3122 { 3123 int nsubch = sc->hn_rx_ring_inuse - 1; 3124 3125 HN_LOCK_ASSERT(sc); 3126 3127 if (nsubch > 0) { 3128 struct vmbus_channel **subch; 3129 int i; 3130 3131 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 3132 for (i = 0; i < nsubch; ++i) 3133 hn_chan_polling(subch[i], pollhz); 3134 vmbus_subchan_rel(subch, nsubch); 3135 } 3136 hn_chan_polling(sc->hn_prichan, pollhz); 3137 } 3138 3139 static int 3140 hn_polling_sysctl(SYSCTL_HANDLER_ARGS) 3141 { 3142 struct hn_softc *sc = arg1; 3143 int pollhz, error; 3144 3145 pollhz = sc->hn_pollhz; 3146 error = sysctl_handle_int(oidp, &pollhz, 0, req); 3147 
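/*
* Standard sysctl handler pattern used throughout this file:
* report the current value, then return early unless this is a
* successful write request (req->newptr != NULL).
*/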
if (error || req->newptr == NULL) 3148 return (error); 3149 3150 if (pollhz != 0 && 3151 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX)) 3152 return (EINVAL); 3153 3154 HN_LOCK(sc); 3155 if (sc->hn_pollhz != pollhz) { 3156 sc->hn_pollhz = pollhz; 3157 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && 3158 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)) 3159 hn_polling(sc, sc->hn_pollhz); 3160 } 3161 HN_UNLOCK(sc); 3162 3163 return (0); 3164 } 3165 3166 static int 3167 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 3168 { 3169 struct hn_softc *sc = arg1; 3170 char verstr[16]; 3171 3172 snprintf(verstr, sizeof(verstr), "%u.%u", 3173 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 3174 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 3175 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 3176 } 3177 3178 static int 3179 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 3180 { 3181 struct hn_softc *sc = arg1; 3182 char caps_str[128]; 3183 uint32_t caps; 3184 3185 HN_LOCK(sc); 3186 caps = sc->hn_caps; 3187 HN_UNLOCK(sc); 3188 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 3189 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 3190 } 3191 3192 static int 3193 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 3194 { 3195 struct hn_softc *sc = arg1; 3196 char assist_str[128]; 3197 uint32_t hwassist; 3198 3199 HN_LOCK(sc); 3200 hwassist = sc->hn_ifp->if_hwassist; 3201 HN_UNLOCK(sc); 3202 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 3203 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 3204 } 3205 3206 static int 3207 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 3208 { 3209 struct hn_softc *sc = arg1; 3210 char filter_str[128]; 3211 uint32_t filter; 3212 3213 HN_LOCK(sc); 3214 filter = sc->hn_rx_filter; 3215 HN_UNLOCK(sc); 3216 snprintf(filter_str, sizeof(filter_str), "%b", filter, 3217 NDIS_PACKET_TYPES); 3218 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 3219 } 3220 3221 #ifndef RSS 3222 3223 static int 3224 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 3225 { 3226 struct hn_softc *sc = arg1; 3227 int error; 3228 3229 HN_LOCK(sc); 3230 3231 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 3232 if (error || req->newptr == NULL) 3233 goto back; 3234 3235 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 3236 if (error) 3237 goto back; 3238 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 3239 3240 if (sc->hn_rx_ring_inuse > 1) { 3241 error = hn_rss_reconfig(sc); 3242 } else { 3243 /* Not RSS capable, at least for now; just save the RSS key. */ 3244 error = 0; 3245 } 3246 back: 3247 HN_UNLOCK(sc); 3248 return (error); 3249 } 3250 3251 static int 3252 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 3253 { 3254 struct hn_softc *sc = arg1; 3255 int error; 3256 3257 HN_LOCK(sc); 3258 3259 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 3260 if (error || req->newptr == NULL) 3261 goto back; 3262 3263 /* 3264 * Don't allow RSS indirect table change, if this interface is not 3265 * RSS capable currently. 
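* (With only a single RX ring in use, RSS is effectively disabled;
* compare the hn_rx_ring_inuse check in hn_rss_key_sysctl() above.)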
3266 */ 3267 if (sc->hn_rx_ring_inuse == 1) { 3268 error = EOPNOTSUPP; 3269 goto back; 3270 } 3271 3272 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 3273 if (error) 3274 goto back; 3275 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 3276 3277 hn_rss_ind_fixup(sc); 3278 error = hn_rss_reconfig(sc); 3279 back: 3280 HN_UNLOCK(sc); 3281 return (error); 3282 } 3283 3284 #endif /* !RSS */ 3285 3286 static int 3287 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 3288 { 3289 struct hn_softc *sc = arg1; 3290 char hash_str[128]; 3291 uint32_t hash; 3292 3293 HN_LOCK(sc); 3294 hash = sc->hn_rss_hash; 3295 HN_UNLOCK(sc); 3296 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 3297 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 3298 } 3299 3300 static int 3301 hn_vf_sysctl(SYSCTL_HANDLER_ARGS) 3302 { 3303 struct hn_softc *sc = arg1; 3304 char vf_name[128]; 3305 struct ifnet *vf; 3306 3307 HN_LOCK(sc); 3308 vf_name[0] = '\0'; 3309 vf = sc->hn_rx_ring[0].hn_vf; 3310 if (vf != NULL) 3311 snprintf(vf_name, sizeof(vf_name), "%s", if_name(vf)); 3312 HN_UNLOCK(sc); 3313 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req); 3314 } 3315 3316 static int 3317 hn_check_iplen(const struct mbuf *m, int hoff) 3318 { 3319 const struct ip *ip; 3320 int len, iphlen, iplen; 3321 const struct tcphdr *th; 3322 int thoff; /* TCP data offset */ 3323 3324 len = hoff + sizeof(struct ip); 3325 3326 /* The packet must be at least the size of an IP header. */ 3327 if (m->m_pkthdr.len < len) 3328 return IPPROTO_DONE; 3329 3330 /* The fixed IP header must reside completely in the first mbuf. */ 3331 if (m->m_len < len) 3332 return IPPROTO_DONE; 3333 3334 ip = mtodo(m, hoff); 3335 3336 /* Bound check the packet's stated IP header length. */ 3337 iphlen = ip->ip_hl << 2; 3338 if (iphlen < sizeof(struct ip)) /* minimum header length */ 3339 return IPPROTO_DONE; 3340 3341 /* The full IP header must reside completely in the one mbuf. */ 3342 if (m->m_len < hoff + iphlen) 3343 return IPPROTO_DONE; 3344 3345 iplen = ntohs(ip->ip_len); 3346 3347 /* 3348 * Check that the amount of data in the buffers is as 3349 * at least much as the IP header would have us expect. 3350 */ 3351 if (m->m_pkthdr.len < hoff + iplen) 3352 return IPPROTO_DONE; 3353 3354 /* 3355 * Ignore IP fragments. 3356 */ 3357 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 3358 return IPPROTO_DONE; 3359 3360 /* 3361 * The TCP/IP or UDP/IP header must be entirely contained within 3362 * the first fragment of a packet. 
3363 */ 3364 switch (ip->ip_p) { 3365 case IPPROTO_TCP: 3366 if (iplen < iphlen + sizeof(struct tcphdr)) 3367 return IPPROTO_DONE; 3368 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 3369 return IPPROTO_DONE; 3370 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 3371 thoff = th->th_off << 2; 3372 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 3373 return IPPROTO_DONE; 3374 if (m->m_len < hoff + iphlen + thoff) 3375 return IPPROTO_DONE; 3376 break; 3377 case IPPROTO_UDP: 3378 if (iplen < iphlen + sizeof(struct udphdr)) 3379 return IPPROTO_DONE; 3380 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 3381 return IPPROTO_DONE; 3382 break; 3383 default: 3384 if (iplen < iphlen) 3385 return IPPROTO_DONE; 3386 break; 3387 } 3388 return ip->ip_p; 3389 } 3390 3391 static int 3392 hn_create_rx_data(struct hn_softc *sc, int ring_cnt) 3393 { 3394 struct sysctl_oid_list *child; 3395 struct sysctl_ctx_list *ctx; 3396 device_t dev = sc->hn_dev; 3397 #if defined(INET) || defined(INET6) 3398 #if __FreeBSD_version >= 1100095 3399 int lroent_cnt; 3400 #endif 3401 #endif 3402 int i; 3403 3404 /* 3405 * Create RXBUF for reception. 3406 * 3407 * NOTE: 3408 * - It is shared by all channels. 3409 * - A large enough buffer is allocated, certain version of NVSes 3410 * may further limit the usable space. 3411 */ 3412 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 3413 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 3414 BUS_DMA_WAITOK | BUS_DMA_ZERO); 3415 if (sc->hn_rxbuf == NULL) { 3416 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 3417 return (ENOMEM); 3418 } 3419 3420 sc->hn_rx_ring_cnt = ring_cnt; 3421 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 3422 3423 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 3424 M_DEVBUF, M_WAITOK | M_ZERO); 3425 3426 #if defined(INET) || defined(INET6) 3427 #if __FreeBSD_version >= 1100095 3428 lroent_cnt = hn_lro_entry_count; 3429 if (lroent_cnt < TCP_LRO_ENTRIES) 3430 lroent_cnt = TCP_LRO_ENTRIES; 3431 if (bootverbose) 3432 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 3433 #endif 3434 #endif /* INET || INET6 */ 3435 3436 ctx = device_get_sysctl_ctx(dev); 3437 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 3438 3439 /* Create dev.hn.UNIT.rx sysctl tree */ 3440 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 3441 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3442 3443 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3444 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 3445 3446 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 3447 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 3448 &rxr->hn_br_dma, BUS_DMA_WAITOK); 3449 if (rxr->hn_br == NULL) { 3450 device_printf(dev, "allocate bufring failed\n"); 3451 return (ENOMEM); 3452 } 3453 3454 if (hn_trust_hosttcp) 3455 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 3456 if (hn_trust_hostudp) 3457 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 3458 if (hn_trust_hostip) 3459 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 3460 rxr->hn_ifp = sc->hn_ifp; 3461 if (i < sc->hn_tx_ring_cnt) 3462 rxr->hn_txr = &sc->hn_tx_ring[i]; 3463 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 3464 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 3465 rxr->hn_rx_idx = i; 3466 rxr->hn_rxbuf = sc->hn_rxbuf; 3467 3468 /* 3469 * Initialize LRO. 
3470 */ 3471 #if defined(INET) || defined(INET6) 3472 #if __FreeBSD_version >= 1100095 3473 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 3474 hn_lro_mbufq_depth); 3475 #else 3476 tcp_lro_init(&rxr->hn_lro); 3477 rxr->hn_lro.ifp = sc->hn_ifp; 3478 #endif 3479 #if __FreeBSD_version >= 1100099 3480 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 3481 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 3482 #endif 3483 #endif /* INET || INET6 */ 3484 3485 if (sc->hn_rx_sysctl_tree != NULL) { 3486 char name[16]; 3487 3488 /* 3489 * Create per RX ring sysctl tree: 3490 * dev.hn.UNIT.rx.RINGID 3491 */ 3492 snprintf(name, sizeof(name), "%d", i); 3493 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 3494 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 3495 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3496 3497 if (rxr->hn_rx_sysctl_tree != NULL) { 3498 SYSCTL_ADD_ULONG(ctx, 3499 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 3500 OID_AUTO, "packets", CTLFLAG_RW, 3501 &rxr->hn_pkts, "# of packets received"); 3502 SYSCTL_ADD_ULONG(ctx, 3503 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 3504 OID_AUTO, "rss_pkts", CTLFLAG_RW, 3505 &rxr->hn_rss_pkts, 3506 "# of packets w/ RSS info received"); 3507 SYSCTL_ADD_INT(ctx, 3508 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 3509 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 3510 &rxr->hn_pktbuf_len, 0, 3511 "Temporary channel packet buffer length"); 3512 } 3513 } 3514 } 3515 3516 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 3517 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3518 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 3519 #if __FreeBSD_version < 1100095 3520 hn_rx_stat_int_sysctl, 3521 #else 3522 hn_rx_stat_u64_sysctl, 3523 #endif 3524 "LU", "LRO queued"); 3525 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 3526 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3527 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 3528 #if __FreeBSD_version < 1100095 3529 hn_rx_stat_int_sysctl, 3530 #else 3531 hn_rx_stat_u64_sysctl, 3532 #endif 3533 "LU", "LRO flushed"); 3534 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 3535 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3536 __offsetof(struct hn_rx_ring, hn_lro_tried), 3537 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 3538 #if __FreeBSD_version >= 1100099 3539 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 3540 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 3541 hn_lro_lenlim_sysctl, "IU", 3542 "Max # of data bytes to be aggregated by LRO"); 3543 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 3544 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 3545 hn_lro_ackcnt_sysctl, "I", 3546 "Max # of ACKs to be aggregated by LRO"); 3547 #endif 3548 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 3549 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 3550 hn_trust_hcsum_sysctl, "I", 3551 "Trust tcp segement verification on host side, " 3552 "when csum info is missing"); 3553 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 3554 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 3555 hn_trust_hcsum_sysctl, "I", 3556 "Trust udp datagram verification on host side, " 3557 "when csum info is missing"); 3558 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 3559 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 3560 hn_trust_hcsum_sysctl, "I", 3561 "Trust ip packet verification on host side, " 3562 "when csum info is missing"); 3563 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 3564 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3565 
__offsetof(struct hn_rx_ring, hn_csum_ip), 3566 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 3567 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 3568 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3569 __offsetof(struct hn_rx_ring, hn_csum_tcp), 3570 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 3571 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 3572 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3573 __offsetof(struct hn_rx_ring, hn_csum_udp), 3574 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 3575 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 3576 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3577 __offsetof(struct hn_rx_ring, hn_csum_trusted), 3578 hn_rx_stat_ulong_sysctl, "LU", 3579 "# of packets that we trust host's csum verification"); 3580 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 3581 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3582 __offsetof(struct hn_rx_ring, hn_small_pkts), 3583 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 3584 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 3585 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3586 __offsetof(struct hn_rx_ring, hn_ack_failed), 3587 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 3588 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 3589 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 3590 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 3591 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 3592 3593 return (0); 3594 } 3595 3596 static void 3597 hn_destroy_rx_data(struct hn_softc *sc) 3598 { 3599 int i; 3600 3601 if (sc->hn_rxbuf != NULL) { 3602 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0) 3603 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 3604 else 3605 device_printf(sc->hn_dev, "RXBUF is referenced\n"); 3606 sc->hn_rxbuf = NULL; 3607 } 3608 3609 if (sc->hn_rx_ring_cnt == 0) 3610 return; 3611 3612 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3613 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 3614 3615 if (rxr->hn_br == NULL) 3616 continue; 3617 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) { 3618 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 3619 } else { 3620 device_printf(sc->hn_dev, 3621 "%dth channel bufring is referenced", i); 3622 } 3623 rxr->hn_br = NULL; 3624 3625 #if defined(INET) || defined(INET6) 3626 tcp_lro_free(&rxr->hn_lro); 3627 #endif 3628 free(rxr->hn_pktbuf, M_DEVBUF); 3629 } 3630 free(sc->hn_rx_ring, M_DEVBUF); 3631 sc->hn_rx_ring = NULL; 3632 3633 sc->hn_rx_ring_cnt = 0; 3634 sc->hn_rx_ring_inuse = 0; 3635 } 3636 3637 static int 3638 hn_tx_ring_create(struct hn_softc *sc, int id) 3639 { 3640 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 3641 device_t dev = sc->hn_dev; 3642 bus_dma_tag_t parent_dtag; 3643 int error, i; 3644 3645 txr->hn_sc = sc; 3646 txr->hn_tx_idx = id; 3647 3648 #ifndef HN_USE_TXDESC_BUFRING 3649 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 3650 #endif 3651 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 3652 3653 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 3654 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 3655 M_DEVBUF, M_WAITOK | M_ZERO); 3656 #ifndef HN_USE_TXDESC_BUFRING 3657 SLIST_INIT(&txr->hn_txlist); 3658 #else 3659 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 3660 M_WAITOK, &txr->hn_tx_lock); 3661 #endif 3662 3663 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) { 3664 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ( 3665 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id)); 3666 } else { 3667 txr->hn_tx_taskq = 
sc->hn_tx_taskqs[id % hn_tx_taskq_cnt]; 3668 } 3669 3670 #ifdef HN_IFSTART_SUPPORT 3671 if (hn_use_if_start) { 3672 txr->hn_txeof = hn_start_txeof; 3673 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 3674 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 3675 } else 3676 #endif 3677 { 3678 int br_depth; 3679 3680 txr->hn_txeof = hn_xmit_txeof; 3681 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 3682 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 3683 3684 br_depth = hn_get_txswq_depth(txr); 3685 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 3686 M_WAITOK, &txr->hn_tx_lock); 3687 } 3688 3689 txr->hn_direct_tx_size = hn_direct_tx_size; 3690 3691 /* 3692 * Always schedule transmission instead of trying to do direct 3693 * transmission. This one gives the best performance so far. 3694 */ 3695 txr->hn_sched_tx = 1; 3696 3697 parent_dtag = bus_get_dma_tag(dev); 3698 3699 /* DMA tag for RNDIS packet messages. */ 3700 error = bus_dma_tag_create(parent_dtag, /* parent */ 3701 HN_RNDIS_PKT_ALIGN, /* alignment */ 3702 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 3703 BUS_SPACE_MAXADDR, /* lowaddr */ 3704 BUS_SPACE_MAXADDR, /* highaddr */ 3705 NULL, NULL, /* filter, filterarg */ 3706 HN_RNDIS_PKT_LEN, /* maxsize */ 3707 1, /* nsegments */ 3708 HN_RNDIS_PKT_LEN, /* maxsegsize */ 3709 0, /* flags */ 3710 NULL, /* lockfunc */ 3711 NULL, /* lockfuncarg */ 3712 &txr->hn_tx_rndis_dtag); 3713 if (error) { 3714 device_printf(dev, "failed to create rndis dmatag\n"); 3715 return error; 3716 } 3717 3718 /* DMA tag for data. */ 3719 error = bus_dma_tag_create(parent_dtag, /* parent */ 3720 1, /* alignment */ 3721 HN_TX_DATA_BOUNDARY, /* boundary */ 3722 BUS_SPACE_MAXADDR, /* lowaddr */ 3723 BUS_SPACE_MAXADDR, /* highaddr */ 3724 NULL, NULL, /* filter, filterarg */ 3725 HN_TX_DATA_MAXSIZE, /* maxsize */ 3726 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 3727 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 3728 0, /* flags */ 3729 NULL, /* lockfunc */ 3730 NULL, /* lockfuncarg */ 3731 &txr->hn_tx_data_dtag); 3732 if (error) { 3733 device_printf(dev, "failed to create data dmatag\n"); 3734 return error; 3735 } 3736 3737 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 3738 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 3739 3740 txd->txr = txr; 3741 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 3742 STAILQ_INIT(&txd->agg_list); 3743 3744 /* 3745 * Allocate and load RNDIS packet message. 3746 */ 3747 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 3748 (void **)&txd->rndis_pkt, 3749 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 3750 &txd->rndis_pkt_dmap); 3751 if (error) { 3752 device_printf(dev, 3753 "failed to allocate rndis_packet_msg, %d\n", i); 3754 return error; 3755 } 3756 3757 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 3758 txd->rndis_pkt_dmap, 3759 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 3760 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 3761 BUS_DMA_NOWAIT); 3762 if (error) { 3763 device_printf(dev, 3764 "failed to load rndis_packet_msg, %d\n", i); 3765 bus_dmamem_free(txr->hn_tx_rndis_dtag, 3766 txd->rndis_pkt, txd->rndis_pkt_dmap); 3767 return error; 3768 } 3769 3770 /* DMA map for TX data. 
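* (One map per txdesc; it is loaded on demand by
* hn_txdesc_dmamap_load() on the transmit path.)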
*/ 3771 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 3772 &txd->data_dmap); 3773 if (error) { 3774 device_printf(dev, 3775 "failed to allocate tx data dmamap\n"); 3776 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 3777 txd->rndis_pkt_dmap); 3778 bus_dmamem_free(txr->hn_tx_rndis_dtag, 3779 txd->rndis_pkt, txd->rndis_pkt_dmap); 3780 return error; 3781 } 3782 3783 /* All set, put it to list */ 3784 txd->flags |= HN_TXD_FLAG_ONLIST; 3785 #ifndef HN_USE_TXDESC_BUFRING 3786 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 3787 #else 3788 buf_ring_enqueue(txr->hn_txdesc_br, txd); 3789 #endif 3790 } 3791 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 3792 3793 if (sc->hn_tx_sysctl_tree != NULL) { 3794 struct sysctl_oid_list *child; 3795 struct sysctl_ctx_list *ctx; 3796 char name[16]; 3797 3798 /* 3799 * Create per TX ring sysctl tree: 3800 * dev.hn.UNIT.tx.RINGID 3801 */ 3802 ctx = device_get_sysctl_ctx(dev); 3803 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 3804 3805 snprintf(name, sizeof(name), "%d", id); 3806 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 3807 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3808 3809 if (txr->hn_tx_sysctl_tree != NULL) { 3810 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 3811 3812 #ifdef HN_DEBUG 3813 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 3814 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 3815 "# of available TX descs"); 3816 #endif 3817 #ifdef HN_IFSTART_SUPPORT 3818 if (!hn_use_if_start) 3819 #endif 3820 { 3821 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 3822 CTLFLAG_RD, &txr->hn_oactive, 0, 3823 "over active"); 3824 } 3825 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 3826 CTLFLAG_RW, &txr->hn_pkts, 3827 "# of packets transmitted"); 3828 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 3829 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 3830 } 3831 } 3832 3833 return 0; 3834 } 3835 3836 static void 3837 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 3838 { 3839 struct hn_tx_ring *txr = txd->txr; 3840 3841 KASSERT(txd->m == NULL, ("still has mbuf installed")); 3842 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 3843 3844 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 3845 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 3846 txd->rndis_pkt_dmap); 3847 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 3848 } 3849 3850 static void 3851 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd) 3852 { 3853 3854 KASSERT(txd->refs == 0 || txd->refs == 1, 3855 ("invalid txd refs %d", txd->refs)); 3856 3857 /* Aggregated txds will be freed by their aggregating txd. */ 3858 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) { 3859 int freed; 3860 3861 freed = hn_txdesc_put(txr, txd); 3862 KASSERT(freed, ("can't free txdesc")); 3863 } 3864 } 3865 3866 static void 3867 hn_tx_ring_destroy(struct hn_tx_ring *txr) 3868 { 3869 int i; 3870 3871 if (txr->hn_txdesc == NULL) 3872 return; 3873 3874 /* 3875 * NOTE: 3876 * Because the freeing of aggregated txds will be deferred 3877 * to the aggregating txd, two passes are used here: 3878 * - The first pass GCes any pending txds. This GC is necessary, 3879 * since if the channels are revoked, hypervisor will not 3880 * deliver send-done for all pending txds. 3881 * - The second pass frees the busdma stuffs, i.e. after all txds 3882 * were freed. 
3883 */ 3884 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 3885 hn_txdesc_gc(txr, &txr->hn_txdesc[i]); 3886 for (i = 0; i < txr->hn_txdesc_cnt; ++i) 3887 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]); 3888 3889 if (txr->hn_tx_data_dtag != NULL) 3890 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 3891 if (txr->hn_tx_rndis_dtag != NULL) 3892 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 3893 3894 #ifdef HN_USE_TXDESC_BUFRING 3895 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 3896 #endif 3897 3898 free(txr->hn_txdesc, M_DEVBUF); 3899 txr->hn_txdesc = NULL; 3900 3901 if (txr->hn_mbuf_br != NULL) 3902 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 3903 3904 #ifndef HN_USE_TXDESC_BUFRING 3905 mtx_destroy(&txr->hn_txlist_spin); 3906 #endif 3907 mtx_destroy(&txr->hn_tx_lock); 3908 } 3909 3910 static int 3911 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 3912 { 3913 struct sysctl_oid_list *child; 3914 struct sysctl_ctx_list *ctx; 3915 int i; 3916 3917 /* 3918 * Create TXBUF for chimney sending. 3919 * 3920 * NOTE: It is shared by all channels. 3921 */ 3922 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 3923 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 3924 BUS_DMA_WAITOK | BUS_DMA_ZERO); 3925 if (sc->hn_chim == NULL) { 3926 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 3927 return (ENOMEM); 3928 } 3929 3930 sc->hn_tx_ring_cnt = ring_cnt; 3931 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 3932 3933 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 3934 M_DEVBUF, M_WAITOK | M_ZERO); 3935 3936 ctx = device_get_sysctl_ctx(sc->hn_dev); 3937 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 3938 3939 /* Create dev.hn.UNIT.tx sysctl tree */ 3940 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 3941 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3942 3943 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3944 int error; 3945 3946 error = hn_tx_ring_create(sc, i); 3947 if (error) 3948 return error; 3949 } 3950 3951 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 3952 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3953 __offsetof(struct hn_tx_ring, hn_no_txdescs), 3954 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 3955 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 3956 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3957 __offsetof(struct hn_tx_ring, hn_send_failed), 3958 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 3959 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 3960 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3961 __offsetof(struct hn_tx_ring, hn_txdma_failed), 3962 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 3963 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 3964 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3965 __offsetof(struct hn_tx_ring, hn_flush_failed), 3966 hn_tx_stat_ulong_sysctl, "LU", 3967 "# of packet transmission aggregation flush failure"); 3968 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 3969 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3970 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 3971 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 3972 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 3973 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3974 __offsetof(struct hn_tx_ring, hn_tx_chimney), 3975 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 3976 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 3977 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3978 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 3979 
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 3980 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 3981 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 3982 "# of total TX descs"); 3983 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 3984 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 3985 "Chimney send packet size upper boundary"); 3986 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 3987 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 3988 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 3989 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 3990 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3991 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 3992 hn_tx_conf_int_sysctl, "I", 3993 "Size of the packet for direct transmission"); 3994 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 3995 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3996 __offsetof(struct hn_tx_ring, hn_sched_tx), 3997 hn_tx_conf_int_sysctl, "I", 3998 "Always schedule transmission " 3999 "instead of doing direct transmission"); 4000 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 4001 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 4002 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 4003 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 4004 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 4005 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 4006 "Applied packet transmission aggregation size"); 4007 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 4008 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 4009 hn_txagg_pktmax_sysctl, "I", 4010 "Applied packet transmission aggregation packets"); 4011 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 4012 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 4013 hn_txagg_align_sysctl, "I", 4014 "Applied packet transmission aggregation alignment"); 4015 4016 return 0; 4017 } 4018 4019 static void 4020 hn_set_chim_size(struct hn_softc *sc, int chim_size) 4021 { 4022 int i; 4023 4024 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 4025 sc->hn_tx_ring[i].hn_chim_size = chim_size; 4026 } 4027 4028 static void 4029 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 4030 { 4031 struct ifnet *ifp = sc->hn_ifp; 4032 int tso_minlen; 4033 4034 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 4035 return; 4036 4037 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 4038 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 4039 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 4040 4041 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 4042 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 4043 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 4044 4045 if (tso_maxlen < tso_minlen) 4046 tso_maxlen = tso_minlen; 4047 else if (tso_maxlen > IP_MAXPACKET) 4048 tso_maxlen = IP_MAXPACKET; 4049 if (tso_maxlen > sc->hn_ndis_tso_szmax) 4050 tso_maxlen = sc->hn_ndis_tso_szmax; 4051 ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 4052 if (bootverbose) 4053 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 4054 } 4055 4056 static void 4057 hn_fixup_tx_data(struct hn_softc *sc) 4058 { 4059 uint64_t csum_assist; 4060 int i; 4061 4062 hn_set_chim_size(sc, sc->hn_chim_szmax); 4063 if (hn_tx_chimney_size > 0 && 4064 hn_tx_chimney_size < sc->hn_chim_szmax) 4065 hn_set_chim_size(sc, hn_tx_chimney_size); 4066 4067 csum_assist = 0; 4068 if (sc->hn_caps & HN_CAP_IPCS) 4069 csum_assist |= CSUM_IP; 4070 if (sc->hn_caps & HN_CAP_TCP4CS) 4071 csum_assist |= CSUM_IP_TCP; 4072 if (sc->hn_caps & HN_CAP_UDP4CS) 4073 
csum_assist |= CSUM_IP_UDP; 4074 if (sc->hn_caps & HN_CAP_TCP6CS) 4075 csum_assist |= CSUM_IP6_TCP; 4076 if (sc->hn_caps & HN_CAP_UDP6CS) 4077 csum_assist |= CSUM_IP6_UDP; 4078 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 4079 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 4080 4081 if (sc->hn_caps & HN_CAP_HASHVAL) { 4082 /* 4083 * Support HASHVAL pktinfo on TX path. 4084 */ 4085 if (bootverbose) 4086 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 4087 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 4088 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 4089 } 4090 } 4091 4092 static void 4093 hn_destroy_tx_data(struct hn_softc *sc) 4094 { 4095 int i; 4096 4097 if (sc->hn_chim != NULL) { 4098 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) { 4099 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 4100 } else { 4101 device_printf(sc->hn_dev, 4102 "chimney sending buffer is referenced"); 4103 } 4104 sc->hn_chim = NULL; 4105 } 4106 4107 if (sc->hn_tx_ring_cnt == 0) 4108 return; 4109 4110 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 4111 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 4112 4113 free(sc->hn_tx_ring, M_DEVBUF); 4114 sc->hn_tx_ring = NULL; 4115 4116 sc->hn_tx_ring_cnt = 0; 4117 sc->hn_tx_ring_inuse = 0; 4118 } 4119 4120 #ifdef HN_IFSTART_SUPPORT 4121 4122 static void 4123 hn_start_taskfunc(void *xtxr, int pending __unused) 4124 { 4125 struct hn_tx_ring *txr = xtxr; 4126 4127 mtx_lock(&txr->hn_tx_lock); 4128 hn_start_locked(txr, 0); 4129 mtx_unlock(&txr->hn_tx_lock); 4130 } 4131 4132 static int 4133 hn_start_locked(struct hn_tx_ring *txr, int len) 4134 { 4135 struct hn_softc *sc = txr->hn_sc; 4136 struct ifnet *ifp = sc->hn_ifp; 4137 int sched = 0; 4138 4139 KASSERT(hn_use_if_start, 4140 ("hn_start_locked is called, when if_start is disabled")); 4141 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 4142 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 4143 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 4144 4145 if (__predict_false(txr->hn_suspended)) 4146 return (0); 4147 4148 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 4149 IFF_DRV_RUNNING) 4150 return (0); 4151 4152 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 4153 struct hn_txdesc *txd; 4154 struct mbuf *m_head; 4155 int error; 4156 4157 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 4158 if (m_head == NULL) 4159 break; 4160 4161 if (len > 0 && m_head->m_pkthdr.len > len) { 4162 /* 4163 * This sending could be time consuming; let callers 4164 * dispatch this packet sending (and sending of any 4165 * following up packets) to tx taskqueue. 
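/*
 * The deferral policy above, reduced to a standalone sketch with
 * hypothetical names (pktq_requeue() and the threshold handling are
 * illustrative only, not part of this driver): a non-zero length
 * threshold makes the direct-send path give large packets back to the
 * queue and lets the caller reschedule the remaining work on the TX
 * taskqueue.
 */
#if 0
struct pkt_queue;				/* stand-in for if_snd/drbr */
void	pktq_requeue(struct pkt_queue *, void *);

static int
defer_large_pkt(struct pkt_queue *q, void *pkt, int pkt_len, int len_thresh)
{
	if (len_thresh > 0 && pkt_len > len_thresh) {
		pktq_requeue(q, pkt);	/* keep ordering; retry from taskq */
		return (1);		/* caller should enqueue the TX task */
	}
	return (0);			/* small enough; send directly */
}
#endif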
4166 */ 4167 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 4168 sched = 1; 4169 break; 4170 } 4171 4172 #if defined(INET6) || defined(INET) 4173 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 4174 m_head = hn_tso_fixup(m_head); 4175 if (__predict_false(m_head == NULL)) { 4176 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 4177 continue; 4178 } 4179 } 4180 #endif 4181 4182 txd = hn_txdesc_get(txr); 4183 if (txd == NULL) { 4184 txr->hn_no_txdescs++; 4185 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 4186 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4187 break; 4188 } 4189 4190 error = hn_encap(ifp, txr, txd, &m_head); 4191 if (error) { 4192 /* Both txd and m_head are freed */ 4193 KASSERT(txr->hn_agg_txd == NULL, 4194 ("encap failed w/ pending aggregating txdesc")); 4195 continue; 4196 } 4197 4198 if (txr->hn_agg_pktleft == 0) { 4199 if (txr->hn_agg_txd != NULL) { 4200 KASSERT(m_head == NULL, 4201 ("pending mbuf for aggregating txdesc")); 4202 error = hn_flush_txagg(ifp, txr); 4203 if (__predict_false(error)) { 4204 atomic_set_int(&ifp->if_drv_flags, 4205 IFF_DRV_OACTIVE); 4206 break; 4207 } 4208 } else { 4209 KASSERT(m_head != NULL, ("mbuf was freed")); 4210 error = hn_txpkt(ifp, txr, txd); 4211 if (__predict_false(error)) { 4212 /* txd is freed, but m_head is not */ 4213 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 4214 atomic_set_int(&ifp->if_drv_flags, 4215 IFF_DRV_OACTIVE); 4216 break; 4217 } 4218 } 4219 } 4220 #ifdef INVARIANTS 4221 else { 4222 KASSERT(txr->hn_agg_txd != NULL, 4223 ("no aggregating txdesc")); 4224 KASSERT(m_head == NULL, 4225 ("pending mbuf for aggregating txdesc")); 4226 } 4227 #endif 4228 } 4229 4230 /* Flush pending aggerated transmission. */ 4231 if (txr->hn_agg_txd != NULL) 4232 hn_flush_txagg(ifp, txr); 4233 return (sched); 4234 } 4235 4236 static void 4237 hn_start(struct ifnet *ifp) 4238 { 4239 struct hn_softc *sc = ifp->if_softc; 4240 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 4241 4242 if (txr->hn_sched_tx) 4243 goto do_sched; 4244 4245 if (mtx_trylock(&txr->hn_tx_lock)) { 4246 int sched; 4247 4248 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 4249 mtx_unlock(&txr->hn_tx_lock); 4250 if (!sched) 4251 return; 4252 } 4253 do_sched: 4254 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 4255 } 4256 4257 static void 4258 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 4259 { 4260 struct hn_tx_ring *txr = xtxr; 4261 4262 mtx_lock(&txr->hn_tx_lock); 4263 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 4264 hn_start_locked(txr, 0); 4265 mtx_unlock(&txr->hn_tx_lock); 4266 } 4267 4268 static void 4269 hn_start_txeof(struct hn_tx_ring *txr) 4270 { 4271 struct hn_softc *sc = txr->hn_sc; 4272 struct ifnet *ifp = sc->hn_ifp; 4273 4274 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 4275 4276 if (txr->hn_sched_tx) 4277 goto do_sched; 4278 4279 if (mtx_trylock(&txr->hn_tx_lock)) { 4280 int sched; 4281 4282 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4283 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 4284 mtx_unlock(&txr->hn_tx_lock); 4285 if (sched) { 4286 taskqueue_enqueue(txr->hn_tx_taskq, 4287 &txr->hn_tx_task); 4288 } 4289 } else { 4290 do_sched: 4291 /* 4292 * Release the OACTIVE earlier, with the hope, that 4293 * others could catch up. The task will clear the 4294 * flag again with the hn_tx_lock to avoid possible 4295 * races. 
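/*
 * hn_start() and hn_start_txeof() above share a "try direct, else defer"
 * shape.  A minimal sketch of that pattern, with a hypothetical
 * direct_send() callback (not part of this driver): take the ring lock
 * opportunistically, and fall back to the taskqueue when the lock is
 * contended or the direct attempt asked to be rescheduled.
 */
#if 0
static void
try_direct_or_defer(struct mtx *lock, struct taskqueue *tq, struct task *tk,
    int (*direct_send)(void *), void *arg)
{
	if (mtx_trylock(lock)) {
		int sched = direct_send(arg);	/* hypothetical callback */

		mtx_unlock(lock);
		if (!sched)
			return;			/* all done inline */
	}
	taskqueue_enqueue(tq, tk);		/* contended or deferred */
}
#endif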
4296 */ 4297 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 4298 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 4299 } 4300 } 4301 4302 #endif /* HN_IFSTART_SUPPORT */ 4303 4304 static int 4305 hn_xmit(struct hn_tx_ring *txr, int len) 4306 { 4307 struct hn_softc *sc = txr->hn_sc; 4308 struct ifnet *ifp = sc->hn_ifp; 4309 struct mbuf *m_head; 4310 int sched = 0; 4311 4312 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 4313 #ifdef HN_IFSTART_SUPPORT 4314 KASSERT(hn_use_if_start == 0, 4315 ("hn_xmit is called, when if_start is enabled")); 4316 #endif 4317 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 4318 4319 if (__predict_false(txr->hn_suspended)) 4320 return (0); 4321 4322 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 4323 return (0); 4324 4325 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 4326 struct hn_txdesc *txd; 4327 int error; 4328 4329 if (len > 0 && m_head->m_pkthdr.len > len) { 4330 /* 4331 * This sending could be time consuming; let callers 4332 * dispatch this packet sending (and sending of any 4333 * following up packets) to tx taskqueue. 4334 */ 4335 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 4336 sched = 1; 4337 break; 4338 } 4339 4340 txd = hn_txdesc_get(txr); 4341 if (txd == NULL) { 4342 txr->hn_no_txdescs++; 4343 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 4344 txr->hn_oactive = 1; 4345 break; 4346 } 4347 4348 error = hn_encap(ifp, txr, txd, &m_head); 4349 if (error) { 4350 /* Both txd and m_head are freed; discard */ 4351 KASSERT(txr->hn_agg_txd == NULL, 4352 ("encap failed w/ pending aggregating txdesc")); 4353 drbr_advance(ifp, txr->hn_mbuf_br); 4354 continue; 4355 } 4356 4357 if (txr->hn_agg_pktleft == 0) { 4358 if (txr->hn_agg_txd != NULL) { 4359 KASSERT(m_head == NULL, 4360 ("pending mbuf for aggregating txdesc")); 4361 error = hn_flush_txagg(ifp, txr); 4362 if (__predict_false(error)) { 4363 txr->hn_oactive = 1; 4364 break; 4365 } 4366 } else { 4367 KASSERT(m_head != NULL, ("mbuf was freed")); 4368 error = hn_txpkt(ifp, txr, txd); 4369 if (__predict_false(error)) { 4370 /* txd is freed, but m_head is not */ 4371 drbr_putback(ifp, txr->hn_mbuf_br, 4372 m_head); 4373 txr->hn_oactive = 1; 4374 break; 4375 } 4376 } 4377 } 4378 #ifdef INVARIANTS 4379 else { 4380 KASSERT(txr->hn_agg_txd != NULL, 4381 ("no aggregating txdesc")); 4382 KASSERT(m_head == NULL, 4383 ("pending mbuf for aggregating txdesc")); 4384 } 4385 #endif 4386 4387 /* Sent */ 4388 drbr_advance(ifp, txr->hn_mbuf_br); 4389 } 4390 4391 /* Flush pending aggerated transmission. */ 4392 if (txr->hn_agg_txd != NULL) 4393 hn_flush_txagg(ifp, txr); 4394 return (sched); 4395 } 4396 4397 static int 4398 hn_transmit(struct ifnet *ifp, struct mbuf *m) 4399 { 4400 struct hn_softc *sc = ifp->if_softc; 4401 struct hn_tx_ring *txr; 4402 int error, idx = 0; 4403 4404 #if defined(INET6) || defined(INET) 4405 /* 4406 * Perform TSO packet header fixup now, since the TSO 4407 * packet header should be cache-hot. 
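/*
 * hn_xmit() above follows the usual drbr consumer discipline.  A sketch
 * of that discipline with a hypothetical hw_send() routine (not part of
 * this driver): peek at the head, advance only after the packet has
 * really been handed to the channel, and put it back without advancing
 * when transmission has to stop.
 */
#if 0
int	hw_send(struct mbuf *);			/* hypothetical */

static void
drain_drbr(struct ifnet *ifp, struct buf_ring *br)
{
	struct mbuf *m;

	while ((m = drbr_peek(ifp, br)) != NULL) {
		if (hw_send(m) != 0) {
			drbr_putback(ifp, br, m);	/* retry later */
			break;
		}
		drbr_advance(ifp, br);			/* consumed */
	}
}
#endif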
4408 */ 4409 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 4410 m = hn_tso_fixup(m); 4411 if (__predict_false(m == NULL)) { 4412 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 4413 return EIO; 4414 } 4415 } 4416 #endif 4417 4418 /* 4419 * Select the TX ring based on flowid 4420 */ 4421 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 4422 #ifdef RSS 4423 uint32_t bid; 4424 4425 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 4426 &bid) == 0) 4427 idx = bid % sc->hn_tx_ring_inuse; 4428 else 4429 #endif 4430 { 4431 #if defined(INET6) || defined(INET) 4432 int tcpsyn = 0; 4433 4434 if (m->m_pkthdr.len < 128 && 4435 (m->m_pkthdr.csum_flags & 4436 (CSUM_IP_TCP | CSUM_IP6_TCP)) && 4437 (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 4438 m = hn_check_tcpsyn(m, &tcpsyn); 4439 if (__predict_false(m == NULL)) { 4440 if_inc_counter(ifp, 4441 IFCOUNTER_OERRORS, 1); 4442 return (EIO); 4443 } 4444 } 4445 #else 4446 const int tcpsyn = 0; 4447 #endif 4448 if (tcpsyn) 4449 idx = 0; 4450 else 4451 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 4452 } 4453 } 4454 txr = &sc->hn_tx_ring[idx]; 4455 4456 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 4457 if (error) { 4458 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 4459 return error; 4460 } 4461 4462 if (txr->hn_oactive) 4463 return 0; 4464 4465 if (txr->hn_sched_tx) 4466 goto do_sched; 4467 4468 if (mtx_trylock(&txr->hn_tx_lock)) { 4469 int sched; 4470 4471 sched = hn_xmit(txr, txr->hn_direct_tx_size); 4472 mtx_unlock(&txr->hn_tx_lock); 4473 if (!sched) 4474 return 0; 4475 } 4476 do_sched: 4477 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 4478 return 0; 4479 } 4480 4481 static void 4482 hn_tx_ring_qflush(struct hn_tx_ring *txr) 4483 { 4484 struct mbuf *m; 4485 4486 mtx_lock(&txr->hn_tx_lock); 4487 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 4488 m_freem(m); 4489 mtx_unlock(&txr->hn_tx_lock); 4490 } 4491 4492 static void 4493 hn_xmit_qflush(struct ifnet *ifp) 4494 { 4495 struct hn_softc *sc = ifp->if_softc; 4496 int i; 4497 4498 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4499 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 4500 if_qflush(ifp); 4501 } 4502 4503 static void 4504 hn_xmit_txeof(struct hn_tx_ring *txr) 4505 { 4506 4507 if (txr->hn_sched_tx) 4508 goto do_sched; 4509 4510 if (mtx_trylock(&txr->hn_tx_lock)) { 4511 int sched; 4512 4513 txr->hn_oactive = 0; 4514 sched = hn_xmit(txr, txr->hn_direct_tx_size); 4515 mtx_unlock(&txr->hn_tx_lock); 4516 if (sched) { 4517 taskqueue_enqueue(txr->hn_tx_taskq, 4518 &txr->hn_tx_task); 4519 } 4520 } else { 4521 do_sched: 4522 /* 4523 * Release the oactive earlier, with the hope, that 4524 * others could catch up. The task will clear the 4525 * oactive again with the hn_tx_lock to avoid possible 4526 * races. 
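/*
 * The TX ring selection in hn_transmit() above, condensed into a
 * hypothetical helper: with the RSS option the flow hash is mapped to an
 * RSS bucket, otherwise the flowid is taken modulo the number of rings in
 * use, and small pure TCP SYN segments are pinned to ring 0, mirroring
 * the checks above.
 */
#if 0
static int
select_tx_ring(uint32_t flowid, int ring_inuse, int is_tcp_syn)
{
	if (is_tcp_syn)
		return (0);			/* primary ring */
	return (flowid % ring_inuse);
}
#endif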
4527 */ 4528 txr->hn_oactive = 0; 4529 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 4530 } 4531 } 4532 4533 static void 4534 hn_xmit_taskfunc(void *xtxr, int pending __unused) 4535 { 4536 struct hn_tx_ring *txr = xtxr; 4537 4538 mtx_lock(&txr->hn_tx_lock); 4539 hn_xmit(txr, 0); 4540 mtx_unlock(&txr->hn_tx_lock); 4541 } 4542 4543 static void 4544 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) 4545 { 4546 struct hn_tx_ring *txr = xtxr; 4547 4548 mtx_lock(&txr->hn_tx_lock); 4549 txr->hn_oactive = 0; 4550 hn_xmit(txr, 0); 4551 mtx_unlock(&txr->hn_tx_lock); 4552 } 4553 4554 static int 4555 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) 4556 { 4557 struct vmbus_chan_br cbr; 4558 struct hn_rx_ring *rxr; 4559 struct hn_tx_ring *txr = NULL; 4560 int idx, error; 4561 4562 idx = vmbus_chan_subidx(chan); 4563 4564 /* 4565 * Link this channel to RX/TX ring. 4566 */ 4567 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 4568 ("invalid channel index %d, should > 0 && < %d", 4569 idx, sc->hn_rx_ring_inuse)); 4570 rxr = &sc->hn_rx_ring[idx]; 4571 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, 4572 ("RX ring %d already attached", idx)); 4573 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; 4574 rxr->hn_chan = chan; 4575 4576 if (bootverbose) { 4577 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", 4578 idx, vmbus_chan_id(chan)); 4579 } 4580 4581 if (idx < sc->hn_tx_ring_inuse) { 4582 txr = &sc->hn_tx_ring[idx]; 4583 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, 4584 ("TX ring %d already attached", idx)); 4585 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; 4586 4587 txr->hn_chan = chan; 4588 if (bootverbose) { 4589 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", 4590 idx, vmbus_chan_id(chan)); 4591 } 4592 } 4593 4594 /* Bind this channel to a proper CPU. */ 4595 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx)); 4596 4597 /* 4598 * Open this channel 4599 */ 4600 cbr.cbr = rxr->hn_br; 4601 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; 4602 cbr.cbr_txsz = HN_TXBR_SIZE; 4603 cbr.cbr_rxsz = HN_RXBR_SIZE; 4604 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); 4605 if (error) { 4606 if (error == EISCONN) { 4607 if_printf(sc->hn_ifp, "bufring is connected after " 4608 "chan%u open failure\n", vmbus_chan_id(chan)); 4609 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 4610 } else { 4611 if_printf(sc->hn_ifp, "open chan%u failed: %d\n", 4612 vmbus_chan_id(chan), error); 4613 } 4614 } 4615 return (error); 4616 } 4617 4618 static void 4619 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) 4620 { 4621 struct hn_rx_ring *rxr; 4622 int idx, error; 4623 4624 idx = vmbus_chan_subidx(chan); 4625 4626 /* 4627 * Link this channel to RX/TX ring. 4628 */ 4629 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 4630 ("invalid channel index %d, should > 0 && < %d", 4631 idx, sc->hn_rx_ring_inuse)); 4632 rxr = &sc->hn_rx_ring[idx]; 4633 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), 4634 ("RX ring %d is not attached", idx)); 4635 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; 4636 4637 if (idx < sc->hn_tx_ring_inuse) { 4638 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; 4639 4640 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), 4641 ("TX ring %d is not attached attached", idx)); 4642 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; 4643 } 4644 4645 /* 4646 * Close this channel. 4647 * 4648 * NOTE: 4649 * Channel closing does _not_ destroy the target channel. 
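/*
 * hn_chan_attach() above binds each channel to a CPU via
 * HN_RING_IDX2CPU().  The helper below is only an illustrative guess at
 * such a policy (spreading ring indices round-robin over the online
 * CPUs); it is not the macro's actual definition.
 */
#if 0
static int
ring_idx_to_cpu(int idx)
{
	return (idx % mp_ncpus);	/* assumed round-robin policy */
}
#endif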
4650 */ 4651 error = vmbus_chan_close_direct(chan); 4652 if (error == EISCONN) { 4653 if_printf(sc->hn_ifp, "chan%u bufring is connected " 4654 "after being closed\n", vmbus_chan_id(chan)); 4655 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF; 4656 } else if (error) { 4657 if_printf(sc->hn_ifp, "chan%u close failed: %d\n", 4658 vmbus_chan_id(chan), error); 4659 } 4660 } 4661 4662 static int 4663 hn_attach_subchans(struct hn_softc *sc) 4664 { 4665 struct vmbus_channel **subchans; 4666 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 4667 int i, error = 0; 4668 4669 KASSERT(subchan_cnt > 0, ("no sub-channels")); 4670 4671 /* Attach the sub-channels. */ 4672 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 4673 for (i = 0; i < subchan_cnt; ++i) { 4674 int error1; 4675 4676 error1 = hn_chan_attach(sc, subchans[i]); 4677 if (error1) { 4678 error = error1; 4679 /* Move on; all channels will be detached later. */ 4680 } 4681 } 4682 vmbus_subchan_rel(subchans, subchan_cnt); 4683 4684 if (error) { 4685 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); 4686 } else { 4687 if (bootverbose) { 4688 if_printf(sc->hn_ifp, "%d sub-channels attached\n", 4689 subchan_cnt); 4690 } 4691 } 4692 return (error); 4693 } 4694 4695 static void 4696 hn_detach_allchans(struct hn_softc *sc) 4697 { 4698 struct vmbus_channel **subchans; 4699 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 4700 int i; 4701 4702 if (subchan_cnt == 0) 4703 goto back; 4704 4705 /* Detach the sub-channels. */ 4706 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 4707 for (i = 0; i < subchan_cnt; ++i) 4708 hn_chan_detach(sc, subchans[i]); 4709 vmbus_subchan_rel(subchans, subchan_cnt); 4710 4711 back: 4712 /* 4713 * Detach the primary channel, _after_ all sub-channels 4714 * are detached. 4715 */ 4716 hn_chan_detach(sc, sc->hn_prichan); 4717 4718 /* Wait for sub-channels to be destroyed, if any. */ 4719 vmbus_subchan_drain(sc->hn_prichan); 4720 4721 #ifdef INVARIANTS 4722 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4723 KASSERT((sc->hn_rx_ring[i].hn_rx_flags & 4724 HN_RX_FLAG_ATTACHED) == 0, 4725 ("%dth RX ring is still attached", i)); 4726 } 4727 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4728 KASSERT((sc->hn_tx_ring[i].hn_tx_flags & 4729 HN_TX_FLAG_ATTACHED) == 0, 4730 ("%dth TX ring is still attached", i)); 4731 } 4732 #endif 4733 } 4734 4735 static int 4736 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) 4737 { 4738 struct vmbus_channel **subchans; 4739 int nchan, rxr_cnt, error; 4740 4741 nchan = *nsubch + 1; 4742 if (nchan == 1) { 4743 /* 4744 * Multiple RX/TX rings are not requested. 4745 */ 4746 *nsubch = 0; 4747 return (0); 4748 } 4749 4750 /* 4751 * Query RSS capabilities, e.g. # of RX rings, and # of indirect 4752 * table entries. 4753 */ 4754 error = hn_rndis_query_rsscaps(sc, &rxr_cnt); 4755 if (error) { 4756 /* No RSS; this is benign. */ 4757 *nsubch = 0; 4758 return (0); 4759 } 4760 if (bootverbose) { 4761 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", 4762 rxr_cnt, nchan); 4763 } 4764 4765 if (nchan > rxr_cnt) 4766 nchan = rxr_cnt; 4767 if (nchan == 1) { 4768 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); 4769 *nsubch = 0; 4770 return (0); 4771 } 4772 4773 /* 4774 * Allocate sub-channels from NVS. 4775 */ 4776 *nsubch = nchan - 1; 4777 error = hn_nvs_alloc_subchans(sc, nsubch); 4778 if (error || *nsubch == 0) { 4779 /* Failed to allocate sub-channels. 
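/*
 * The sub-channel negotiation in hn_synth_alloc_subchans() above,
 * condensed into a hypothetical helper: the usable channel count is the
 * requested ring count clamped by the RSS-capable RX ring count reported
 * by RNDIS, and NVS may still grant fewer sub-channels than requested.
 */
#if 0
static int
negotiate_nchan(int req_rings, int rsscap_rxr)
{
	int nchan = req_rings;

	if (nchan > rsscap_rxr)
		nchan = rsscap_rxr;
	if (nchan < 1)
		nchan = 1;
	return (nchan);		/* sub-channels to request = nchan - 1 */
}
#endif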
*/ 4780 *nsubch = 0; 4781 return (0); 4782 } 4783 4784 /* 4785 * Wait for all sub-channels to become ready before moving on. 4786 */ 4787 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); 4788 vmbus_subchan_rel(subchans, *nsubch); 4789 return (0); 4790 } 4791 4792 static bool 4793 hn_synth_attachable(const struct hn_softc *sc) 4794 { 4795 int i; 4796 4797 if (sc->hn_flags & HN_FLAG_ERRORS) 4798 return (false); 4799 4800 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4801 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 4802 4803 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) 4804 return (false); 4805 } 4806 return (true); 4807 } 4808 4809 /* 4810 * Make sure that the RX filter is zero after the successful 4811 * RNDIS initialization. 4812 * 4813 * NOTE: 4814 * Under certain conditions on certain versions of Hyper-V, 4815 * the RNDIS rxfilter is _not_ zero on the hypervisor side 4816 * after the successful RNDIS initialization, which breaks 4817 * the assumption of any following code (well, it breaks the 4818 * RNDIS API contract actually). Clear the RNDIS rxfilter 4819 * explicitly, drain packets sneaking through, and drain the 4820 * interrupt taskqueues scheduled due to the stealth packets. 4821 */ 4822 static void 4823 hn_rndis_init_fixat(struct hn_softc *sc, int nchan) 4824 { 4825 4826 hn_disable_rx(sc); 4827 hn_drain_rxtx(sc, nchan); 4828 } 4829 4830 static int 4831 hn_synth_attach(struct hn_softc *sc, int mtu) 4832 { 4833 #define ATTACHED_NVS 0x0002 4834 #define ATTACHED_RNDIS 0x0004 4835 4836 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 4837 int error, nsubch, nchan = 1, i, rndis_inited; 4838 uint32_t old_caps, attached = 0; 4839 4840 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, 4841 ("synthetic parts were attached")); 4842 4843 if (!hn_synth_attachable(sc)) 4844 return (ENXIO); 4845 4846 /* Save capabilities for later verification. */ 4847 old_caps = sc->hn_caps; 4848 sc->hn_caps = 0; 4849 4850 /* Clear RSS stuffs. */ 4851 sc->hn_rss_ind_size = 0; 4852 sc->hn_rss_hash = 0; 4853 4854 /* 4855 * Attach the primary channel _before_ attaching NVS and RNDIS. 4856 */ 4857 error = hn_chan_attach(sc, sc->hn_prichan); 4858 if (error) 4859 goto failed; 4860 4861 /* 4862 * Attach NVS. 4863 */ 4864 error = hn_nvs_attach(sc, mtu); 4865 if (error) 4866 goto failed; 4867 attached |= ATTACHED_NVS; 4868 4869 /* 4870 * Attach RNDIS _after_ NVS is attached. 4871 */ 4872 error = hn_rndis_attach(sc, mtu, &rndis_inited); 4873 if (rndis_inited) 4874 attached |= ATTACHED_RNDIS; 4875 if (error) 4876 goto failed; 4877 4878 /* 4879 * Make sure capabilities are not changed. 4880 */ 4881 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { 4882 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", 4883 old_caps, sc->hn_caps); 4884 error = ENXIO; 4885 goto failed; 4886 } 4887 4888 /* 4889 * Allocate sub-channels for multi-TX/RX rings. 4890 * 4891 * NOTE: 4892 * The # of RX rings that can be used is equivalent to the # of 4893 * channels to be requested. 4894 */ 4895 nsubch = sc->hn_rx_ring_cnt - 1; 4896 error = hn_synth_alloc_subchans(sc, &nsubch); 4897 if (error) 4898 goto failed; 4899 /* NOTE: _Full_ synthetic parts detach is required now. */ 4900 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; 4901 4902 /* 4903 * Set the # of TX/RX rings that could be used according to 4904 * the # of channels that NVS offered. 
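/*
 * hn_synth_attach() tracks partially attached layers with the ATTACHED_*
 * bits.  A minimal sketch of that unwinding shape, using hypothetical
 * nvs_attach()/rndis_attach() names (not this driver's real functions):
 * each successfully attached layer sets a bit, and the failure path tears
 * down only those layers, in reverse order of attachment.
 */
#if 0
int	nvs_attach(void);		/* hypothetical */
int	rndis_attach(void);		/* hypothetical */
void	nvs_detach(void);		/* hypothetical */
void	rndis_detach(void);		/* hypothetical */

#define	ATT_NVS		0x1
#define	ATT_RNDIS	0x2

static int
attach_synth_parts(void)
{
	uint32_t attached = 0;
	int error;

	if ((error = nvs_attach()) != 0)
		goto failed;
	attached |= ATT_NVS;
	if ((error = rndis_attach()) != 0)
		goto failed;
	attached |= ATT_RNDIS;
	return (0);
failed:
	if (attached & ATT_RNDIS)
		rndis_detach();
	if (attached & ATT_NVS)
		nvs_detach();
	return (error);
}
#endif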
4905 */ 4906 nchan = nsubch + 1; 4907 hn_set_ring_inuse(sc, nchan); 4908 if (nchan == 1) { 4909 /* Only the primary channel can be used; done */ 4910 goto back; 4911 } 4912 4913 /* 4914 * Attach the sub-channels. 4915 * 4916 * NOTE: hn_set_ring_inuse() _must_ have been called. 4917 */ 4918 error = hn_attach_subchans(sc); 4919 if (error) 4920 goto failed; 4921 4922 /* 4923 * Configure RSS key and indirect table _after_ all sub-channels 4924 * are attached. 4925 */ 4926 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { 4927 /* 4928 * RSS key is not set yet; set it to the default RSS key. 4929 */ 4930 if (bootverbose) 4931 if_printf(sc->hn_ifp, "setup default RSS key\n"); 4932 #ifdef RSS 4933 rss_getkey(rss->rss_key); 4934 #else 4935 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); 4936 #endif 4937 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 4938 } 4939 4940 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { 4941 /* 4942 * RSS indirect table is not set yet; set it up in round- 4943 * robin fashion. 4944 */ 4945 if (bootverbose) { 4946 if_printf(sc->hn_ifp, "setup default RSS indirect " 4947 "table\n"); 4948 } 4949 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 4950 uint32_t subidx; 4951 4952 #ifdef RSS 4953 subidx = rss_get_indirection_to_bucket(i); 4954 #else 4955 subidx = i; 4956 #endif 4957 rss->rss_ind[i] = subidx % nchan; 4958 } 4959 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 4960 } else { 4961 /* 4962 * # of usable channels may be changed, so we have to 4963 * make sure that all entries in RSS indirect table 4964 * are valid. 4965 * 4966 * NOTE: hn_set_ring_inuse() _must_ have been called. 4967 */ 4968 hn_rss_ind_fixup(sc); 4969 } 4970 4971 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 4972 if (error) 4973 goto failed; 4974 back: 4975 /* 4976 * Fixup transmission aggregation setup. 4977 */ 4978 hn_set_txagg(sc); 4979 hn_rndis_init_fixat(sc, nchan); 4980 return (0); 4981 4982 failed: 4983 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 4984 hn_rndis_init_fixat(sc, nchan); 4985 hn_synth_detach(sc); 4986 } else { 4987 if (attached & ATTACHED_RNDIS) { 4988 hn_rndis_init_fixat(sc, nchan); 4989 hn_rndis_detach(sc); 4990 } 4991 if (attached & ATTACHED_NVS) 4992 hn_nvs_detach(sc); 4993 hn_chan_detach(sc, sc->hn_prichan); 4994 /* Restore old capabilities. */ 4995 sc->hn_caps = old_caps; 4996 } 4997 return (error); 4998 4999 #undef ATTACHED_RNDIS 5000 #undef ATTACHED_NVS 5001 } 5002 5003 /* 5004 * NOTE: 5005 * The interface must have been suspended though hn_suspend(), before 5006 * this function get called. 5007 */ 5008 static void 5009 hn_synth_detach(struct hn_softc *sc) 5010 { 5011 5012 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 5013 ("synthetic parts were not attached")); 5014 5015 /* Detach the RNDIS first. */ 5016 hn_rndis_detach(sc); 5017 5018 /* Detach NVS. */ 5019 hn_nvs_detach(sc); 5020 5021 /* Detach all of the channels. 
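/*
 * The default RSS indirect table setup above, shown standalone with
 * hypothetical names: without the RSS kernel option each entry is filled
 * round-robin over the channels in use, so the receive hash spreads flows
 * evenly across the rings.
 */
#if 0
static void
fill_rss_ind_round_robin(uint32_t *ind, int ind_cnt, int nchan)
{
	int i;

	for (i = 0; i < ind_cnt; ++i)
		ind[i] = i % nchan;	/* same idea as rss_ind[i] above */
}
#endif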
*/ 5022 hn_detach_allchans(sc); 5023 5024 sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED; 5025 } 5026 5027 static void 5028 hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt) 5029 { 5030 KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt, 5031 ("invalid ring count %d", ring_cnt)); 5032 5033 if (sc->hn_tx_ring_cnt > ring_cnt) 5034 sc->hn_tx_ring_inuse = ring_cnt; 5035 else 5036 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 5037 sc->hn_rx_ring_inuse = ring_cnt; 5038 5039 #ifdef RSS 5040 if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) { 5041 if_printf(sc->hn_ifp, "# of RX rings (%d) does not match " 5042 "# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse, 5043 rss_getnumbuckets()); 5044 } 5045 #endif 5046 5047 if (bootverbose) { 5048 if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n", 5049 sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); 5050 } 5051 } 5052 5053 static void 5054 hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan) 5055 { 5056 5057 /* 5058 * NOTE: 5059 * The TX bufring will not be drained by the hypervisor, 5060 * if the primary channel is revoked. 5061 */ 5062 while (!vmbus_chan_rx_empty(chan) || 5063 (!vmbus_chan_is_revoked(sc->hn_prichan) && 5064 !vmbus_chan_tx_empty(chan))) 5065 pause("waitch", 1); 5066 vmbus_chan_intr_drain(chan); 5067 } 5068 5069 static void 5070 hn_disable_rx(struct hn_softc *sc) 5071 { 5072 5073 /* 5074 * Disable RX by clearing RX filter forcefully. 5075 */ 5076 sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE; 5077 hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */ 5078 5079 /* 5080 * Give RNDIS enough time to flush all pending data packets. 5081 */ 5082 pause("waitrx", (200 * hz) / 1000); 5083 } 5084 5085 /* 5086 * NOTE: 5087 * RX/TX _must_ have been suspended/disabled, before this function 5088 * is called. 5089 */ 5090 static void 5091 hn_drain_rxtx(struct hn_softc *sc, int nchan) 5092 { 5093 struct vmbus_channel **subch = NULL; 5094 int nsubch; 5095 5096 /* 5097 * Drain RX/TX bufrings and interrupts. 5098 */ 5099 nsubch = nchan - 1; 5100 if (nsubch > 0) 5101 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 5102 5103 if (subch != NULL) { 5104 int i; 5105 5106 for (i = 0; i < nsubch; ++i) 5107 hn_chan_drain(sc, subch[i]); 5108 } 5109 hn_chan_drain(sc, sc->hn_prichan); 5110 5111 if (subch != NULL) 5112 vmbus_subchan_rel(subch, nsubch); 5113 } 5114 5115 static void 5116 hn_suspend_data(struct hn_softc *sc) 5117 { 5118 struct hn_tx_ring *txr; 5119 int i; 5120 5121 HN_LOCK_ASSERT(sc); 5122 5123 /* 5124 * Suspend TX. 5125 */ 5126 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 5127 txr = &sc->hn_tx_ring[i]; 5128 5129 mtx_lock(&txr->hn_tx_lock); 5130 txr->hn_suspended = 1; 5131 mtx_unlock(&txr->hn_tx_lock); 5132 /* No one is able send more packets now. */ 5133 5134 /* 5135 * Wait for all pending sends to finish. 5136 * 5137 * NOTE: 5138 * We will _not_ receive all pending send-done, if the 5139 * primary channel is revoked. 5140 */ 5141 while (hn_tx_ring_pending(txr) && 5142 !vmbus_chan_is_revoked(sc->hn_prichan)) 5143 pause("hnwtx", 1 /* 1 tick */); 5144 } 5145 5146 /* 5147 * Disable RX. 5148 */ 5149 hn_disable_rx(sc); 5150 5151 /* 5152 * Drain RX/TX. 5153 */ 5154 hn_drain_rxtx(sc, sc->hn_rx_ring_inuse); 5155 5156 /* 5157 * Drain any pending TX tasks. 5158 * 5159 * NOTE: 5160 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX 5161 * tasks will have to be drained _after_ the above hn_drain_rxtx(). 
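/*
 * The suspend ordering above, as a sketch with hypothetical names: the
 * "suspended" flag is published under the ring lock so no new sends can
 * start, then the ring waits for sends already posted to the channel to
 * complete before RX is disabled and the bufrings/taskqueues are drained.
 */
#if 0
static void
suspend_tx_ring(struct mtx *lock, int *suspended, int (*pending)(void))
{
	mtx_lock(lock);
	*suspended = 1;			/* no new sends after this */
	mtx_unlock(lock);

	while (pending())		/* sends still in flight */
		pause("wtx", 1);	/* sleep one tick and re-check */
}
#endif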
5162 */ 5163 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 5164 txr = &sc->hn_tx_ring[i]; 5165 5166 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); 5167 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); 5168 } 5169 } 5170 5171 static void 5172 hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused) 5173 { 5174 5175 ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL; 5176 } 5177 5178 static void 5179 hn_suspend_mgmt(struct hn_softc *sc) 5180 { 5181 struct task task; 5182 5183 HN_LOCK_ASSERT(sc); 5184 5185 /* 5186 * Make sure that hn_mgmt_taskq0 can nolonger be accessed 5187 * through hn_mgmt_taskq. 5188 */ 5189 TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc); 5190 vmbus_chan_run_task(sc->hn_prichan, &task); 5191 5192 /* 5193 * Make sure that all pending management tasks are completed. 5194 */ 5195 taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init); 5196 taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status); 5197 taskqueue_drain_all(sc->hn_mgmt_taskq0); 5198 } 5199 5200 static void 5201 hn_suspend(struct hn_softc *sc) 5202 { 5203 5204 /* Disable polling. */ 5205 hn_polling(sc, 0); 5206 5207 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || 5208 (sc->hn_flags & HN_FLAG_VF)) 5209 hn_suspend_data(sc); 5210 hn_suspend_mgmt(sc); 5211 } 5212 5213 static void 5214 hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt) 5215 { 5216 int i; 5217 5218 KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt, 5219 ("invalid TX ring count %d", tx_ring_cnt)); 5220 5221 for (i = 0; i < tx_ring_cnt; ++i) { 5222 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 5223 5224 mtx_lock(&txr->hn_tx_lock); 5225 txr->hn_suspended = 0; 5226 mtx_unlock(&txr->hn_tx_lock); 5227 } 5228 } 5229 5230 static void 5231 hn_resume_data(struct hn_softc *sc) 5232 { 5233 int i; 5234 5235 HN_LOCK_ASSERT(sc); 5236 5237 /* 5238 * Re-enable RX. 5239 */ 5240 hn_rxfilter_config(sc); 5241 5242 /* 5243 * Make sure to clear suspend status on "all" TX rings, 5244 * since hn_tx_ring_inuse can be changed after 5245 * hn_suspend_data(). 5246 */ 5247 hn_resume_tx(sc, sc->hn_tx_ring_cnt); 5248 5249 #ifdef HN_IFSTART_SUPPORT 5250 if (!hn_use_if_start) 5251 #endif 5252 { 5253 /* 5254 * Flush unused drbrs, since hn_tx_ring_inuse may be 5255 * reduced. 5256 */ 5257 for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i) 5258 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 5259 } 5260 5261 /* 5262 * Kick start TX. 5263 */ 5264 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 5265 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 5266 5267 /* 5268 * Use txeof task, so that any pending oactive can be 5269 * cleared properly. 5270 */ 5271 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 5272 } 5273 } 5274 5275 static void 5276 hn_resume_mgmt(struct hn_softc *sc) 5277 { 5278 5279 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 5280 5281 /* 5282 * Kick off network change detection, if it was pending. 5283 * If no network change was pending, start link status 5284 * checks, which is more lightweight than network change 5285 * detection. 5286 */ 5287 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 5288 hn_change_network(sc); 5289 else 5290 hn_update_link_status(sc); 5291 } 5292 5293 static void 5294 hn_resume(struct hn_softc *sc) 5295 { 5296 5297 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || 5298 (sc->hn_flags & HN_FLAG_VF)) 5299 hn_resume_data(sc); 5300 5301 /* 5302 * When the VF is activated, the synthetic interface is changed 5303 * to DOWN in hn_set_vf(). 
Here, if the VF is still active, we 5304 * don't call hn_resume_mgmt() until the VF is deactivated in 5305 * hn_set_vf(). 5306 */ 5307 if (!(sc->hn_flags & HN_FLAG_VF)) 5308 hn_resume_mgmt(sc); 5309 5310 /* 5311 * Re-enable polling if this interface is running and 5312 * the polling is requested. 5313 */ 5314 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0) 5315 hn_polling(sc, sc->hn_pollhz); 5316 } 5317 5318 static void 5319 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) 5320 { 5321 const struct rndis_status_msg *msg; 5322 int ofs; 5323 5324 if (dlen < sizeof(*msg)) { 5325 if_printf(sc->hn_ifp, "invalid RNDIS status\n"); 5326 return; 5327 } 5328 msg = data; 5329 5330 switch (msg->rm_status) { 5331 case RNDIS_STATUS_MEDIA_CONNECT: 5332 case RNDIS_STATUS_MEDIA_DISCONNECT: 5333 hn_update_link_status(sc); 5334 break; 5335 5336 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: 5337 /* Not really useful; ignore. */ 5338 break; 5339 5340 case RNDIS_STATUS_NETWORK_CHANGE: 5341 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); 5342 if (dlen < ofs + msg->rm_stbuflen || 5343 msg->rm_stbuflen < sizeof(uint32_t)) { 5344 if_printf(sc->hn_ifp, "network changed\n"); 5345 } else { 5346 uint32_t change; 5347 5348 memcpy(&change, ((const uint8_t *)msg) + ofs, 5349 sizeof(change)); 5350 if_printf(sc->hn_ifp, "network changed, change %u\n", 5351 change); 5352 } 5353 hn_change_network(sc); 5354 break; 5355 5356 default: 5357 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", 5358 msg->rm_status); 5359 break; 5360 } 5361 } 5362 5363 static int 5364 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) 5365 { 5366 const struct rndis_pktinfo *pi = info_data; 5367 uint32_t mask = 0; 5368 5369 while (info_dlen != 0) { 5370 const void *data; 5371 uint32_t dlen; 5372 5373 if (__predict_false(info_dlen < sizeof(*pi))) 5374 return (EINVAL); 5375 if (__predict_false(info_dlen < pi->rm_size)) 5376 return (EINVAL); 5377 info_dlen -= pi->rm_size; 5378 5379 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) 5380 return (EINVAL); 5381 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) 5382 return (EINVAL); 5383 dlen = pi->rm_size - pi->rm_pktinfooffset; 5384 data = pi->rm_data; 5385 5386 switch (pi->rm_type) { 5387 case NDIS_PKTINFO_TYPE_VLAN: 5388 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) 5389 return (EINVAL); 5390 info->vlan_info = *((const uint32_t *)data); 5391 mask |= HN_RXINFO_VLAN; 5392 break; 5393 5394 case NDIS_PKTINFO_TYPE_CSUM: 5395 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) 5396 return (EINVAL); 5397 info->csum_info = *((const uint32_t *)data); 5398 mask |= HN_RXINFO_CSUM; 5399 break; 5400 5401 case HN_NDIS_PKTINFO_TYPE_HASHVAL: 5402 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) 5403 return (EINVAL); 5404 info->hash_value = *((const uint32_t *)data); 5405 mask |= HN_RXINFO_HASHVAL; 5406 break; 5407 5408 case HN_NDIS_PKTINFO_TYPE_HASHINF: 5409 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) 5410 return (EINVAL); 5411 info->hash_info = *((const uint32_t *)data); 5412 mask |= HN_RXINFO_HASHINF; 5413 break; 5414 5415 default: 5416 goto next; 5417 } 5418 5419 if (mask == HN_RXINFO_ALL) { 5420 /* All found; done */ 5421 break; 5422 } 5423 next: 5424 pi = (const struct rndis_pktinfo *) 5425 ((const uint8_t *)pi + pi->rm_size); 5426 } 5427 5428 /* 5429 * Final fixup. 5430 * - If there is no hash value, invalidate the hash info. 
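/*
 * hn_rndis_rxinfo() above walks variable-size per-packet-info records.
 * The same walk, reduced to a generic sketch with a hypothetical record
 * layout (tlv_rec is illustrative only): each record carries its own
 * size, which is validated against the remaining buffer before stepping
 * to the next record.
 */
#if 0
struct tlv_rec {
	uint32_t	size;		/* total record size, incl. header */
	uint32_t	type;
	uint32_t	data_off;	/* payload offset within the record */
};

static int
walk_tlv(const uint8_t *buf, int len)
{
	while (len != 0) {
		const struct tlv_rec *r = (const struct tlv_rec *)buf;

		if (len < (int)sizeof(*r) || r->size < sizeof(*r) ||
		    r->size > (uint32_t)len || r->size < r->data_off)
			return (EINVAL);	/* malformed record */
		/* ... consume payload at buf + r->data_off ... */
		buf += r->size;
		len -= r->size;
	}
	return (0);
}
#endif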
5431 */ 5432 if ((mask & HN_RXINFO_HASHVAL) == 0) 5433 info->hash_info = HN_NDIS_HASH_INFO_INVALID; 5434 return (0); 5435 } 5436 5437 static __inline bool 5438 hn_rndis_check_overlap(int off, int len, int check_off, int check_len) 5439 { 5440 5441 if (off < check_off) { 5442 if (__predict_true(off + len <= check_off)) 5443 return (false); 5444 } else if (off > check_off) { 5445 if (__predict_true(check_off + check_len <= off)) 5446 return (false); 5447 } 5448 return (true); 5449 } 5450 5451 static void 5452 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) 5453 { 5454 const struct rndis_packet_msg *pkt; 5455 struct hn_rxinfo info; 5456 int data_off, pktinfo_off, data_len, pktinfo_len; 5457 5458 /* 5459 * Check length. 5460 */ 5461 if (__predict_false(dlen < sizeof(*pkt))) { 5462 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n"); 5463 return; 5464 } 5465 pkt = data; 5466 5467 if (__predict_false(dlen < pkt->rm_len)) { 5468 if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, " 5469 "dlen %d, msglen %u\n", dlen, pkt->rm_len); 5470 return; 5471 } 5472 if (__predict_false(pkt->rm_len < 5473 pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) { 5474 if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, " 5475 "msglen %u, data %u, oob %u, pktinfo %u\n", 5476 pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen, 5477 pkt->rm_pktinfolen); 5478 return; 5479 } 5480 if (__predict_false(pkt->rm_datalen == 0)) { 5481 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n"); 5482 return; 5483 } 5484 5485 /* 5486 * Check offests. 5487 */ 5488 #define IS_OFFSET_INVALID(ofs) \ 5489 ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \ 5490 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)) 5491 5492 /* XXX Hyper-V does not meet data offset alignment requirement */ 5493 if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) { 5494 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5495 "data offset %u\n", pkt->rm_dataoffset); 5496 return; 5497 } 5498 if (__predict_false(pkt->rm_oobdataoffset > 0 && 5499 IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) { 5500 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5501 "oob offset %u\n", pkt->rm_oobdataoffset); 5502 return; 5503 } 5504 if (__predict_true(pkt->rm_pktinfooffset > 0) && 5505 __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) { 5506 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5507 "pktinfo offset %u\n", pkt->rm_pktinfooffset); 5508 return; 5509 } 5510 5511 #undef IS_OFFSET_INVALID 5512 5513 data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset); 5514 data_len = pkt->rm_datalen; 5515 pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset); 5516 pktinfo_len = pkt->rm_pktinfolen; 5517 5518 /* 5519 * Check OOB coverage. 5520 */ 5521 if (__predict_false(pkt->rm_oobdatalen != 0)) { 5522 int oob_off, oob_len; 5523 5524 if_printf(rxr->hn_ifp, "got oobdata\n"); 5525 oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); 5526 oob_len = pkt->rm_oobdatalen; 5527 5528 if (__predict_false(oob_off + oob_len > pkt->rm_len)) { 5529 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5530 "oob overflow, msglen %u, oob abs %d len %d\n", 5531 pkt->rm_len, oob_off, oob_len); 5532 return; 5533 } 5534 5535 /* 5536 * Check against data. 
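/*
 * hn_rndis_check_overlap() above reports whether [off, off + len) and
 * [check_off, check_off + check_len) intersect.  Two illustrative cases
 * (hypothetical INVARIANTS-style checks, not part of the driver):
 */
#if 0
static void
overlap_examples(void)
{
	/* Disjoint: data at [0, 100) vs. pktinfo at [100, 120). */
	KASSERT(!hn_rndis_check_overlap(0, 100, 100, 20), ("overlap?"));
	/* Intersecting: oob at [90, 110) vs. data at [0, 100). */
	KASSERT(hn_rndis_check_overlap(90, 20, 0, 100), ("no overlap?"));
}
#endif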
5537 */ 5538 if (hn_rndis_check_overlap(oob_off, oob_len, 5539 data_off, data_len)) { 5540 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5541 "oob overlaps data, oob abs %d len %d, " 5542 "data abs %d len %d\n", 5543 oob_off, oob_len, data_off, data_len); 5544 return; 5545 } 5546 5547 /* 5548 * Check against pktinfo. 5549 */ 5550 if (pktinfo_len != 0 && 5551 hn_rndis_check_overlap(oob_off, oob_len, 5552 pktinfo_off, pktinfo_len)) { 5553 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5554 "oob overlaps pktinfo, oob abs %d len %d, " 5555 "pktinfo abs %d len %d\n", 5556 oob_off, oob_len, pktinfo_off, pktinfo_len); 5557 return; 5558 } 5559 } 5560 5561 /* 5562 * Check per-packet-info coverage and find useful per-packet-info. 5563 */ 5564 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; 5565 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; 5566 info.hash_info = HN_NDIS_HASH_INFO_INVALID; 5567 if (__predict_true(pktinfo_len != 0)) { 5568 bool overlap; 5569 int error; 5570 5571 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { 5572 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5573 "pktinfo overflow, msglen %u, " 5574 "pktinfo abs %d len %d\n", 5575 pkt->rm_len, pktinfo_off, pktinfo_len); 5576 return; 5577 } 5578 5579 /* 5580 * Check packet info coverage. 5581 */ 5582 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, 5583 data_off, data_len); 5584 if (__predict_false(overlap)) { 5585 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5586 "pktinfo overlap data, pktinfo abs %d len %d, " 5587 "data abs %d len %d\n", 5588 pktinfo_off, pktinfo_len, data_off, data_len); 5589 return; 5590 } 5591 5592 /* 5593 * Find useful per-packet-info. 5594 */ 5595 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, 5596 pktinfo_len, &info); 5597 if (__predict_false(error)) { 5598 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " 5599 "pktinfo\n"); 5600 return; 5601 } 5602 } 5603 5604 if (__predict_false(data_off + data_len > pkt->rm_len)) { 5605 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 5606 "data overflow, msglen %u, data abs %d len %d\n", 5607 pkt->rm_len, data_off, data_len); 5608 return; 5609 } 5610 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); 5611 } 5612 5613 static __inline void 5614 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) 5615 { 5616 const struct rndis_msghdr *hdr; 5617 5618 if (__predict_false(dlen < sizeof(*hdr))) { 5619 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); 5620 return; 5621 } 5622 hdr = data; 5623 5624 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { 5625 /* Hot data path. */ 5626 hn_rndis_rx_data(rxr, data, dlen); 5627 /* Done! 
*/ 5628 return; 5629 } 5630 5631 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) 5632 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); 5633 else 5634 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); 5635 } 5636 5637 static void 5638 hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) 5639 { 5640 const struct hn_nvs_hdr *hdr; 5641 5642 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { 5643 if_printf(sc->hn_ifp, "invalid nvs notify\n"); 5644 return; 5645 } 5646 hdr = VMBUS_CHANPKT_CONST_DATA(pkt); 5647 5648 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { 5649 /* Useless; ignore */ 5650 return; 5651 } 5652 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); 5653 } 5654 5655 static void 5656 hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, 5657 const struct vmbus_chanpkt_hdr *pkt) 5658 { 5659 struct hn_nvs_sendctx *sndc; 5660 5661 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; 5662 sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), 5663 VMBUS_CHANPKT_DATALEN(pkt)); 5664 /* 5665 * NOTE: 5666 * 'sndc' CAN NOT be accessed anymore, since it can be freed by 5667 * its callback. 5668 */ 5669 } 5670 5671 static void 5672 hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 5673 const struct vmbus_chanpkt_hdr *pkthdr) 5674 { 5675 const struct vmbus_chanpkt_rxbuf *pkt; 5676 const struct hn_nvs_hdr *nvs_hdr; 5677 int count, i, hlen; 5678 5679 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { 5680 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); 5681 return; 5682 } 5683 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); 5684 5685 /* Make sure that this is a RNDIS message. */ 5686 if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { 5687 if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", 5688 nvs_hdr->nvs_type); 5689 return; 5690 } 5691 5692 hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); 5693 if (__predict_false(hlen < sizeof(*pkt))) { 5694 if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); 5695 return; 5696 } 5697 pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; 5698 5699 if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { 5700 if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", 5701 pkt->cp_rxbuf_id); 5702 return; 5703 } 5704 5705 count = pkt->cp_rxbuf_cnt; 5706 if (__predict_false(hlen < 5707 __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { 5708 if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); 5709 return; 5710 } 5711 5712 /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ 5713 for (i = 0; i < count; ++i) { 5714 int ofs, len; 5715 5716 ofs = pkt->cp_rxbuf[i].rb_ofs; 5717 len = pkt->cp_rxbuf[i].rb_len; 5718 if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { 5719 if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " 5720 "ofs %d, len %d\n", i, ofs, len); 5721 continue; 5722 } 5723 hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); 5724 } 5725 5726 /* 5727 * Ack the consumed RXBUF associated w/ this channel packet, 5728 * so that this RXBUF can be recycled by the hypervisor. 
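/*
 * Each (offset, length) range validated above must stay inside the
 * guest-mapped RX buffer before it is parsed as an RNDIS message.  The
 * check, as a standalone hypothetical helper:
 */
#if 0
static int
rxbuf_range_ok(int ofs, int len, int rxbuf_size)
{
	return (ofs >= 0 && len > 0 && ofs + len <= rxbuf_size);
}
#endif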
5729 */ 5730 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); 5731 } 5732 5733 static void 5734 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 5735 uint64_t tid) 5736 { 5737 struct hn_nvs_rndis_ack ack; 5738 int retries, error; 5739 5740 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; 5741 ack.nvs_status = HN_NVS_STATUS_OK; 5742 5743 retries = 0; 5744 again: 5745 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, 5746 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); 5747 if (__predict_false(error == EAGAIN)) { 5748 /* 5749 * NOTE: 5750 * This should _not_ happen in real world, since the 5751 * consumption of the TX bufring from the TX path is 5752 * controlled. 5753 */ 5754 if (rxr->hn_ack_failed == 0) 5755 if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); 5756 rxr->hn_ack_failed++; 5757 retries++; 5758 if (retries < 10) { 5759 DELAY(100); 5760 goto again; 5761 } 5762 /* RXBUF leaks! */ 5763 if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); 5764 } 5765 } 5766 5767 static void 5768 hn_chan_callback(struct vmbus_channel *chan, void *xrxr) 5769 { 5770 struct hn_rx_ring *rxr = xrxr; 5771 struct hn_softc *sc = rxr->hn_ifp->if_softc; 5772 5773 for (;;) { 5774 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; 5775 int error, pktlen; 5776 5777 pktlen = rxr->hn_pktbuf_len; 5778 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); 5779 if (__predict_false(error == ENOBUFS)) { 5780 void *nbuf; 5781 int nlen; 5782 5783 /* 5784 * Expand channel packet buffer. 5785 * 5786 * XXX 5787 * Use M_WAITOK here, since allocation failure 5788 * is fatal. 5789 */ 5790 nlen = rxr->hn_pktbuf_len * 2; 5791 while (nlen < pktlen) 5792 nlen *= 2; 5793 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); 5794 5795 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", 5796 rxr->hn_pktbuf_len, nlen); 5797 5798 free(rxr->hn_pktbuf, M_DEVBUF); 5799 rxr->hn_pktbuf = nbuf; 5800 rxr->hn_pktbuf_len = nlen; 5801 /* Retry! */ 5802 continue; 5803 } else if (__predict_false(error == EAGAIN)) { 5804 /* No more channel packets; done! */ 5805 break; 5806 } 5807 KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error)); 5808 5809 switch (pkt->cph_type) { 5810 case VMBUS_CHANPKT_TYPE_COMP: 5811 hn_nvs_handle_comp(sc, chan, pkt); 5812 break; 5813 5814 case VMBUS_CHANPKT_TYPE_RXBUF: 5815 hn_nvs_handle_rxbuf(rxr, chan, pkt); 5816 break; 5817 5818 case VMBUS_CHANPKT_TYPE_INBAND: 5819 hn_nvs_handle_notify(sc, pkt); 5820 break; 5821 5822 default: 5823 if_printf(rxr->hn_ifp, "unknown chan pkt %u\n", 5824 pkt->cph_type); 5825 break; 5826 } 5827 } 5828 hn_chan_rollup(rxr, rxr->hn_txr); 5829 } 5830 5831 static void 5832 hn_tx_taskq_create(void *arg __unused) 5833 { 5834 int i; 5835 5836 /* 5837 * Fix the # of TX taskqueues. 5838 */ 5839 if (hn_tx_taskq_cnt <= 0) 5840 hn_tx_taskq_cnt = 1; 5841 else if (hn_tx_taskq_cnt > mp_ncpus) 5842 hn_tx_taskq_cnt = mp_ncpus; 5843 5844 /* 5845 * Fix the TX taskqueue mode. 
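/*
 * hn_chan_callback() above grows the per-ring channel packet buffer when
 * vmbus_chan_recv_pkt() returns ENOBUFS.  The growth policy, as a
 * standalone sketch (hypothetical helper; the old contents need not be
 * preserved, since the oversized packet is simply re-read): at least
 * double the buffer, and keep doubling until the reported packet length
 * fits, so repeated reallocation is avoided.
 */
#if 0
static void *
grow_pktbuf(void *old, int *lenp, int need)
{
	int nlen = *lenp * 2;

	while (nlen < need)
		nlen *= 2;
	free(old, M_DEVBUF);
	*lenp = nlen;
	return (malloc(nlen, M_DEVBUF, M_WAITOK));  /* M_WAITOK: sleep, not fail */
}
#endif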
 */
	switch (hn_tx_taskq_mode) {
	case HN_TX_TASKQ_M_INDEP:
	case HN_TX_TASKQ_M_GLOBAL:
	case HN_TX_TASKQ_M_EVTTQ:
		break;
	default:
		hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
		break;
	}

	if (vm_guest != VM_GUEST_HV)
		return;

	if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
		return;

	hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
	    M_DEVBUF, M_WAITOK);
	for (i = 0; i < hn_tx_taskq_cnt; ++i) {
		hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &hn_tx_taskque[i]);
		taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
		    "hn tx%d", i);
	}
}
SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_create, NULL);

static void
hn_tx_taskq_destroy(void *arg __unused)
{

	if (hn_tx_taskque != NULL) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(hn_tx_taskque[i]);
		free(hn_tx_taskque, M_DEVBUF);
	}
}
SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_destroy, NULL);
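/*
 * The tunable fixup above clamps hn_tx_taskq_cnt to [1, mp_ncpus].  The
 * same clamp as a tiny standalone helper (illustrative only):
 */
#if 0
static int
clamp_taskq_cnt(int req, int ncpu)
{
	if (req <= 0)
		return (1);
	if (req > ncpu)
		return (ncpu);
	return (req);
}
#endif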