1 /*- 2 * Copyright (c) 2010-2012 Citrix Inc. 3 * Copyright (c) 2009-2012,2016 Microsoft Corp. 4 * Copyright (c) 2012 NetApp Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 2004-2006 Kip Macy 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 
53 */ 54 55 #include <sys/cdefs.h> 56 __FBSDID("$FreeBSD$"); 57 58 #include "opt_inet6.h" 59 #include "opt_inet.h" 60 61 #include <sys/param.h> 62 #include <sys/bus.h> 63 #include <sys/kernel.h> 64 #include <sys/limits.h> 65 #include <sys/malloc.h> 66 #include <sys/mbuf.h> 67 #include <sys/module.h> 68 #include <sys/queue.h> 69 #include <sys/lock.h> 70 #include <sys/smp.h> 71 #include <sys/socket.h> 72 #include <sys/sockio.h> 73 #include <sys/sx.h> 74 #include <sys/sysctl.h> 75 #include <sys/systm.h> 76 #include <sys/taskqueue.h> 77 #include <sys/buf_ring.h> 78 79 #include <machine/atomic.h> 80 #include <machine/in_cksum.h> 81 82 #include <net/bpf.h> 83 #include <net/ethernet.h> 84 #include <net/if.h> 85 #include <net/if_media.h> 86 #include <net/if_types.h> 87 #include <net/if_var.h> 88 #include <net/rndis.h> 89 90 #include <netinet/in_systm.h> 91 #include <netinet/in.h> 92 #include <netinet/ip.h> 93 #include <netinet/ip6.h> 94 #include <netinet/tcp.h> 95 #include <netinet/tcp_lro.h> 96 #include <netinet/udp.h> 97 98 #include <dev/hyperv/include/hyperv.h> 99 #include <dev/hyperv/include/hyperv_busdma.h> 100 #include <dev/hyperv/include/vmbus.h> 101 #include <dev/hyperv/include/vmbus_xact.h> 102 103 #include <dev/hyperv/netvsc/ndis.h> 104 #include <dev/hyperv/netvsc/if_hnreg.h> 105 #include <dev/hyperv/netvsc/if_hnvar.h> 106 #include <dev/hyperv/netvsc/hn_nvs.h> 107 #include <dev/hyperv/netvsc/hn_rndis.h> 108 109 #include "vmbus_if.h" 110 111 #define HN_IFSTART_SUPPORT 112 113 #define HN_RING_CNT_DEF_MAX 8 114 115 /* YYY should get it from the underlying channel */ 116 #define HN_TX_DESC_CNT 512 117 118 #define HN_RNDIS_PKT_LEN \ 119 (sizeof(struct rndis_packet_msg) + \ 120 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ 121 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ 122 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ 123 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) 124 #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE 125 #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE 126 127 #define HN_TX_DATA_BOUNDARY PAGE_SIZE 128 #define HN_TX_DATA_MAXSIZE IP_MAXPACKET 129 #define HN_TX_DATA_SEGSIZE PAGE_SIZE 130 /* -1 for RNDIS packet message */ 131 #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) 132 133 #define HN_DIRECT_TX_SIZE_DEF 128 134 135 #define HN_EARLY_TXEOF_THRESH 8 136 137 #define HN_PKTBUF_LEN_DEF (16 * 1024) 138 139 #define HN_LROENT_CNT_DEF 128 140 141 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) 142 #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) 143 /* YYY 2*MTU is a bit rough, but should be good enough. 
*/ 144 #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) 145 146 #define HN_LRO_ACKCNT_DEF 1 147 148 #define HN_LOCK_INIT(sc) \ 149 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) 150 #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) 151 #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) 152 #define HN_LOCK(sc) \ 153 do { \ 154 while (sx_try_xlock(&(sc)->hn_lock) == 0) \ 155 DELAY(1000); \ 156 } while (0) 157 #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) 158 159 #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) 160 #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) 161 #define HN_CSUM_IP_HWASSIST(sc) \ 162 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) 163 #define HN_CSUM_IP6_HWASSIST(sc) \ 164 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) 165 166 #define HN_PKTSIZE_MIN(align) \ 167 roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \ 168 HN_RNDIS_PKT_LEN, (align)) 169 #define HN_PKTSIZE(m, align) \ 170 roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align)) 171 172 struct hn_txdesc { 173 #ifndef HN_USE_TXDESC_BUFRING 174 SLIST_ENTRY(hn_txdesc) link; 175 #endif 176 STAILQ_ENTRY(hn_txdesc) agg_link; 177 178 /* Aggregated txdescs, in sending order. */ 179 STAILQ_HEAD(, hn_txdesc) agg_list; 180 181 /* The oldest packet, if transmission aggregation happens. */ 182 struct mbuf *m; 183 struct hn_tx_ring *txr; 184 int refs; 185 uint32_t flags; /* HN_TXD_FLAG_ */ 186 struct hn_nvs_sendctx send_ctx; 187 uint32_t chim_index; 188 int chim_size; 189 190 bus_dmamap_t data_dmap; 191 192 bus_addr_t rndis_pkt_paddr; 193 struct rndis_packet_msg *rndis_pkt; 194 bus_dmamap_t rndis_pkt_dmap; 195 }; 196 197 #define HN_TXD_FLAG_ONLIST 0x0001 198 #define HN_TXD_FLAG_DMAMAP 0x0002 199 #define HN_TXD_FLAG_ONAGG 0x0004 200 201 struct hn_rxinfo { 202 uint32_t vlan_info; 203 uint32_t csum_info; 204 uint32_t hash_info; 205 uint32_t hash_value; 206 }; 207 208 #define HN_RXINFO_VLAN 0x0001 209 #define HN_RXINFO_CSUM 0x0002 210 #define HN_RXINFO_HASHINF 0x0004 211 #define HN_RXINFO_HASHVAL 0x0008 212 #define HN_RXINFO_ALL \ 213 (HN_RXINFO_VLAN | \ 214 HN_RXINFO_CSUM | \ 215 HN_RXINFO_HASHINF | \ 216 HN_RXINFO_HASHVAL) 217 218 #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff 219 #define HN_NDIS_RXCSUM_INFO_INVALID 0 220 #define HN_NDIS_HASH_INFO_INVALID 0 221 222 static int hn_probe(device_t); 223 static int hn_attach(device_t); 224 static int hn_detach(device_t); 225 static int hn_shutdown(device_t); 226 static void hn_chan_callback(struct vmbus_channel *, 227 void *); 228 229 static void hn_init(void *); 230 static int hn_ioctl(struct ifnet *, u_long, caddr_t); 231 #ifdef HN_IFSTART_SUPPORT 232 static void hn_start(struct ifnet *); 233 #endif 234 static int hn_transmit(struct ifnet *, struct mbuf *); 235 static void hn_xmit_qflush(struct ifnet *); 236 static int hn_ifmedia_upd(struct ifnet *); 237 static void hn_ifmedia_sts(struct ifnet *, 238 struct ifmediareq *); 239 240 static int hn_rndis_rxinfo(const void *, int, 241 struct hn_rxinfo *); 242 static void hn_rndis_rx_data(struct hn_rx_ring *, 243 const void *, int); 244 static void hn_rndis_rx_status(struct hn_softc *, 245 const void *, int); 246 247 static void hn_nvs_handle_notify(struct hn_softc *, 248 const struct vmbus_chanpkt_hdr *); 249 static void hn_nvs_handle_comp(struct hn_softc *, 250 struct vmbus_channel *, 251 const struct vmbus_chanpkt_hdr *); 252 static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, 253 struct vmbus_channel *, 254 const struct vmbus_chanpkt_hdr *); 255 static 
void hn_nvs_ack_rxbuf(struct hn_rx_ring *, 256 struct vmbus_channel *, uint64_t); 257 258 #if __FreeBSD_version >= 1100099 259 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); 260 static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); 261 #endif 262 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); 263 static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); 264 #if __FreeBSD_version < 1100095 265 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); 266 #else 267 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); 268 #endif 269 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 270 static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 271 static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); 272 static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); 273 static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); 274 static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); 275 static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); 276 static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); 277 static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); 278 static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); 279 static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS); 280 static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS); 281 static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS); 282 static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS); 283 284 static void hn_stop(struct hn_softc *); 285 static void hn_init_locked(struct hn_softc *); 286 static int hn_chan_attach(struct hn_softc *, 287 struct vmbus_channel *); 288 static void hn_chan_detach(struct hn_softc *, 289 struct vmbus_channel *); 290 static int hn_attach_subchans(struct hn_softc *); 291 static void hn_detach_allchans(struct hn_softc *); 292 static void hn_chan_rollup(struct hn_rx_ring *, 293 struct hn_tx_ring *); 294 static void hn_set_ring_inuse(struct hn_softc *, int); 295 static int hn_synth_attach(struct hn_softc *, int); 296 static void hn_synth_detach(struct hn_softc *); 297 static int hn_synth_alloc_subchans(struct hn_softc *, 298 int *); 299 static void hn_suspend(struct hn_softc *); 300 static void hn_suspend_data(struct hn_softc *); 301 static void hn_suspend_mgmt(struct hn_softc *); 302 static void hn_resume(struct hn_softc *); 303 static void hn_resume_data(struct hn_softc *); 304 static void hn_resume_mgmt(struct hn_softc *); 305 static void hn_suspend_mgmt_taskfunc(void *, int); 306 static void hn_chan_drain(struct vmbus_channel *); 307 308 static void hn_update_link_status(struct hn_softc *); 309 static void hn_change_network(struct hn_softc *); 310 static void hn_link_taskfunc(void *, int); 311 static void hn_netchg_init_taskfunc(void *, int); 312 static void hn_netchg_status_taskfunc(void *, int); 313 static void hn_link_status(struct hn_softc *); 314 315 static int hn_create_rx_data(struct hn_softc *, int); 316 static void hn_destroy_rx_data(struct hn_softc *); 317 static int hn_check_iplen(const struct mbuf *, int); 318 static int hn_set_rxfilter(struct hn_softc *); 319 static int hn_rss_reconfig(struct hn_softc *); 320 static void hn_rss_ind_fixup(struct hn_softc *, int); 321 static int hn_rxpkt(struct hn_rx_ring *, const void *, 322 int, const struct hn_rxinfo *); 323 324 static int hn_tx_ring_create(struct hn_softc *, int); 325 static void hn_tx_ring_destroy(struct hn_tx_ring *); 326 static int hn_create_tx_data(struct hn_softc *, int); 327 static void hn_fixup_tx_data(struct hn_softc *); 328 static void hn_destroy_tx_data(struct hn_softc *); 329 static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); 330 static int 
hn_encap(struct ifnet *, struct hn_tx_ring *,
		    struct hn_txdesc *, struct mbuf **);
static int	hn_txpkt(struct ifnet *, struct hn_tx_ring *,
		    struct hn_txdesc *);
static void	hn_set_chim_size(struct hn_softc *, int);
static void	hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool	hn_tx_ring_pending(struct hn_tx_ring *);
static void	hn_tx_ring_qflush(struct hn_tx_ring *);
static void	hn_resume_tx(struct hn_softc *, int);
static void	hn_set_txagg(struct hn_softc *);
static void	*hn_try_txagg(struct ifnet *,
		    struct hn_tx_ring *, struct hn_txdesc *,
		    int);
static int	hn_get_txswq_depth(const struct hn_tx_ring *);
static void	hn_txpkt_done(struct hn_nvs_sendctx *,
		    struct hn_softc *, struct vmbus_channel *,
		    const void *, int);
static int	hn_txpkt_sglist(struct hn_tx_ring *,
		    struct hn_txdesc *);
static int	hn_txpkt_chim(struct hn_tx_ring *,
		    struct hn_txdesc *);
static int	hn_xmit(struct hn_tx_ring *, int);
static void	hn_xmit_taskfunc(void *, int);
static void	hn_xmit_txeof(struct hn_tx_ring *);
static void	hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int	hn_start_locked(struct hn_tx_ring *, int);
static void	hn_start_taskfunc(void *, int);
static void	hn_start_txeof(struct hn_tx_ring *);
static void	hn_start_txeof_taskfunc(void *, int);
#endif

SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust tcp segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust tcp segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust udp datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust udp datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust ip packet verification on host side.
 */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust ip packet verification on host side, "
    "when csum info is missing (global setting)");

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

/* Use shared TX taskqueue */
static int hn_share_tx_taskq = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN,
    &hn_share_tx_taskq, 0, "Enable shared TX taskqueue");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

/* Bind TX taskqueue to the target CPU */
static int hn_bind_tx_taskq = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN,
    &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");

static u_int hn_cpu_index;	/* next CPU for channel */
static struct
taskqueue *hn_tx_taskq; /* shared TX taskqueue */ 470 471 static const uint8_t 472 hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 473 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 474 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 475 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 476 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 477 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 478 }; 479 480 static device_method_t hn_methods[] = { 481 /* Device interface */ 482 DEVMETHOD(device_probe, hn_probe), 483 DEVMETHOD(device_attach, hn_attach), 484 DEVMETHOD(device_detach, hn_detach), 485 DEVMETHOD(device_shutdown, hn_shutdown), 486 DEVMETHOD_END 487 }; 488 489 static driver_t hn_driver = { 490 "hn", 491 hn_methods, 492 sizeof(struct hn_softc) 493 }; 494 495 static devclass_t hn_devclass; 496 497 DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); 498 MODULE_VERSION(hn, 1); 499 MODULE_DEPEND(hn, vmbus, 1, 1, 1); 500 501 #if __FreeBSD_version >= 1100099 502 static void 503 hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) 504 { 505 int i; 506 507 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 508 sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; 509 } 510 #endif 511 512 static int 513 hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) 514 { 515 516 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 517 txd->chim_size == 0, ("invalid rndis sglist txd")); 518 return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, 519 &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); 520 } 521 522 static int 523 hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) 524 { 525 struct hn_nvs_rndis rndis; 526 527 KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && 528 txd->chim_size > 0, ("invalid rndis chim txd")); 529 530 rndis.nvs_type = HN_NVS_TYPE_RNDIS; 531 rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; 532 rndis.nvs_chim_idx = txd->chim_index; 533 rndis.nvs_chim_sz = txd->chim_size; 534 535 return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, 536 &rndis, sizeof(rndis), &txd->send_ctx)); 537 } 538 539 static __inline uint32_t 540 hn_chim_alloc(struct hn_softc *sc) 541 { 542 int i, bmap_cnt = sc->hn_chim_bmap_cnt; 543 u_long *bmap = sc->hn_chim_bmap; 544 uint32_t ret = HN_NVS_CHIM_IDX_INVALID; 545 546 for (i = 0; i < bmap_cnt; ++i) { 547 int idx; 548 549 idx = ffsl(~bmap[i]); 550 if (idx == 0) 551 continue; 552 553 --idx; /* ffsl is 1-based */ 554 KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, 555 ("invalid i %d and idx %d", i, idx)); 556 557 if (atomic_testandset_long(&bmap[i], idx)) 558 continue; 559 560 ret = i * LONG_BIT + idx; 561 break; 562 } 563 return (ret); 564 } 565 566 static __inline void 567 hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) 568 { 569 u_long mask; 570 uint32_t idx; 571 572 idx = chim_idx / LONG_BIT; 573 KASSERT(idx < sc->hn_chim_bmap_cnt, 574 ("invalid chimney index 0x%x", chim_idx)); 575 576 mask = 1UL << (chim_idx % LONG_BIT); 577 KASSERT(sc->hn_chim_bmap[idx] & mask, 578 ("index bitmap 0x%lx, chimney index %u, " 579 "bitmap idx %d, bitmask 0x%lx", 580 sc->hn_chim_bmap[idx], chim_idx, idx, mask)); 581 582 atomic_clear_long(&sc->hn_chim_bmap[idx], mask); 583 } 584 585 #if defined(INET6) || defined(INET) 586 /* 587 * NOTE: If this function failed, the m_head would be freed. 
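 *
 * The Ethernet, IP/IPv6 and TCP headers are pulled up into the first
 * mbuf; for IPv4 TSO, ip_len and ip_sum are zeroed and th_sum is
 * preloaded with the pseudo-header checksum, while for IPv6 ip6_plen
 * is zeroed and th_sum is preloaded the same way.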
588 */ 589 static __inline struct mbuf * 590 hn_tso_fixup(struct mbuf *m_head) 591 { 592 struct ether_vlan_header *evl; 593 struct tcphdr *th; 594 int ehlen; 595 596 KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable")); 597 598 #define PULLUP_HDR(m, len) \ 599 do { \ 600 if (__predict_false((m)->m_len < (len))) { \ 601 (m) = m_pullup((m), (len)); \ 602 if ((m) == NULL) \ 603 return (NULL); \ 604 } \ 605 } while (0) 606 607 PULLUP_HDR(m_head, sizeof(*evl)); 608 evl = mtod(m_head, struct ether_vlan_header *); 609 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN)) 610 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 611 else 612 ehlen = ETHER_HDR_LEN; 613 614 #ifdef INET 615 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 616 struct ip *ip; 617 int iphlen; 618 619 PULLUP_HDR(m_head, ehlen + sizeof(*ip)); 620 ip = mtodo(m_head, ehlen); 621 iphlen = ip->ip_hl << 2; 622 623 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th)); 624 th = mtodo(m_head, ehlen + iphlen); 625 626 ip->ip_len = 0; 627 ip->ip_sum = 0; 628 th->th_sum = in_pseudo(ip->ip_src.s_addr, 629 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 630 } 631 #endif 632 #if defined(INET6) && defined(INET) 633 else 634 #endif 635 #ifdef INET6 636 { 637 struct ip6_hdr *ip6; 638 639 PULLUP_HDR(m_head, ehlen + sizeof(*ip6)); 640 ip6 = mtodo(m_head, ehlen); 641 if (ip6->ip6_nxt != IPPROTO_TCP) { 642 m_freem(m_head); 643 return (NULL); 644 } 645 646 PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th)); 647 th = mtodo(m_head, ehlen + sizeof(*ip6)); 648 649 ip6->ip6_plen = 0; 650 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 651 } 652 #endif 653 return (m_head); 654 655 #undef PULLUP_HDR 656 } 657 #endif /* INET6 || INET */ 658 659 static int 660 hn_set_rxfilter(struct hn_softc *sc) 661 { 662 struct ifnet *ifp = sc->hn_ifp; 663 uint32_t filter; 664 int error = 0; 665 666 HN_LOCK_ASSERT(sc); 667 668 if (ifp->if_flags & IFF_PROMISC) { 669 filter = NDIS_PACKET_TYPE_PROMISCUOUS; 670 } else { 671 filter = NDIS_PACKET_TYPE_DIRECTED; 672 if (ifp->if_flags & IFF_BROADCAST) 673 filter |= NDIS_PACKET_TYPE_BROADCAST; 674 /* TODO: support multicast list */ 675 if ((ifp->if_flags & IFF_ALLMULTI) || 676 !TAILQ_EMPTY(&ifp->if_multiaddrs)) 677 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; 678 } 679 680 if (sc->hn_rx_filter != filter) { 681 error = hn_rndis_set_rxfilter(sc, filter); 682 if (!error) 683 sc->hn_rx_filter = filter; 684 } 685 return (error); 686 } 687 688 static void 689 hn_set_txagg(struct hn_softc *sc) 690 { 691 uint32_t size, pkts; 692 int i; 693 694 /* 695 * Setup aggregation size. 696 */ 697 if (sc->hn_agg_size < 0) 698 size = UINT32_MAX; 699 else 700 size = sc->hn_agg_size; 701 702 if (sc->hn_rndis_agg_size < size) 703 size = sc->hn_rndis_agg_size; 704 705 if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) { 706 /* Disable */ 707 size = 0; 708 pkts = 0; 709 goto done; 710 } 711 712 /* NOTE: Type of the per TX ring setting is 'int'. */ 713 if (size > INT_MAX) 714 size = INT_MAX; 715 716 /* NOTE: We only aggregate packets using chimney sending buffers. */ 717 if (size > (uint32_t)sc->hn_chim_szmax) 718 size = sc->hn_chim_szmax; 719 720 /* 721 * Setup aggregation packet count. 722 */ 723 if (sc->hn_agg_pkts < 0) 724 pkts = UINT32_MAX; 725 else 726 pkts = sc->hn_agg_pkts; 727 728 if (sc->hn_rndis_agg_pkts < pkts) 729 pkts = sc->hn_rndis_agg_pkts; 730 731 if (pkts <= 1) { 732 /* Disable */ 733 size = 0; 734 pkts = 0; 735 goto done; 736 } 737 738 /* NOTE: Type of the per TX ring setting is 'short'. 
*/ 739 if (pkts > SHRT_MAX) 740 pkts = SHRT_MAX; 741 742 done: 743 /* NOTE: Type of the per TX ring setting is 'short'. */ 744 if (sc->hn_rndis_agg_align > SHRT_MAX) { 745 /* Disable */ 746 size = 0; 747 pkts = 0; 748 } 749 750 if (bootverbose) { 751 if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n", 752 size, pkts, sc->hn_rndis_agg_align); 753 } 754 755 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 756 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 757 758 mtx_lock(&txr->hn_tx_lock); 759 txr->hn_agg_szmax = size; 760 txr->hn_agg_pktmax = pkts; 761 txr->hn_agg_align = sc->hn_rndis_agg_align; 762 mtx_unlock(&txr->hn_tx_lock); 763 } 764 } 765 766 static int 767 hn_get_txswq_depth(const struct hn_tx_ring *txr) 768 { 769 770 KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); 771 if (hn_tx_swq_depth < txr->hn_txdesc_cnt) 772 return txr->hn_txdesc_cnt; 773 return hn_tx_swq_depth; 774 } 775 776 static int 777 hn_rss_reconfig(struct hn_softc *sc) 778 { 779 int error; 780 781 HN_LOCK_ASSERT(sc); 782 783 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 784 return (ENXIO); 785 786 /* 787 * Disable RSS first. 788 * 789 * NOTE: 790 * Direct reconfiguration by setting the UNCHG flags does 791 * _not_ work properly. 792 */ 793 if (bootverbose) 794 if_printf(sc->hn_ifp, "disable RSS\n"); 795 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); 796 if (error) { 797 if_printf(sc->hn_ifp, "RSS disable failed\n"); 798 return (error); 799 } 800 801 /* 802 * Reenable the RSS w/ the updated RSS key or indirect 803 * table. 804 */ 805 if (bootverbose) 806 if_printf(sc->hn_ifp, "reconfig RSS\n"); 807 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 808 if (error) { 809 if_printf(sc->hn_ifp, "RSS reconfig failed\n"); 810 return (error); 811 } 812 return (0); 813 } 814 815 static void 816 hn_rss_ind_fixup(struct hn_softc *sc, int nchan) 817 { 818 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 819 int i; 820 821 KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); 822 823 /* 824 * Check indirect table to make sure that all channels in it 825 * can be used. 
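	 * Entries that reference a channel beyond the newly allocated
	 * count are clamped to the last usable channel (nchan - 1).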
	 */
	for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
		if (rss->rss_ind[i] >= nchan) {
			if_printf(sc->hn_ifp,
			    "RSS indirect table %d fixup: %u -> %d\n",
			    i, rss->rss_ind[i], nchan - 1);
			rss->rss_ind[i] = nchan - 1;
		}
	}
}

static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
{

	return EOPNOTSUPP;
}

static void
hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct hn_softc *sc = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
		ifmr->ifm_active |= IFM_NONE;
		return;
	}
	ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}

/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
static const struct hyperv_guid g_net_vsc_device_type = {
	.hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
	    0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
};

static int
hn_probe(device_t dev)
{

	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
	    &g_net_vsc_device_type) == 0) {
		device_set_desc(dev, "Hyper-V Network Interface");
		return BUS_PROBE_DEFAULT;
	}
	return ENXIO;
}

static int
hn_attach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	uint8_t eaddr[ETHER_ADDR_LEN];
	struct ifnet *ifp = NULL;
	int error, ring_cnt, tx_ring_cnt;

	sc->hn_dev = dev;
	sc->hn_prichan = vmbus_get_channel(dev);
	HN_LOCK_INIT(sc);

	/*
	 * Initialize these tunables once.
	 */
	sc->hn_agg_size = hn_tx_agg_size;
	sc->hn_agg_pkts = hn_tx_agg_pkts;

	/*
	 * Setup taskqueue for transmission.
	 */
	if (hn_tx_taskq == NULL) {
		sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &sc->hn_tx_taskq);
		if (hn_bind_tx_taskq >= 0) {
			int cpu = hn_bind_tx_taskq;
			cpuset_t cpu_set;

			if (cpu > mp_ncpus - 1)
				cpu = mp_ncpus - 1;
			CPU_SETOF(cpu, &cpu_set);
			taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1,
			    PI_NET, &cpu_set, "%s tx",
			    device_get_nameunit(dev));
		} else {
			taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET,
			    "%s tx", device_get_nameunit(dev));
		}
	} else {
		sc->hn_tx_taskq = hn_tx_taskq;
	}

	/*
	 * Setup taskqueue for management tasks, e.g. link status.
	 */
	sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
	taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
	TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
	TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
	    hn_netchg_status_taskfunc, sc);

	/*
	 * Allocate ifnet and setup its name earlier, so that if_printf
	 * can be used by functions that will be called after
	 * ether_ifattach().
	 */
	ifp = sc->hn_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/*
	 * Initialize ifmedia earlier so that it can be unconditionally
	 * destroyed, if an error happens later on.
	 */
	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);

	/*
	 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
	 * to use (tx_ring_cnt).
952 * 953 * NOTE: 954 * The # of RX rings to use is same as the # of channels to use. 955 */ 956 ring_cnt = hn_chan_cnt; 957 if (ring_cnt <= 0) { 958 /* Default */ 959 ring_cnt = mp_ncpus; 960 if (ring_cnt > HN_RING_CNT_DEF_MAX) 961 ring_cnt = HN_RING_CNT_DEF_MAX; 962 } else if (ring_cnt > mp_ncpus) { 963 ring_cnt = mp_ncpus; 964 } 965 966 tx_ring_cnt = hn_tx_ring_cnt; 967 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) 968 tx_ring_cnt = ring_cnt; 969 #ifdef HN_IFSTART_SUPPORT 970 if (hn_use_if_start) { 971 /* ifnet.if_start only needs one TX ring. */ 972 tx_ring_cnt = 1; 973 } 974 #endif 975 976 /* 977 * Set the leader CPU for channels. 978 */ 979 sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; 980 981 /* 982 * Create enough TX/RX rings, even if only limited number of 983 * channels can be allocated. 984 */ 985 error = hn_create_tx_data(sc, tx_ring_cnt); 986 if (error) 987 goto failed; 988 error = hn_create_rx_data(sc, ring_cnt); 989 if (error) 990 goto failed; 991 992 /* 993 * Create transaction context for NVS and RNDIS transactions. 994 */ 995 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), 996 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); 997 if (sc->hn_xact == NULL) 998 goto failed; 999 1000 /* 1001 * Attach the synthetic parts, i.e. NVS and RNDIS. 1002 */ 1003 error = hn_synth_attach(sc, ETHERMTU); 1004 if (error) 1005 goto failed; 1006 1007 error = hn_rndis_get_eaddr(sc, eaddr); 1008 if (error) 1009 goto failed; 1010 1011 #if __FreeBSD_version >= 1100099 1012 if (sc->hn_rx_ring_inuse > 1) { 1013 /* 1014 * Reduce TCP segment aggregation limit for multiple 1015 * RX rings to increase ACK timeliness. 1016 */ 1017 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); 1018 } 1019 #endif 1020 1021 /* 1022 * Fixup TX stuffs after synthetic parts are attached. 
1023 */ 1024 hn_fixup_tx_data(sc); 1025 1026 ctx = device_get_sysctl_ctx(dev); 1027 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 1028 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, 1029 &sc->hn_nvs_ver, 0, "NVS version"); 1030 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", 1031 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1032 hn_ndis_version_sysctl, "A", "NDIS version"); 1033 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", 1034 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1035 hn_caps_sysctl, "A", "capabilities"); 1036 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", 1037 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1038 hn_hwassist_sysctl, "A", "hwassist"); 1039 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", 1040 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1041 hn_rxfilter_sysctl, "A", "rxfilter"); 1042 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", 1043 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1044 hn_rss_hash_sysctl, "A", "RSS hash"); 1045 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", 1046 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); 1047 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", 1048 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1049 hn_rss_key_sysctl, "IU", "RSS key"); 1050 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", 1051 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1052 hn_rss_ind_sysctl, "IU", "RSS indirect table"); 1053 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size", 1054 CTLFLAG_RD, &sc->hn_rndis_agg_size, 0, 1055 "RNDIS offered packet transmission aggregation size limit"); 1056 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts", 1057 CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0, 1058 "RNDIS offered packet transmission aggregation count limit"); 1059 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align", 1060 CTLFLAG_RD, &sc->hn_rndis_agg_align, 0, 1061 "RNDIS packet transmission aggregation alignment"); 1062 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size", 1063 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1064 hn_txagg_size_sysctl, "I", 1065 "Packet transmission aggregation size, 0 -- disable, -1 -- auto"); 1066 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts", 1067 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1068 hn_txagg_pkts_sysctl, "I", 1069 "Packet transmission aggregation packets, " 1070 "0 -- disable, -1 -- auto"); 1071 1072 /* 1073 * Setup the ifmedia, which has been initialized earlier. 1074 */ 1075 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 1076 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 1077 /* XXX ifmedia_set really should do this for us */ 1078 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 1079 1080 /* 1081 * Setup the ifnet for this interface. 1082 */ 1083 1084 ifp->if_baudrate = IF_Gbps(10); 1085 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 1086 ifp->if_ioctl = hn_ioctl; 1087 ifp->if_init = hn_init; 1088 #ifdef HN_IFSTART_SUPPORT 1089 if (hn_use_if_start) { 1090 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); 1091 1092 ifp->if_start = hn_start; 1093 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); 1094 ifp->if_snd.ifq_drv_maxlen = qdepth - 1; 1095 IFQ_SET_READY(&ifp->if_snd); 1096 } else 1097 #endif 1098 { 1099 ifp->if_transmit = hn_transmit; 1100 ifp->if_qflush = hn_xmit_qflush; 1101 } 1102 1103 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO; 1104 #ifdef foo 1105 /* We can't diff IPv6 packets from IPv4 packets on RX path. 
	 */
	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
#endif
	if (sc->hn_caps & HN_CAP_VLAN) {
		/* XXX not sure about VLAN_MTU. */
		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	}

	ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
	if (ifp->if_hwassist & HN_CSUM_IP_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM;
	if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
	if (sc->hn_caps & HN_CAP_TSO4) {
		ifp->if_capabilities |= IFCAP_TSO4;
		ifp->if_hwassist |= CSUM_IP_TSO;
	}
	if (sc->hn_caps & HN_CAP_TSO6) {
		ifp->if_capabilities |= IFCAP_TSO6;
		ifp->if_hwassist |= CSUM_IP6_TSO;
	}

	/* Enable all available capabilities by default. */
	ifp->if_capenable = ifp->if_capabilities;

	/*
	 * Disable IPv6 TSO and TXCSUM by default; they can still
	 * be enabled through SIOCSIFCAP.
	 */
	ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
	ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);

	if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
		hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
		ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
		ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
	}

	ether_ifattach(ifp, eaddr);

	if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
		if_printf(ifp, "TSO segcnt %u segsz %u\n",
		    ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
	}

	/* Inform the upper layer about the long frame support. */
	ifp->if_hdrlen = sizeof(struct ether_vlan_header);

	/*
	 * Kick off link status check.
	 */
	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
	hn_update_link_status(sc);

	return (0);
failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
		hn_synth_detach(sc);
	hn_detach(dev);
	return (error);
}

static int
hn_detach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = sc->hn_ifp;

	if (device_is_attached(dev)) {
		HN_LOCK(sc);
		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				hn_stop(sc);
			/*
			 * NOTE:
			 * hn_stop() only suspends data, so management
			 * tasks have to be suspended manually here.
			 */
			hn_suspend_mgmt(sc);
			hn_synth_detach(sc);
		}
		HN_UNLOCK(sc);
		ether_ifdetach(ifp);
	}

	ifmedia_removeall(&sc->hn_media);
	hn_destroy_rx_data(sc);
	hn_destroy_tx_data(sc);

	if (sc->hn_tx_taskq != hn_tx_taskq)
		taskqueue_free(sc->hn_tx_taskq);
	taskqueue_free(sc->hn_mgmt_taskq0);

	if (sc->hn_xact != NULL)
		vmbus_xact_ctx_destroy(sc->hn_xact);

	if_free(ifp);

	HN_LOCK_DESTROY(sc);
	return (0);
}

static int
hn_shutdown(device_t dev)
{

	return (0);
}

static void
hn_link_status(struct hn_softc *sc)
{
	uint32_t link_status;
	int error;

	error = hn_rndis_get_linkstatus(sc, &link_status);
	if (error) {
		/* XXX what to do? */
		return;
	}

	if (link_status == NDIS_MEDIA_STATE_CONNECTED)
		sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
	else
		sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp,
	    (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
	    LINK_STATE_UP : LINK_STATE_DOWN);
}

static void
hn_link_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		return;
	hn_link_status(sc);
}

static void
hn_netchg_init_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Prevent any link status checks from running. */
	sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;

	/*
	 * Fake up a [link down --> link up] state change; a 5 second
	 * delay is used, which closely simulates the miibus reaction
	 * to a link down event.
	 */
	sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
	taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
	    &sc->hn_netchg_status, 5 * hz);
}

static void
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Re-allow link status checks. */
	sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
	hn_link_status(sc);
}

static void
hn_update_link_status(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
}

static void
hn_change_network(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
}

static __inline int
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m = *m_head;
	int error;

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));

	error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
	    m, segs, nsegs, BUS_DMA_NOWAIT);
	if (error == EFBIG) {
		struct mbuf *m_new;

		m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
		if (m_new == NULL)
			return ENOBUFS;
		else
			*m_head = m = m_new;
		txr->hn_tx_collapsed++;

		error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
		    txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
	}
	if (!error) {
		bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
		    BUS_DMASYNC_PREWRITE);
		txd->flags |= HN_TXD_FLAG_DMAMAP;
	}
	return error;
}

static __inline int
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
	    ("put an onlist txd %#x", txd->flags));
	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("put an onagg txd %#x", txd->flags));

	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
		return 0;

	if (!STAILQ_EMPTY(&txd->agg_list)) {
		struct hn_txdesc *tmp_txd;

		while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
			int freed;

			KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
			    ("recursive aggregation on aggregated txdesc"));
			KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
			    ("not aggregated txdesc"));
			KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
			    ("aggregated txdesc uses dmamap"));
			KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
			    ("aggregated txdesc consumes "
			    "chimney sending buffer"));
			KASSERT(tmp_txd->chim_size == 0,
			    ("aggregated txdesc has non-zero "
"chimney sending size")); 1353 1354 STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link); 1355 tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG; 1356 freed = hn_txdesc_put(txr, tmp_txd); 1357 KASSERT(freed, ("failed to free aggregated txdesc")); 1358 } 1359 } 1360 1361 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 1362 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 1363 ("chim txd uses dmamap")); 1364 hn_chim_free(txr->hn_sc, txd->chim_index); 1365 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 1366 txd->chim_size = 0; 1367 } else if (txd->flags & HN_TXD_FLAG_DMAMAP) { 1368 bus_dmamap_sync(txr->hn_tx_data_dtag, 1369 txd->data_dmap, BUS_DMASYNC_POSTWRITE); 1370 bus_dmamap_unload(txr->hn_tx_data_dtag, 1371 txd->data_dmap); 1372 txd->flags &= ~HN_TXD_FLAG_DMAMAP; 1373 } 1374 1375 if (txd->m != NULL) { 1376 m_freem(txd->m); 1377 txd->m = NULL; 1378 } 1379 1380 txd->flags |= HN_TXD_FLAG_ONLIST; 1381 #ifndef HN_USE_TXDESC_BUFRING 1382 mtx_lock_spin(&txr->hn_txlist_spin); 1383 KASSERT(txr->hn_txdesc_avail >= 0 && 1384 txr->hn_txdesc_avail < txr->hn_txdesc_cnt, 1385 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); 1386 txr->hn_txdesc_avail++; 1387 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 1388 mtx_unlock_spin(&txr->hn_txlist_spin); 1389 #else 1390 atomic_add_int(&txr->hn_txdesc_avail, 1); 1391 buf_ring_enqueue(txr->hn_txdesc_br, txd); 1392 #endif 1393 1394 return 1; 1395 } 1396 1397 static __inline struct hn_txdesc * 1398 hn_txdesc_get(struct hn_tx_ring *txr) 1399 { 1400 struct hn_txdesc *txd; 1401 1402 #ifndef HN_USE_TXDESC_BUFRING 1403 mtx_lock_spin(&txr->hn_txlist_spin); 1404 txd = SLIST_FIRST(&txr->hn_txlist); 1405 if (txd != NULL) { 1406 KASSERT(txr->hn_txdesc_avail > 0, 1407 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); 1408 txr->hn_txdesc_avail--; 1409 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 1410 } 1411 mtx_unlock_spin(&txr->hn_txlist_spin); 1412 #else 1413 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); 1414 #endif 1415 1416 if (txd != NULL) { 1417 #ifdef HN_USE_TXDESC_BUFRING 1418 atomic_subtract_int(&txr->hn_txdesc_avail, 1); 1419 #endif 1420 KASSERT(txd->m == NULL && txd->refs == 0 && 1421 STAILQ_EMPTY(&txd->agg_list) && 1422 txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 1423 txd->chim_size == 0 && 1424 (txd->flags & HN_TXD_FLAG_ONLIST) && 1425 (txd->flags & HN_TXD_FLAG_ONAGG) == 0 && 1426 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); 1427 txd->flags &= ~HN_TXD_FLAG_ONLIST; 1428 txd->refs = 1; 1429 } 1430 return txd; 1431 } 1432 1433 static __inline void 1434 hn_txdesc_hold(struct hn_txdesc *txd) 1435 { 1436 1437 /* 0->1 transition will never work */ 1438 KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs)); 1439 atomic_add_int(&txd->refs, 1); 1440 } 1441 1442 static __inline void 1443 hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd) 1444 { 1445 1446 KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0, 1447 ("recursive aggregation on aggregating txdesc")); 1448 1449 KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0, 1450 ("already aggregated")); 1451 KASSERT(STAILQ_EMPTY(&txd->agg_list), 1452 ("recursive aggregation on to-be-aggregated txdesc")); 1453 1454 txd->flags |= HN_TXD_FLAG_ONAGG; 1455 STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link); 1456 } 1457 1458 static bool 1459 hn_tx_ring_pending(struct hn_tx_ring *txr) 1460 { 1461 bool pending = false; 1462 1463 #ifndef HN_USE_TXDESC_BUFRING 1464 mtx_lock_spin(&txr->hn_txlist_spin); 1465 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) 1466 pending = true; 1467 
mtx_unlock_spin(&txr->hn_txlist_spin); 1468 #else 1469 if (!buf_ring_full(txr->hn_txdesc_br)) 1470 pending = true; 1471 #endif 1472 return (pending); 1473 } 1474 1475 static __inline void 1476 hn_txeof(struct hn_tx_ring *txr) 1477 { 1478 txr->hn_has_txeof = 0; 1479 txr->hn_txeof(txr); 1480 } 1481 1482 static void 1483 hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, 1484 struct vmbus_channel *chan, const void *data __unused, int dlen __unused) 1485 { 1486 struct hn_txdesc *txd = sndc->hn_cbarg; 1487 struct hn_tx_ring *txr; 1488 1489 txr = txd->txr; 1490 KASSERT(txr->hn_chan == chan, 1491 ("channel mismatch, on chan%u, should be chan%u", 1492 vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan))); 1493 1494 txr->hn_has_txeof = 1; 1495 hn_txdesc_put(txr, txd); 1496 1497 ++txr->hn_txdone_cnt; 1498 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { 1499 txr->hn_txdone_cnt = 0; 1500 if (txr->hn_oactive) 1501 hn_txeof(txr); 1502 } 1503 } 1504 1505 static void 1506 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 1507 { 1508 #if defined(INET) || defined(INET6) 1509 tcp_lro_flush_all(&rxr->hn_lro); 1510 #endif 1511 1512 /* 1513 * NOTE: 1514 * 'txr' could be NULL, if multiple channels and 1515 * ifnet.if_start method are enabled. 1516 */ 1517 if (txr == NULL || !txr->hn_has_txeof) 1518 return; 1519 1520 txr->hn_txdone_cnt = 0; 1521 hn_txeof(txr); 1522 } 1523 1524 static __inline uint32_t 1525 hn_rndis_pktmsg_offset(uint32_t ofs) 1526 { 1527 1528 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 1529 ("invalid RNDIS packet msg offset %u", ofs)); 1530 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 1531 } 1532 1533 static __inline void * 1534 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 1535 size_t pi_dlen, uint32_t pi_type) 1536 { 1537 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 1538 struct rndis_pktinfo *pi; 1539 1540 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 1541 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 1542 1543 /* 1544 * Per-packet-info does not move; it only grows. 1545 * 1546 * NOTE: 1547 * rm_pktinfooffset in this phase counts from the beginning 1548 * of rndis_packet_msg. 1549 */ 1550 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 1551 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 1552 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 1553 pkt->rm_pktinfolen); 1554 pkt->rm_pktinfolen += pi_size; 1555 1556 pi->rm_size = pi_size; 1557 pi->rm_type = pi_type; 1558 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 1559 1560 /* Data immediately follow per-packet-info. */ 1561 pkt->rm_dataoffset += pi_size; 1562 1563 /* Update RNDIS packet msg length */ 1564 pkt->rm_len += pi_size; 1565 1566 return (pi->rm_data); 1567 } 1568 1569 static __inline int 1570 hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr) 1571 { 1572 struct hn_txdesc *txd; 1573 struct mbuf *m; 1574 int error, pkts; 1575 1576 txd = txr->hn_agg_txd; 1577 KASSERT(txd != NULL, ("no aggregate txdesc")); 1578 1579 /* 1580 * Since hn_txpkt() will reset this temporary stat, save 1581 * it now, so that oerrors can be updated properly, if 1582 * hn_txpkt() ever fails. 1583 */ 1584 pkts = txr->hn_stat_pkts; 1585 1586 /* 1587 * Since txd's mbuf will _not_ be freed upon hn_txpkt() 1588 * failure, save it for later freeing, if hn_txpkt() ever 1589 * fails. 
1590 */ 1591 m = txd->m; 1592 error = hn_txpkt(ifp, txr, txd); 1593 if (__predict_false(error)) { 1594 /* txd is freed, but m is not. */ 1595 m_freem(m); 1596 1597 txr->hn_flush_failed++; 1598 if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts); 1599 } 1600 1601 /* Reset all aggregation states. */ 1602 txr->hn_agg_txd = NULL; 1603 txr->hn_agg_szleft = 0; 1604 txr->hn_agg_pktleft = 0; 1605 txr->hn_agg_prevpkt = NULL; 1606 1607 return (error); 1608 } 1609 1610 static void * 1611 hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 1612 int pktsize) 1613 { 1614 void *chim; 1615 1616 if (txr->hn_agg_txd != NULL) { 1617 if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) { 1618 struct hn_txdesc *agg_txd = txr->hn_agg_txd; 1619 struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt; 1620 int olen; 1621 1622 /* 1623 * Update the previous RNDIS packet's total length, 1624 * it can be increased due to the mandatory alignment 1625 * padding for this RNDIS packet. And update the 1626 * aggregating txdesc's chimney sending buffer size 1627 * accordingly. 1628 * 1629 * XXX 1630 * Zero-out the padding, as required by the RNDIS spec. 1631 */ 1632 olen = pkt->rm_len; 1633 pkt->rm_len = roundup2(olen, txr->hn_agg_align); 1634 agg_txd->chim_size += pkt->rm_len - olen; 1635 1636 /* Link this txdesc to the parent. */ 1637 hn_txdesc_agg(agg_txd, txd); 1638 1639 chim = (uint8_t *)pkt + pkt->rm_len; 1640 /* Save the current packet for later fixup. */ 1641 txr->hn_agg_prevpkt = chim; 1642 1643 txr->hn_agg_pktleft--; 1644 txr->hn_agg_szleft -= pktsize; 1645 if (txr->hn_agg_szleft <= 1646 HN_PKTSIZE_MIN(txr->hn_agg_align)) { 1647 /* 1648 * Probably can't aggregate more packets, 1649 * flush this aggregating txdesc proactively. 1650 */ 1651 txr->hn_agg_pktleft = 0; 1652 } 1653 /* Done! */ 1654 return (chim); 1655 } 1656 hn_flush_txagg(ifp, txr); 1657 } 1658 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 1659 1660 txr->hn_tx_chimney_tried++; 1661 txd->chim_index = hn_chim_alloc(txr->hn_sc); 1662 if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID) 1663 return (NULL); 1664 txr->hn_tx_chimney++; 1665 1666 chim = txr->hn_sc->hn_chim + 1667 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 1668 1669 if (txr->hn_agg_pktmax > 1 && 1670 txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) { 1671 txr->hn_agg_txd = txd; 1672 txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1; 1673 txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize; 1674 txr->hn_agg_prevpkt = chim; 1675 } 1676 return (chim); 1677 } 1678 1679 /* 1680 * NOTE: 1681 * If this function fails, then both txd and m_head0 will be freed. 
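 *
 * Small packets are copied into a chimney (pre-allocated send buffer)
 * slot, possibly aggregated with other packets in that slot; larger
 * packets are DMA-mapped and described by a GPA list, with the RNDIS
 * packet message itself in the first page buffer.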
1682 */ 1683 static int 1684 hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd, 1685 struct mbuf **m_head0) 1686 { 1687 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 1688 int error, nsegs, i; 1689 struct mbuf *m_head = *m_head0; 1690 struct rndis_packet_msg *pkt; 1691 uint32_t *pi_data; 1692 void *chim = NULL; 1693 int pkt_hlen, pkt_size; 1694 1695 pkt = txd->rndis_pkt; 1696 pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align); 1697 if (pkt_size < txr->hn_chim_size) { 1698 chim = hn_try_txagg(ifp, txr, txd, pkt_size); 1699 if (chim != NULL) 1700 pkt = chim; 1701 } else { 1702 if (txr->hn_agg_txd != NULL) 1703 hn_flush_txagg(ifp, txr); 1704 } 1705 1706 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 1707 pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len; 1708 pkt->rm_dataoffset = sizeof(*pkt); 1709 pkt->rm_datalen = m_head->m_pkthdr.len; 1710 pkt->rm_oobdataoffset = 0; 1711 pkt->rm_oobdatalen = 0; 1712 pkt->rm_oobdataelements = 0; 1713 pkt->rm_pktinfooffset = sizeof(*pkt); 1714 pkt->rm_pktinfolen = 0; 1715 pkt->rm_vchandle = 0; 1716 pkt->rm_reserved = 0; 1717 1718 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 1719 /* 1720 * Set the hash value for this packet, so that the host could 1721 * dispatch the TX done event for this packet back to this TX 1722 * ring's channel. 1723 */ 1724 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 1725 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 1726 *pi_data = txr->hn_tx_idx; 1727 } 1728 1729 if (m_head->m_flags & M_VLANTAG) { 1730 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 1731 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 1732 *pi_data = NDIS_VLAN_INFO_MAKE( 1733 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 1734 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 1735 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 1736 } 1737 1738 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 1739 #if defined(INET6) || defined(INET) 1740 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 1741 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 1742 #ifdef INET 1743 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 1744 *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0, 1745 m_head->m_pkthdr.tso_segsz); 1746 } 1747 #endif 1748 #if defined(INET6) && defined(INET) 1749 else 1750 #endif 1751 #ifdef INET6 1752 { 1753 *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0, 1754 m_head->m_pkthdr.tso_segsz); 1755 } 1756 #endif 1757 #endif /* INET6 || INET */ 1758 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 1759 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 1760 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 1761 if (m_head->m_pkthdr.csum_flags & 1762 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 1763 *pi_data = NDIS_TXCSUM_INFO_IPV6; 1764 } else { 1765 *pi_data = NDIS_TXCSUM_INFO_IPV4; 1766 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 1767 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 1768 } 1769 1770 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) 1771 *pi_data |= NDIS_TXCSUM_INFO_TCPCS; 1772 else if (m_head->m_pkthdr.csum_flags & 1773 (CSUM_IP_UDP | CSUM_IP6_UDP)) 1774 *pi_data |= NDIS_TXCSUM_INFO_UDPCS; 1775 } 1776 1777 pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 1778 /* Convert RNDIS packet message offsets */ 1779 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset); 1780 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 1781 1782 /* 1783 * Fast path: Chimney sending. 
1784 */ 1785 if (chim != NULL) { 1786 struct hn_txdesc *tgt_txd = txd; 1787 1788 if (txr->hn_agg_txd != NULL) { 1789 tgt_txd = txr->hn_agg_txd; 1790 #ifdef INVARIANTS 1791 *m_head0 = NULL; 1792 #endif 1793 } 1794 1795 KASSERT(pkt == chim, 1796 ("RNDIS pkt not in chimney sending buffer")); 1797 KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID, 1798 ("chimney sending buffer is not used")); 1799 tgt_txd->chim_size += pkt->rm_len; 1800 1801 m_copydata(m_head, 0, m_head->m_pkthdr.len, 1802 ((uint8_t *)chim) + pkt_hlen); 1803 1804 txr->hn_gpa_cnt = 0; 1805 txr->hn_sendpkt = hn_txpkt_chim; 1806 goto done; 1807 } 1808 1809 KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc")); 1810 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, 1811 ("chimney buffer is used")); 1812 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc")); 1813 1814 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 1815 if (__predict_false(error)) { 1816 int freed; 1817 1818 /* 1819 * This mbuf is not linked w/ the txd yet, so free it now. 1820 */ 1821 m_freem(m_head); 1822 *m_head0 = NULL; 1823 1824 freed = hn_txdesc_put(txr, txd); 1825 KASSERT(freed != 0, 1826 ("fail to free txd upon txdma error")); 1827 1828 txr->hn_txdma_failed++; 1829 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1830 return error; 1831 } 1832 *m_head0 = m_head; 1833 1834 /* +1 RNDIS packet message */ 1835 txr->hn_gpa_cnt = nsegs + 1; 1836 1837 /* send packet with page buffer */ 1838 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 1839 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 1840 txr->hn_gpa[0].gpa_len = pkt_hlen; 1841 1842 /* 1843 * Fill the page buffers with mbuf info after the page 1844 * buffer for RNDIS packet message. 1845 */ 1846 for (i = 0; i < nsegs; ++i) { 1847 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 1848 1849 gpa->gpa_page = atop(segs[i].ds_addr); 1850 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 1851 gpa->gpa_len = segs[i].ds_len; 1852 } 1853 1854 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 1855 txd->chim_size = 0; 1856 txr->hn_sendpkt = hn_txpkt_sglist; 1857 done: 1858 txd->m = m_head; 1859 1860 /* Set the completion routine */ 1861 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 1862 1863 /* Update temporary stats for later use. */ 1864 txr->hn_stat_pkts++; 1865 txr->hn_stat_size += m_head->m_pkthdr.len; 1866 if (m_head->m_flags & M_MCAST) 1867 txr->hn_stat_mcasts++; 1868 1869 return 0; 1870 } 1871 1872 /* 1873 * NOTE: 1874 * If this function fails, then txd will be freed, but the mbuf 1875 * associated w/ the txd will _not_ be freed. 1876 */ 1877 static int 1878 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 1879 { 1880 int error, send_failed = 0; 1881 1882 again: 1883 /* 1884 * Make sure that this txd and any aggregated txds are not freed 1885 * before ETHER_BPF_MTAP. 
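	 * The reference taken by hn_txdesc_hold() below is dropped by the
	 * hn_txdesc_put() that follows the send, after the BPF taps have run.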
1886 */ 1887 hn_txdesc_hold(txd); 1888 error = txr->hn_sendpkt(txr, txd); 1889 if (!error) { 1890 if (bpf_peers_present(ifp->if_bpf)) { 1891 const struct hn_txdesc *tmp_txd; 1892 1893 ETHER_BPF_MTAP(ifp, txd->m); 1894 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link) 1895 ETHER_BPF_MTAP(ifp, tmp_txd->m); 1896 } 1897 1898 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts); 1899 #ifdef HN_IFSTART_SUPPORT 1900 if (!hn_use_if_start) 1901 #endif 1902 { 1903 if_inc_counter(ifp, IFCOUNTER_OBYTES, 1904 txr->hn_stat_size); 1905 if (txr->hn_stat_mcasts != 0) { 1906 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1907 txr->hn_stat_mcasts); 1908 } 1909 } 1910 txr->hn_pkts += txr->hn_stat_pkts; 1911 txr->hn_sends++; 1912 } 1913 hn_txdesc_put(txr, txd); 1914 1915 if (__predict_false(error)) { 1916 int freed; 1917 1918 /* 1919 * This should "really rarely" happen. 1920 * 1921 * XXX Too many RX to be acked or too many sideband 1922 * commands to run? Ask netvsc_channel_rollup() 1923 * to kick start later. 1924 */ 1925 txr->hn_has_txeof = 1; 1926 if (!send_failed) { 1927 txr->hn_send_failed++; 1928 send_failed = 1; 1929 /* 1930 * Try sending again after set hn_has_txeof; 1931 * in case that we missed the last 1932 * netvsc_channel_rollup(). 1933 */ 1934 goto again; 1935 } 1936 if_printf(ifp, "send failed\n"); 1937 1938 /* 1939 * Caller will perform further processing on the 1940 * associated mbuf, so don't free it in hn_txdesc_put(); 1941 * only unload it from the DMA map in hn_txdesc_put(), 1942 * if it was loaded. 1943 */ 1944 txd->m = NULL; 1945 freed = hn_txdesc_put(txr, txd); 1946 KASSERT(freed != 0, 1947 ("fail to free txd upon send error")); 1948 1949 txr->hn_send_failed++; 1950 } 1951 1952 /* Reset temporary stats, after this sending is done. */ 1953 txr->hn_stat_size = 0; 1954 txr->hn_stat_pkts = 0; 1955 txr->hn_stat_mcasts = 0; 1956 1957 return (error); 1958 } 1959 1960 /* 1961 * Append the specified data to the indicated mbuf chain, 1962 * Extend the mbuf chain if the new data does not fit in 1963 * existing space. 1964 * 1965 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 1966 * There should be an equivalent in the kernel mbuf code, 1967 * but there does not appear to be one yet. 1968 * 1969 * Differs from m_append() in that additional mbufs are 1970 * allocated with cluster size MJUMPAGESIZE, and filled 1971 * accordingly. 1972 * 1973 * Return 1 if able to complete the job; otherwise 0. 1974 */ 1975 static int 1976 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 1977 { 1978 struct mbuf *m, *n; 1979 int remainder, space; 1980 1981 for (m = m0; m->m_next != NULL; m = m->m_next) 1982 ; 1983 remainder = len; 1984 space = M_TRAILINGSPACE(m); 1985 if (space > 0) { 1986 /* 1987 * Copy into available space. 1988 */ 1989 if (space > remainder) 1990 space = remainder; 1991 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 1992 m->m_len += space; 1993 cp += space; 1994 remainder -= space; 1995 } 1996 while (remainder > 0) { 1997 /* 1998 * Allocate a new mbuf; could check space 1999 * and allocate a cluster instead. 
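 * Unlike m_append(), which would extend the chain with plain mbufs, a
 * MJUMPAGESIZE (page sized) cluster is always allocated here, so a
 * large receive packet copied out of the host RXBUF ends up in a short
 * chain; e.g. on a 4KB-page system a 6KB remainder is absorbed by just
 * two clusters.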
2000 */ 2001 n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); 2002 if (n == NULL) 2003 break; 2004 n->m_len = min(MJUMPAGESIZE, remainder); 2005 bcopy(cp, mtod(n, caddr_t), n->m_len); 2006 cp += n->m_len; 2007 remainder -= n->m_len; 2008 m->m_next = n; 2009 m = n; 2010 } 2011 if (m0->m_flags & M_PKTHDR) 2012 m0->m_pkthdr.len += len - remainder; 2013 2014 return (remainder == 0); 2015 } 2016 2017 #if defined(INET) || defined(INET6) 2018 static __inline int 2019 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 2020 { 2021 #if __FreeBSD_version >= 1100095 2022 if (hn_lro_mbufq_depth) { 2023 tcp_lro_queue_mbuf(lc, m); 2024 return 0; 2025 } 2026 #endif 2027 return tcp_lro_rx(lc, m, 0); 2028 } 2029 #endif 2030 2031 static int 2032 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 2033 const struct hn_rxinfo *info) 2034 { 2035 struct ifnet *ifp = rxr->hn_ifp; 2036 struct mbuf *m_new; 2037 int size, do_lro = 0, do_csum = 1; 2038 int hash_type; 2039 2040 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 2041 return (0); 2042 2043 /* 2044 * Bail out if packet contains more data than configured MTU. 2045 */ 2046 if (dlen > (ifp->if_mtu + ETHER_HDR_LEN)) { 2047 return (0); 2048 } else if (dlen <= MHLEN) { 2049 m_new = m_gethdr(M_NOWAIT, MT_DATA); 2050 if (m_new == NULL) { 2051 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 2052 return (0); 2053 } 2054 memcpy(mtod(m_new, void *), data, dlen); 2055 m_new->m_pkthdr.len = m_new->m_len = dlen; 2056 rxr->hn_small_pkts++; 2057 } else { 2058 /* 2059 * Get an mbuf with a cluster. For packets 2K or less, 2060 * get a standard 2K cluster. For anything larger, get a 2061 * 4K cluster. Any buffers larger than 4K can cause problems 2062 * if looped around to the Hyper-V TX channel, so avoid them. 2063 */ 2064 size = MCLBYTES; 2065 if (dlen > MCLBYTES) { 2066 /* 4096 */ 2067 size = MJUMPAGESIZE; 2068 } 2069 2070 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 2071 if (m_new == NULL) { 2072 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 2073 return (0); 2074 } 2075 2076 hv_m_append(m_new, dlen, data); 2077 } 2078 m_new->m_pkthdr.rcvif = ifp; 2079 2080 if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) 2081 do_csum = 0; 2082 2083 /* receive side checksum offload */ 2084 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 2085 /* IP csum offload */ 2086 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 2087 m_new->m_pkthdr.csum_flags |= 2088 (CSUM_IP_CHECKED | CSUM_IP_VALID); 2089 rxr->hn_csum_ip++; 2090 } 2091 2092 /* TCP/UDP csum offload */ 2093 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 2094 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 2095 m_new->m_pkthdr.csum_flags |= 2096 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2097 m_new->m_pkthdr.csum_data = 0xffff; 2098 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 2099 rxr->hn_csum_tcp++; 2100 else 2101 rxr->hn_csum_udp++; 2102 } 2103 2104 /* 2105 * XXX 2106 * As of this write (Oct 28th, 2016), host side will turn 2107 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 2108 * the do_lro setting here is actually _not_ accurate. We 2109 * depend on the RSS hash type check to reset do_lro. 
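 * (The reset happens in the RSS hash handling further down: the
 * NDIS_HASH_IPV4/IPV6/IPV6_EX cases clear do_lro again, so only TCP
 * traffic is actually queued to LRO.)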
2110 */ 2111 if ((info->csum_info & 2112 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 2113 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 2114 do_lro = 1; 2115 } else { 2116 const struct ether_header *eh; 2117 uint16_t etype; 2118 int hoff; 2119 2120 hoff = sizeof(*eh); 2121 if (m_new->m_len < hoff) 2122 goto skip; 2123 eh = mtod(m_new, struct ether_header *); 2124 etype = ntohs(eh->ether_type); 2125 if (etype == ETHERTYPE_VLAN) { 2126 const struct ether_vlan_header *evl; 2127 2128 hoff = sizeof(*evl); 2129 if (m_new->m_len < hoff) 2130 goto skip; 2131 evl = mtod(m_new, struct ether_vlan_header *); 2132 etype = ntohs(evl->evl_proto); 2133 } 2134 2135 if (etype == ETHERTYPE_IP) { 2136 int pr; 2137 2138 pr = hn_check_iplen(m_new, hoff); 2139 if (pr == IPPROTO_TCP) { 2140 if (do_csum && 2141 (rxr->hn_trust_hcsum & 2142 HN_TRUST_HCSUM_TCP)) { 2143 rxr->hn_csum_trusted++; 2144 m_new->m_pkthdr.csum_flags |= 2145 (CSUM_IP_CHECKED | CSUM_IP_VALID | 2146 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2147 m_new->m_pkthdr.csum_data = 0xffff; 2148 } 2149 do_lro = 1; 2150 } else if (pr == IPPROTO_UDP) { 2151 if (do_csum && 2152 (rxr->hn_trust_hcsum & 2153 HN_TRUST_HCSUM_UDP)) { 2154 rxr->hn_csum_trusted++; 2155 m_new->m_pkthdr.csum_flags |= 2156 (CSUM_IP_CHECKED | CSUM_IP_VALID | 2157 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2158 m_new->m_pkthdr.csum_data = 0xffff; 2159 } 2160 } else if (pr != IPPROTO_DONE && do_csum && 2161 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 2162 rxr->hn_csum_trusted++; 2163 m_new->m_pkthdr.csum_flags |= 2164 (CSUM_IP_CHECKED | CSUM_IP_VALID); 2165 } 2166 } 2167 } 2168 skip: 2169 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 2170 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 2171 NDIS_VLAN_INFO_ID(info->vlan_info), 2172 NDIS_VLAN_INFO_PRI(info->vlan_info), 2173 NDIS_VLAN_INFO_CFI(info->vlan_info)); 2174 m_new->m_flags |= M_VLANTAG; 2175 } 2176 2177 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 2178 rxr->hn_rss_pkts++; 2179 m_new->m_pkthdr.flowid = info->hash_value; 2180 hash_type = M_HASHTYPE_OPAQUE_HASH; 2181 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 2182 NDIS_HASH_FUNCTION_TOEPLITZ) { 2183 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK); 2184 2185 /* 2186 * NOTE: 2187 * do_lro is resetted, if the hash types are not TCP 2188 * related. See the comment in the above csum_flags 2189 * setup section. 2190 */ 2191 switch (type) { 2192 case NDIS_HASH_IPV4: 2193 hash_type = M_HASHTYPE_RSS_IPV4; 2194 do_lro = 0; 2195 break; 2196 2197 case NDIS_HASH_TCP_IPV4: 2198 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 2199 break; 2200 2201 case NDIS_HASH_IPV6: 2202 hash_type = M_HASHTYPE_RSS_IPV6; 2203 do_lro = 0; 2204 break; 2205 2206 case NDIS_HASH_IPV6_EX: 2207 hash_type = M_HASHTYPE_RSS_IPV6_EX; 2208 do_lro = 0; 2209 break; 2210 2211 case NDIS_HASH_TCP_IPV6: 2212 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 2213 break; 2214 2215 case NDIS_HASH_TCP_IPV6_EX: 2216 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 2217 break; 2218 } 2219 } 2220 } else { 2221 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 2222 hash_type = M_HASHTYPE_OPAQUE; 2223 } 2224 M_HASHTYPE_SET(m_new, hash_type); 2225 2226 /* 2227 * Note: Moved RX completion back to hv_nv_on_receive() so all 2228 * messages (not just data messages) will trigger a response. 
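 *
 * From this point on the mbuf is fully formed: bump the input
 * counters, then either hand the packet to LRO (when enabled and
 * do_lro was set above) or pass it straight to if_input().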
2229 */ 2230 2231 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 2232 rxr->hn_pkts++; 2233 2234 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { 2235 #if defined(INET) || defined(INET6) 2236 struct lro_ctrl *lro = &rxr->hn_lro; 2237 2238 if (lro->lro_cnt) { 2239 rxr->hn_lro_tried++; 2240 if (hn_lro_rx(lro, m_new) == 0) { 2241 /* DONE! */ 2242 return 0; 2243 } 2244 } 2245 #endif 2246 } 2247 2248 /* We're not holding the lock here, so don't release it */ 2249 (*ifp->if_input)(ifp, m_new); 2250 2251 return (0); 2252 } 2253 2254 static int 2255 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 2256 { 2257 struct hn_softc *sc = ifp->if_softc; 2258 struct ifreq *ifr = (struct ifreq *)data; 2259 int mask, error = 0; 2260 2261 switch (cmd) { 2262 case SIOCSIFMTU: 2263 if (ifr->ifr_mtu > HN_MTU_MAX) { 2264 error = EINVAL; 2265 break; 2266 } 2267 2268 HN_LOCK(sc); 2269 2270 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2271 HN_UNLOCK(sc); 2272 break; 2273 } 2274 2275 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 2276 /* Can't change MTU */ 2277 HN_UNLOCK(sc); 2278 error = EOPNOTSUPP; 2279 break; 2280 } 2281 2282 if (ifp->if_mtu == ifr->ifr_mtu) { 2283 HN_UNLOCK(sc); 2284 break; 2285 } 2286 2287 /* 2288 * Suspend this interface before the synthetic parts 2289 * are ripped. 2290 */ 2291 hn_suspend(sc); 2292 2293 /* 2294 * Detach the synthetics parts, i.e. NVS and RNDIS. 2295 */ 2296 hn_synth_detach(sc); 2297 2298 /* 2299 * Reattach the synthetic parts, i.e. NVS and RNDIS, 2300 * with the new MTU setting. 2301 */ 2302 error = hn_synth_attach(sc, ifr->ifr_mtu); 2303 if (error) { 2304 HN_UNLOCK(sc); 2305 break; 2306 } 2307 2308 /* 2309 * Commit the requested MTU, after the synthetic parts 2310 * have been successfully attached. 2311 */ 2312 ifp->if_mtu = ifr->ifr_mtu; 2313 2314 /* 2315 * Make sure that various parameters based on MTU are 2316 * still valid, after the MTU change. 2317 */ 2318 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 2319 hn_set_chim_size(sc, sc->hn_chim_szmax); 2320 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 2321 #if __FreeBSD_version >= 1100099 2322 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < 2323 HN_LRO_LENLIM_MIN(ifp)) 2324 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 2325 #endif 2326 2327 /* 2328 * All done! Resume the interface now. 2329 */ 2330 hn_resume(sc); 2331 2332 HN_UNLOCK(sc); 2333 break; 2334 2335 case SIOCSIFFLAGS: 2336 HN_LOCK(sc); 2337 2338 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2339 HN_UNLOCK(sc); 2340 break; 2341 } 2342 2343 if (ifp->if_flags & IFF_UP) { 2344 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 2345 /* 2346 * Caller meight hold mutex, e.g. 2347 * bpf; use busy-wait for the RNDIS 2348 * reply. 
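 * HN_NO_SLEEPING() switches the softc into non-sleeping mode, so the
 * RNDIS set-filter exchange issued by hn_set_rxfilter() below polls
 * for its completion instead of sleeping.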
2349 */ 2350 HN_NO_SLEEPING(sc); 2351 hn_set_rxfilter(sc); 2352 HN_SLEEPING_OK(sc); 2353 } else { 2354 hn_init_locked(sc); 2355 } 2356 } else { 2357 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2358 hn_stop(sc); 2359 } 2360 sc->hn_if_flags = ifp->if_flags; 2361 2362 HN_UNLOCK(sc); 2363 break; 2364 2365 case SIOCSIFCAP: 2366 HN_LOCK(sc); 2367 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 2368 2369 if (mask & IFCAP_TXCSUM) { 2370 ifp->if_capenable ^= IFCAP_TXCSUM; 2371 if (ifp->if_capenable & IFCAP_TXCSUM) 2372 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 2373 else 2374 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 2375 } 2376 if (mask & IFCAP_TXCSUM_IPV6) { 2377 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 2378 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 2379 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 2380 else 2381 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 2382 } 2383 2384 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 2385 if (mask & IFCAP_RXCSUM) 2386 ifp->if_capenable ^= IFCAP_RXCSUM; 2387 #ifdef foo 2388 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2389 if (mask & IFCAP_RXCSUM_IPV6) 2390 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 2391 #endif 2392 2393 if (mask & IFCAP_LRO) 2394 ifp->if_capenable ^= IFCAP_LRO; 2395 2396 if (mask & IFCAP_TSO4) { 2397 ifp->if_capenable ^= IFCAP_TSO4; 2398 if (ifp->if_capenable & IFCAP_TSO4) 2399 ifp->if_hwassist |= CSUM_IP_TSO; 2400 else 2401 ifp->if_hwassist &= ~CSUM_IP_TSO; 2402 } 2403 if (mask & IFCAP_TSO6) { 2404 ifp->if_capenable ^= IFCAP_TSO6; 2405 if (ifp->if_capenable & IFCAP_TSO6) 2406 ifp->if_hwassist |= CSUM_IP6_TSO; 2407 else 2408 ifp->if_hwassist &= ~CSUM_IP6_TSO; 2409 } 2410 2411 HN_UNLOCK(sc); 2412 break; 2413 2414 case SIOCADDMULTI: 2415 case SIOCDELMULTI: 2416 HN_LOCK(sc); 2417 2418 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2419 HN_UNLOCK(sc); 2420 break; 2421 } 2422 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 2423 /* 2424 * Multicast uses mutex; use busy-wait for 2425 * the RNDIS reply. 2426 */ 2427 HN_NO_SLEEPING(sc); 2428 hn_set_rxfilter(sc); 2429 HN_SLEEPING_OK(sc); 2430 } 2431 2432 HN_UNLOCK(sc); 2433 break; 2434 2435 case SIOCSIFMEDIA: 2436 case SIOCGIFMEDIA: 2437 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 2438 break; 2439 2440 default: 2441 error = ether_ioctl(ifp, cmd, data); 2442 break; 2443 } 2444 return (error); 2445 } 2446 2447 static void 2448 hn_stop(struct hn_softc *sc) 2449 { 2450 struct ifnet *ifp = sc->hn_ifp; 2451 int i; 2452 2453 HN_LOCK_ASSERT(sc); 2454 2455 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 2456 ("synthetic parts were not attached")); 2457 2458 /* Clear RUNNING bit _before_ hn_suspend_data() */ 2459 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 2460 hn_suspend_data(sc); 2461 2462 /* Clear OACTIVE bit. */ 2463 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2464 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 2465 sc->hn_tx_ring[i].hn_oactive = 0; 2466 } 2467 2468 static void 2469 hn_init_locked(struct hn_softc *sc) 2470 { 2471 struct ifnet *ifp = sc->hn_ifp; 2472 int i; 2473 2474 HN_LOCK_ASSERT(sc); 2475 2476 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 2477 return; 2478 2479 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2480 return; 2481 2482 /* Configure RX filter */ 2483 hn_set_rxfilter(sc); 2484 2485 /* Clear OACTIVE bit. */ 2486 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2487 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 2488 sc->hn_tx_ring[i].hn_oactive = 0; 2489 2490 /* Clear TX 'suspended' bit. 
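 * Clearing it lets hn_start_locked()/hn_xmit() get past their
 * txr->hn_suspended check, so transmission can resume once
 * IFF_DRV_RUNNING is set below.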
*/ 2491 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 2492 2493 /* Everything is ready; unleash! */ 2494 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 2495 } 2496 2497 static void 2498 hn_init(void *xsc) 2499 { 2500 struct hn_softc *sc = xsc; 2501 2502 HN_LOCK(sc); 2503 hn_init_locked(sc); 2504 HN_UNLOCK(sc); 2505 } 2506 2507 #if __FreeBSD_version >= 1100099 2508 2509 static int 2510 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 2511 { 2512 struct hn_softc *sc = arg1; 2513 unsigned int lenlim; 2514 int error; 2515 2516 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 2517 error = sysctl_handle_int(oidp, &lenlim, 0, req); 2518 if (error || req->newptr == NULL) 2519 return error; 2520 2521 HN_LOCK(sc); 2522 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 2523 lenlim > TCP_LRO_LENGTH_MAX) { 2524 HN_UNLOCK(sc); 2525 return EINVAL; 2526 } 2527 hn_set_lro_lenlim(sc, lenlim); 2528 HN_UNLOCK(sc); 2529 2530 return 0; 2531 } 2532 2533 static int 2534 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 2535 { 2536 struct hn_softc *sc = arg1; 2537 int ackcnt, error, i; 2538 2539 /* 2540 * lro_ackcnt_lim is append count limit, 2541 * +1 to turn it into aggregation limit. 2542 */ 2543 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 2544 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 2545 if (error || req->newptr == NULL) 2546 return error; 2547 2548 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 2549 return EINVAL; 2550 2551 /* 2552 * Convert aggregation limit back to append 2553 * count limit. 2554 */ 2555 --ackcnt; 2556 HN_LOCK(sc); 2557 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) 2558 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 2559 HN_UNLOCK(sc); 2560 return 0; 2561 } 2562 2563 #endif 2564 2565 static int 2566 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 2567 { 2568 struct hn_softc *sc = arg1; 2569 int hcsum = arg2; 2570 int on, error, i; 2571 2572 on = 0; 2573 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 2574 on = 1; 2575 2576 error = sysctl_handle_int(oidp, &on, 0, req); 2577 if (error || req->newptr == NULL) 2578 return error; 2579 2580 HN_LOCK(sc); 2581 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2582 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 2583 2584 if (on) 2585 rxr->hn_trust_hcsum |= hcsum; 2586 else 2587 rxr->hn_trust_hcsum &= ~hcsum; 2588 } 2589 HN_UNLOCK(sc); 2590 return 0; 2591 } 2592 2593 static int 2594 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 2595 { 2596 struct hn_softc *sc = arg1; 2597 int chim_size, error; 2598 2599 chim_size = sc->hn_tx_ring[0].hn_chim_size; 2600 error = sysctl_handle_int(oidp, &chim_size, 0, req); 2601 if (error || req->newptr == NULL) 2602 return error; 2603 2604 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 2605 return EINVAL; 2606 2607 HN_LOCK(sc); 2608 hn_set_chim_size(sc, chim_size); 2609 HN_UNLOCK(sc); 2610 return 0; 2611 } 2612 2613 #if __FreeBSD_version < 1100095 2614 static int 2615 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 2616 { 2617 struct hn_softc *sc = arg1; 2618 int ofs = arg2, i, error; 2619 struct hn_rx_ring *rxr; 2620 uint64_t stat; 2621 2622 stat = 0; 2623 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2624 rxr = &sc->hn_rx_ring[i]; 2625 stat += *((int *)((uint8_t *)rxr + ofs)); 2626 } 2627 2628 error = sysctl_handle_64(oidp, &stat, 0, req); 2629 if (error || req->newptr == NULL) 2630 return error; 2631 2632 /* Zero out this stat. 
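 * Any write to the sysctl, regardless of the value written, resets the
 * per-ring counters, while a plain read returns the sum across all RX
 * rings; e.g. `sysctl dev.hn.0.lro_queued=0` clears the LRO queued
 * counter on every ring.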
*/ 2633 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2634 rxr = &sc->hn_rx_ring[i]; 2635 *((int *)((uint8_t *)rxr + ofs)) = 0; 2636 } 2637 return 0; 2638 } 2639 #else 2640 static int 2641 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 2642 { 2643 struct hn_softc *sc = arg1; 2644 int ofs = arg2, i, error; 2645 struct hn_rx_ring *rxr; 2646 uint64_t stat; 2647 2648 stat = 0; 2649 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2650 rxr = &sc->hn_rx_ring[i]; 2651 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 2652 } 2653 2654 error = sysctl_handle_64(oidp, &stat, 0, req); 2655 if (error || req->newptr == NULL) 2656 return error; 2657 2658 /* Zero out this stat. */ 2659 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2660 rxr = &sc->hn_rx_ring[i]; 2661 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 2662 } 2663 return 0; 2664 } 2665 2666 #endif 2667 2668 static int 2669 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 2670 { 2671 struct hn_softc *sc = arg1; 2672 int ofs = arg2, i, error; 2673 struct hn_rx_ring *rxr; 2674 u_long stat; 2675 2676 stat = 0; 2677 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2678 rxr = &sc->hn_rx_ring[i]; 2679 stat += *((u_long *)((uint8_t *)rxr + ofs)); 2680 } 2681 2682 error = sysctl_handle_long(oidp, &stat, 0, req); 2683 if (error || req->newptr == NULL) 2684 return error; 2685 2686 /* Zero out this stat. */ 2687 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2688 rxr = &sc->hn_rx_ring[i]; 2689 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 2690 } 2691 return 0; 2692 } 2693 2694 static int 2695 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 2696 { 2697 struct hn_softc *sc = arg1; 2698 int ofs = arg2, i, error; 2699 struct hn_tx_ring *txr; 2700 u_long stat; 2701 2702 stat = 0; 2703 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 2704 txr = &sc->hn_tx_ring[i]; 2705 stat += *((u_long *)((uint8_t *)txr + ofs)); 2706 } 2707 2708 error = sysctl_handle_long(oidp, &stat, 0, req); 2709 if (error || req->newptr == NULL) 2710 return error; 2711 2712 /* Zero out this stat. 
*/ 2713 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 2714 txr = &sc->hn_tx_ring[i]; 2715 *((u_long *)((uint8_t *)txr + ofs)) = 0; 2716 } 2717 return 0; 2718 } 2719 2720 static int 2721 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 2722 { 2723 struct hn_softc *sc = arg1; 2724 int ofs = arg2, i, error, conf; 2725 struct hn_tx_ring *txr; 2726 2727 txr = &sc->hn_tx_ring[0]; 2728 conf = *((int *)((uint8_t *)txr + ofs)); 2729 2730 error = sysctl_handle_int(oidp, &conf, 0, req); 2731 if (error || req->newptr == NULL) 2732 return error; 2733 2734 HN_LOCK(sc); 2735 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 2736 txr = &sc->hn_tx_ring[i]; 2737 *((int *)((uint8_t *)txr + ofs)) = conf; 2738 } 2739 HN_UNLOCK(sc); 2740 2741 return 0; 2742 } 2743 2744 static int 2745 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS) 2746 { 2747 struct hn_softc *sc = arg1; 2748 int error, size; 2749 2750 size = sc->hn_agg_size; 2751 error = sysctl_handle_int(oidp, &size, 0, req); 2752 if (error || req->newptr == NULL) 2753 return (error); 2754 2755 HN_LOCK(sc); 2756 sc->hn_agg_size = size; 2757 hn_set_txagg(sc); 2758 HN_UNLOCK(sc); 2759 2760 return (0); 2761 } 2762 2763 static int 2764 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS) 2765 { 2766 struct hn_softc *sc = arg1; 2767 int error, pkts; 2768 2769 pkts = sc->hn_agg_pkts; 2770 error = sysctl_handle_int(oidp, &pkts, 0, req); 2771 if (error || req->newptr == NULL) 2772 return (error); 2773 2774 HN_LOCK(sc); 2775 sc->hn_agg_pkts = pkts; 2776 hn_set_txagg(sc); 2777 HN_UNLOCK(sc); 2778 2779 return (0); 2780 } 2781 2782 static int 2783 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS) 2784 { 2785 struct hn_softc *sc = arg1; 2786 int pkts; 2787 2788 pkts = sc->hn_tx_ring[0].hn_agg_pktmax; 2789 return (sysctl_handle_int(oidp, &pkts, 0, req)); 2790 } 2791 2792 static int 2793 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS) 2794 { 2795 struct hn_softc *sc = arg1; 2796 int align; 2797 2798 align = sc->hn_tx_ring[0].hn_agg_align; 2799 return (sysctl_handle_int(oidp, &align, 0, req)); 2800 } 2801 2802 static int 2803 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 2804 { 2805 struct hn_softc *sc = arg1; 2806 char verstr[16]; 2807 2808 snprintf(verstr, sizeof(verstr), "%u.%u", 2809 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 2810 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 2811 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 2812 } 2813 2814 static int 2815 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 2816 { 2817 struct hn_softc *sc = arg1; 2818 char caps_str[128]; 2819 uint32_t caps; 2820 2821 HN_LOCK(sc); 2822 caps = sc->hn_caps; 2823 HN_UNLOCK(sc); 2824 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 2825 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 2826 } 2827 2828 static int 2829 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 2830 { 2831 struct hn_softc *sc = arg1; 2832 char assist_str[128]; 2833 uint32_t hwassist; 2834 2835 HN_LOCK(sc); 2836 hwassist = sc->hn_ifp->if_hwassist; 2837 HN_UNLOCK(sc); 2838 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 2839 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 2840 } 2841 2842 static int 2843 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 2844 { 2845 struct hn_softc *sc = arg1; 2846 char filter_str[128]; 2847 uint32_t filter; 2848 2849 HN_LOCK(sc); 2850 filter = sc->hn_rx_filter; 2851 HN_UNLOCK(sc); 2852 snprintf(filter_str, sizeof(filter_str), "%b", filter, 2853 NDIS_PACKET_TYPES); 2854 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 2855 } 2856 2857 static int 2858 
hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 2859 { 2860 struct hn_softc *sc = arg1; 2861 int error; 2862 2863 HN_LOCK(sc); 2864 2865 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 2866 if (error || req->newptr == NULL) 2867 goto back; 2868 2869 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 2870 if (error) 2871 goto back; 2872 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 2873 2874 if (sc->hn_rx_ring_inuse > 1) { 2875 error = hn_rss_reconfig(sc); 2876 } else { 2877 /* Not RSS capable, at least for now; just save the RSS key. */ 2878 error = 0; 2879 } 2880 back: 2881 HN_UNLOCK(sc); 2882 return (error); 2883 } 2884 2885 static int 2886 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 2887 { 2888 struct hn_softc *sc = arg1; 2889 int error; 2890 2891 HN_LOCK(sc); 2892 2893 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 2894 if (error || req->newptr == NULL) 2895 goto back; 2896 2897 /* 2898 * Don't allow RSS indirect table change, if this interface is not 2899 * RSS capable currently. 2900 */ 2901 if (sc->hn_rx_ring_inuse == 1) { 2902 error = EOPNOTSUPP; 2903 goto back; 2904 } 2905 2906 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 2907 if (error) 2908 goto back; 2909 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 2910 2911 hn_rss_ind_fixup(sc, sc->hn_rx_ring_inuse); 2912 error = hn_rss_reconfig(sc); 2913 back: 2914 HN_UNLOCK(sc); 2915 return (error); 2916 } 2917 2918 static int 2919 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 2920 { 2921 struct hn_softc *sc = arg1; 2922 char hash_str[128]; 2923 uint32_t hash; 2924 2925 HN_LOCK(sc); 2926 hash = sc->hn_rss_hash; 2927 HN_UNLOCK(sc); 2928 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 2929 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 2930 } 2931 2932 static int 2933 hn_check_iplen(const struct mbuf *m, int hoff) 2934 { 2935 const struct ip *ip; 2936 int len, iphlen, iplen; 2937 const struct tcphdr *th; 2938 int thoff; /* TCP data offset */ 2939 2940 len = hoff + sizeof(struct ip); 2941 2942 /* The packet must be at least the size of an IP header. */ 2943 if (m->m_pkthdr.len < len) 2944 return IPPROTO_DONE; 2945 2946 /* The fixed IP header must reside completely in the first mbuf. */ 2947 if (m->m_len < len) 2948 return IPPROTO_DONE; 2949 2950 ip = mtodo(m, hoff); 2951 2952 /* Bound check the packet's stated IP header length. */ 2953 iphlen = ip->ip_hl << 2; 2954 if (iphlen < sizeof(struct ip)) /* minimum header length */ 2955 return IPPROTO_DONE; 2956 2957 /* The full IP header must reside completely in the one mbuf. */ 2958 if (m->m_len < hoff + iphlen) 2959 return IPPROTO_DONE; 2960 2961 iplen = ntohs(ip->ip_len); 2962 2963 /* 2964 * Check that the amount of data in the buffers is as 2965 * at least much as the IP header would have us expect. 2966 */ 2967 if (m->m_pkthdr.len < hoff + iplen) 2968 return IPPROTO_DONE; 2969 2970 /* 2971 * Ignore IP fragments. 2972 */ 2973 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 2974 return IPPROTO_DONE; 2975 2976 /* 2977 * The TCP/IP or UDP/IP header must be entirely contained within 2978 * the first fragment of a packet. 
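 * Returning IPPROTO_DONE from any of the checks below tells hn_rxpkt()
 * that the headers could not be validated, in which case the host-side
 * checksum is not trusted and the packet is not considered for LRO.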
2979 */ 2980 switch (ip->ip_p) { 2981 case IPPROTO_TCP: 2982 if (iplen < iphlen + sizeof(struct tcphdr)) 2983 return IPPROTO_DONE; 2984 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 2985 return IPPROTO_DONE; 2986 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 2987 thoff = th->th_off << 2; 2988 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 2989 return IPPROTO_DONE; 2990 if (m->m_len < hoff + iphlen + thoff) 2991 return IPPROTO_DONE; 2992 break; 2993 case IPPROTO_UDP: 2994 if (iplen < iphlen + sizeof(struct udphdr)) 2995 return IPPROTO_DONE; 2996 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 2997 return IPPROTO_DONE; 2998 break; 2999 default: 3000 if (iplen < iphlen) 3001 return IPPROTO_DONE; 3002 break; 3003 } 3004 return ip->ip_p; 3005 } 3006 3007 static int 3008 hn_create_rx_data(struct hn_softc *sc, int ring_cnt) 3009 { 3010 struct sysctl_oid_list *child; 3011 struct sysctl_ctx_list *ctx; 3012 device_t dev = sc->hn_dev; 3013 #if defined(INET) || defined(INET6) 3014 #if __FreeBSD_version >= 1100095 3015 int lroent_cnt; 3016 #endif 3017 #endif 3018 int i; 3019 3020 /* 3021 * Create RXBUF for reception. 3022 * 3023 * NOTE: 3024 * - It is shared by all channels. 3025 * - A large enough buffer is allocated, certain version of NVSes 3026 * may further limit the usable space. 3027 */ 3028 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 3029 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 3030 BUS_DMA_WAITOK | BUS_DMA_ZERO); 3031 if (sc->hn_rxbuf == NULL) { 3032 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 3033 return (ENOMEM); 3034 } 3035 3036 sc->hn_rx_ring_cnt = ring_cnt; 3037 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 3038 3039 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 3040 M_DEVBUF, M_WAITOK | M_ZERO); 3041 3042 #if defined(INET) || defined(INET6) 3043 #if __FreeBSD_version >= 1100095 3044 lroent_cnt = hn_lro_entry_count; 3045 if (lroent_cnt < TCP_LRO_ENTRIES) 3046 lroent_cnt = TCP_LRO_ENTRIES; 3047 if (bootverbose) 3048 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 3049 #endif 3050 #endif /* INET || INET6 */ 3051 3052 ctx = device_get_sysctl_ctx(dev); 3053 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 3054 3055 /* Create dev.hn.UNIT.rx sysctl tree */ 3056 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 3057 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3058 3059 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3060 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 3061 3062 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 3063 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 3064 &rxr->hn_br_dma, BUS_DMA_WAITOK); 3065 if (rxr->hn_br == NULL) { 3066 device_printf(dev, "allocate bufring failed\n"); 3067 return (ENOMEM); 3068 } 3069 3070 if (hn_trust_hosttcp) 3071 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 3072 if (hn_trust_hostudp) 3073 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 3074 if (hn_trust_hostip) 3075 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 3076 rxr->hn_ifp = sc->hn_ifp; 3077 if (i < sc->hn_tx_ring_cnt) 3078 rxr->hn_txr = &sc->hn_tx_ring[i]; 3079 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 3080 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 3081 rxr->hn_rx_idx = i; 3082 rxr->hn_rxbuf = sc->hn_rxbuf; 3083 3084 /* 3085 * Initialize LRO. 
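 * One LRO control block is kept per RX ring.  Newer stacks get the
 * entry count (lroent_cnt, clamped to at least TCP_LRO_ENTRIES) and
 * the mbuf queue depth passed to tcp_lro_init_args(); older stacks
 * fall back to plain tcp_lro_init() with its defaults.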
3086 */ 3087 #if defined(INET) || defined(INET6) 3088 #if __FreeBSD_version >= 1100095 3089 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 3090 hn_lro_mbufq_depth); 3091 #else 3092 tcp_lro_init(&rxr->hn_lro); 3093 rxr->hn_lro.ifp = sc->hn_ifp; 3094 #endif 3095 #if __FreeBSD_version >= 1100099 3096 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 3097 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 3098 #endif 3099 #endif /* INET || INET6 */ 3100 3101 if (sc->hn_rx_sysctl_tree != NULL) { 3102 char name[16]; 3103 3104 /* 3105 * Create per RX ring sysctl tree: 3106 * dev.hn.UNIT.rx.RINGID 3107 */ 3108 snprintf(name, sizeof(name), "%d", i); 3109 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 3110 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 3111 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3112 3113 if (rxr->hn_rx_sysctl_tree != NULL) { 3114 SYSCTL_ADD_ULONG(ctx, 3115 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 3116 OID_AUTO, "packets", CTLFLAG_RW, 3117 &rxr->hn_pkts, "# of packets received"); 3118 SYSCTL_ADD_ULONG(ctx, 3119 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 3120 OID_AUTO, "rss_pkts", CTLFLAG_RW, 3121 &rxr->hn_rss_pkts, 3122 "# of packets w/ RSS info received"); 3123 SYSCTL_ADD_INT(ctx, 3124 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 3125 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 3126 &rxr->hn_pktbuf_len, 0, 3127 "Temporary channel packet buffer length"); 3128 } 3129 } 3130 } 3131 3132 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 3133 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3134 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 3135 #if __FreeBSD_version < 1100095 3136 hn_rx_stat_int_sysctl, 3137 #else 3138 hn_rx_stat_u64_sysctl, 3139 #endif 3140 "LU", "LRO queued"); 3141 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 3142 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3143 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 3144 #if __FreeBSD_version < 1100095 3145 hn_rx_stat_int_sysctl, 3146 #else 3147 hn_rx_stat_u64_sysctl, 3148 #endif 3149 "LU", "LRO flushed"); 3150 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 3151 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3152 __offsetof(struct hn_rx_ring, hn_lro_tried), 3153 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 3154 #if __FreeBSD_version >= 1100099 3155 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 3156 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 3157 hn_lro_lenlim_sysctl, "IU", 3158 "Max # of data bytes to be aggregated by LRO"); 3159 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 3160 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 3161 hn_lro_ackcnt_sysctl, "I", 3162 "Max # of ACKs to be aggregated by LRO"); 3163 #endif 3164 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 3165 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 3166 hn_trust_hcsum_sysctl, "I", 3167 "Trust tcp segement verification on host side, " 3168 "when csum info is missing"); 3169 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 3170 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 3171 hn_trust_hcsum_sysctl, "I", 3172 "Trust udp datagram verification on host side, " 3173 "when csum info is missing"); 3174 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 3175 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 3176 hn_trust_hcsum_sysctl, "I", 3177 "Trust ip packet verification on host side, " 3178 "when csum info is missing"); 3179 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 3180 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3181 
__offsetof(struct hn_rx_ring, hn_csum_ip), 3182 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 3183 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 3184 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3185 __offsetof(struct hn_rx_ring, hn_csum_tcp), 3186 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 3187 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 3188 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3189 __offsetof(struct hn_rx_ring, hn_csum_udp), 3190 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 3191 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 3192 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3193 __offsetof(struct hn_rx_ring, hn_csum_trusted), 3194 hn_rx_stat_ulong_sysctl, "LU", 3195 "# of packets that we trust host's csum verification"); 3196 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 3197 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3198 __offsetof(struct hn_rx_ring, hn_small_pkts), 3199 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 3200 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 3201 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3202 __offsetof(struct hn_rx_ring, hn_ack_failed), 3203 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 3204 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 3205 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 3206 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 3207 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 3208 3209 return (0); 3210 } 3211 3212 static void 3213 hn_destroy_rx_data(struct hn_softc *sc) 3214 { 3215 int i; 3216 3217 if (sc->hn_rxbuf != NULL) { 3218 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 3219 sc->hn_rxbuf = NULL; 3220 } 3221 3222 if (sc->hn_rx_ring_cnt == 0) 3223 return; 3224 3225 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3226 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 3227 3228 if (rxr->hn_br == NULL) 3229 continue; 3230 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 3231 rxr->hn_br = NULL; 3232 3233 #if defined(INET) || defined(INET6) 3234 tcp_lro_free(&rxr->hn_lro); 3235 #endif 3236 free(rxr->hn_pktbuf, M_DEVBUF); 3237 } 3238 free(sc->hn_rx_ring, M_DEVBUF); 3239 sc->hn_rx_ring = NULL; 3240 3241 sc->hn_rx_ring_cnt = 0; 3242 sc->hn_rx_ring_inuse = 0; 3243 } 3244 3245 static int 3246 hn_tx_ring_create(struct hn_softc *sc, int id) 3247 { 3248 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 3249 device_t dev = sc->hn_dev; 3250 bus_dma_tag_t parent_dtag; 3251 int error, i; 3252 3253 txr->hn_sc = sc; 3254 txr->hn_tx_idx = id; 3255 3256 #ifndef HN_USE_TXDESC_BUFRING 3257 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 3258 #endif 3259 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 3260 3261 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 3262 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 3263 M_DEVBUF, M_WAITOK | M_ZERO); 3264 #ifndef HN_USE_TXDESC_BUFRING 3265 SLIST_INIT(&txr->hn_txlist); 3266 #else 3267 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 3268 M_WAITOK, &txr->hn_tx_lock); 3269 #endif 3270 3271 txr->hn_tx_taskq = sc->hn_tx_taskq; 3272 3273 #ifdef HN_IFSTART_SUPPORT 3274 if (hn_use_if_start) { 3275 txr->hn_txeof = hn_start_txeof; 3276 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 3277 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 3278 } else 3279 #endif 3280 { 3281 int br_depth; 3282 3283 txr->hn_txeof = hn_xmit_txeof; 3284 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 3285 TASK_INIT(&txr->hn_txeof_task, 0, 
hn_xmit_txeof_taskfunc, txr); 3286 3287 br_depth = hn_get_txswq_depth(txr); 3288 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 3289 M_WAITOK, &txr->hn_tx_lock); 3290 } 3291 3292 txr->hn_direct_tx_size = hn_direct_tx_size; 3293 3294 /* 3295 * Always schedule transmission instead of trying to do direct 3296 * transmission. This one gives the best performance so far. 3297 */ 3298 txr->hn_sched_tx = 1; 3299 3300 parent_dtag = bus_get_dma_tag(dev); 3301 3302 /* DMA tag for RNDIS packet messages. */ 3303 error = bus_dma_tag_create(parent_dtag, /* parent */ 3304 HN_RNDIS_PKT_ALIGN, /* alignment */ 3305 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 3306 BUS_SPACE_MAXADDR, /* lowaddr */ 3307 BUS_SPACE_MAXADDR, /* highaddr */ 3308 NULL, NULL, /* filter, filterarg */ 3309 HN_RNDIS_PKT_LEN, /* maxsize */ 3310 1, /* nsegments */ 3311 HN_RNDIS_PKT_LEN, /* maxsegsize */ 3312 0, /* flags */ 3313 NULL, /* lockfunc */ 3314 NULL, /* lockfuncarg */ 3315 &txr->hn_tx_rndis_dtag); 3316 if (error) { 3317 device_printf(dev, "failed to create rndis dmatag\n"); 3318 return error; 3319 } 3320 3321 /* DMA tag for data. */ 3322 error = bus_dma_tag_create(parent_dtag, /* parent */ 3323 1, /* alignment */ 3324 HN_TX_DATA_BOUNDARY, /* boundary */ 3325 BUS_SPACE_MAXADDR, /* lowaddr */ 3326 BUS_SPACE_MAXADDR, /* highaddr */ 3327 NULL, NULL, /* filter, filterarg */ 3328 HN_TX_DATA_MAXSIZE, /* maxsize */ 3329 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 3330 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 3331 0, /* flags */ 3332 NULL, /* lockfunc */ 3333 NULL, /* lockfuncarg */ 3334 &txr->hn_tx_data_dtag); 3335 if (error) { 3336 device_printf(dev, "failed to create data dmatag\n"); 3337 return error; 3338 } 3339 3340 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 3341 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 3342 3343 txd->txr = txr; 3344 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 3345 STAILQ_INIT(&txd->agg_list); 3346 3347 /* 3348 * Allocate and load RNDIS packet message. 3349 */ 3350 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 3351 (void **)&txd->rndis_pkt, 3352 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 3353 &txd->rndis_pkt_dmap); 3354 if (error) { 3355 device_printf(dev, 3356 "failed to allocate rndis_packet_msg, %d\n", i); 3357 return error; 3358 } 3359 3360 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 3361 txd->rndis_pkt_dmap, 3362 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 3363 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 3364 BUS_DMA_NOWAIT); 3365 if (error) { 3366 device_printf(dev, 3367 "failed to load rndis_packet_msg, %d\n", i); 3368 bus_dmamem_free(txr->hn_tx_rndis_dtag, 3369 txd->rndis_pkt, txd->rndis_pkt_dmap); 3370 return error; 3371 } 3372 3373 /* DMA map for TX data. 
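 * This map is created up front but only loaded on demand by
 * hn_encap(), when a packet cannot go through the chimney buffer and
 * has to be sent as a GPA (scatter/gather) list instead.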
*/ 3374 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 3375 &txd->data_dmap); 3376 if (error) { 3377 device_printf(dev, 3378 "failed to allocate tx data dmamap\n"); 3379 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 3380 txd->rndis_pkt_dmap); 3381 bus_dmamem_free(txr->hn_tx_rndis_dtag, 3382 txd->rndis_pkt, txd->rndis_pkt_dmap); 3383 return error; 3384 } 3385 3386 /* All set, put it to list */ 3387 txd->flags |= HN_TXD_FLAG_ONLIST; 3388 #ifndef HN_USE_TXDESC_BUFRING 3389 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 3390 #else 3391 buf_ring_enqueue(txr->hn_txdesc_br, txd); 3392 #endif 3393 } 3394 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 3395 3396 if (sc->hn_tx_sysctl_tree != NULL) { 3397 struct sysctl_oid_list *child; 3398 struct sysctl_ctx_list *ctx; 3399 char name[16]; 3400 3401 /* 3402 * Create per TX ring sysctl tree: 3403 * dev.hn.UNIT.tx.RINGID 3404 */ 3405 ctx = device_get_sysctl_ctx(dev); 3406 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 3407 3408 snprintf(name, sizeof(name), "%d", id); 3409 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 3410 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3411 3412 if (txr->hn_tx_sysctl_tree != NULL) { 3413 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 3414 3415 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 3416 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 3417 "# of available TX descs"); 3418 #ifdef HN_IFSTART_SUPPORT 3419 if (!hn_use_if_start) 3420 #endif 3421 { 3422 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 3423 CTLFLAG_RD, &txr->hn_oactive, 0, 3424 "over active"); 3425 } 3426 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 3427 CTLFLAG_RW, &txr->hn_pkts, 3428 "# of packets transmitted"); 3429 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends", 3430 CTLFLAG_RW, &txr->hn_sends, "# of sends"); 3431 } 3432 } 3433 3434 return 0; 3435 } 3436 3437 static void 3438 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 3439 { 3440 struct hn_tx_ring *txr = txd->txr; 3441 3442 KASSERT(txd->m == NULL, ("still has mbuf installed")); 3443 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 3444 3445 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 3446 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 3447 txd->rndis_pkt_dmap); 3448 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 3449 } 3450 3451 static void 3452 hn_tx_ring_destroy(struct hn_tx_ring *txr) 3453 { 3454 struct hn_txdesc *txd; 3455 3456 if (txr->hn_txdesc == NULL) 3457 return; 3458 3459 #ifndef HN_USE_TXDESC_BUFRING 3460 while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { 3461 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 3462 hn_txdesc_dmamap_destroy(txd); 3463 } 3464 #else 3465 mtx_lock(&txr->hn_tx_lock); 3466 while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) 3467 hn_txdesc_dmamap_destroy(txd); 3468 mtx_unlock(&txr->hn_tx_lock); 3469 #endif 3470 3471 if (txr->hn_tx_data_dtag != NULL) 3472 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 3473 if (txr->hn_tx_rndis_dtag != NULL) 3474 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 3475 3476 #ifdef HN_USE_TXDESC_BUFRING 3477 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 3478 #endif 3479 3480 free(txr->hn_txdesc, M_DEVBUF); 3481 txr->hn_txdesc = NULL; 3482 3483 if (txr->hn_mbuf_br != NULL) 3484 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 3485 3486 #ifndef HN_USE_TXDESC_BUFRING 3487 mtx_destroy(&txr->hn_txlist_spin); 3488 #endif 3489 mtx_destroy(&txr->hn_tx_lock); 3490 } 3491 3492 static int 3493 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 3494 { 3495 struct sysctl_oid_list 
*child; 3496 struct sysctl_ctx_list *ctx; 3497 int i; 3498 3499 /* 3500 * Create TXBUF for chimney sending. 3501 * 3502 * NOTE: It is shared by all channels. 3503 */ 3504 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 3505 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 3506 BUS_DMA_WAITOK | BUS_DMA_ZERO); 3507 if (sc->hn_chim == NULL) { 3508 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 3509 return (ENOMEM); 3510 } 3511 3512 sc->hn_tx_ring_cnt = ring_cnt; 3513 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 3514 3515 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 3516 M_DEVBUF, M_WAITOK | M_ZERO); 3517 3518 ctx = device_get_sysctl_ctx(sc->hn_dev); 3519 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 3520 3521 /* Create dev.hn.UNIT.tx sysctl tree */ 3522 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 3523 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3524 3525 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3526 int error; 3527 3528 error = hn_tx_ring_create(sc, i); 3529 if (error) 3530 return error; 3531 } 3532 3533 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 3534 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3535 __offsetof(struct hn_tx_ring, hn_no_txdescs), 3536 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 3537 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 3538 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3539 __offsetof(struct hn_tx_ring, hn_send_failed), 3540 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 3541 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 3542 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3543 __offsetof(struct hn_tx_ring, hn_txdma_failed), 3544 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 3545 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed", 3546 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3547 __offsetof(struct hn_tx_ring, hn_flush_failed), 3548 hn_tx_stat_ulong_sysctl, "LU", 3549 "# of packet transmission aggregation flush failure"); 3550 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 3551 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3552 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 3553 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 3554 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 3555 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3556 __offsetof(struct hn_tx_ring, hn_tx_chimney), 3557 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 3558 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 3559 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3560 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 3561 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 3562 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 3563 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 3564 "# of total TX descs"); 3565 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 3566 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 3567 "Chimney send packet size upper boundary"); 3568 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 3569 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 3570 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 3571 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 3572 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3573 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 3574 hn_tx_conf_int_sysctl, "I", 3575 "Size of the packet for direct transmission"); 3576 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 3577 CTLTYPE_INT | CTLFLAG_RW | 
CTLFLAG_MPSAFE, sc, 3578 __offsetof(struct hn_tx_ring, hn_sched_tx), 3579 hn_tx_conf_int_sysctl, "I", 3580 "Always schedule transmission " 3581 "instead of doing direct transmission"); 3582 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 3583 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 3584 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 3585 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 3586 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax", 3587 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0, 3588 "Applied packet transmission aggregation size"); 3589 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax", 3590 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 3591 hn_txagg_pktmax_sysctl, "I", 3592 "Applied packet transmission aggregation packets"); 3593 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align", 3594 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 3595 hn_txagg_align_sysctl, "I", 3596 "Applied packet transmission aggregation alignment"); 3597 3598 return 0; 3599 } 3600 3601 static void 3602 hn_set_chim_size(struct hn_softc *sc, int chim_size) 3603 { 3604 int i; 3605 3606 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 3607 sc->hn_tx_ring[i].hn_chim_size = chim_size; 3608 } 3609 3610 static void 3611 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 3612 { 3613 struct ifnet *ifp = sc->hn_ifp; 3614 int tso_minlen; 3615 3616 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 3617 return; 3618 3619 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 3620 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 3621 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 3622 3623 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 3624 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 3625 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 3626 3627 if (tso_maxlen < tso_minlen) 3628 tso_maxlen = tso_minlen; 3629 else if (tso_maxlen > IP_MAXPACKET) 3630 tso_maxlen = IP_MAXPACKET; 3631 if (tso_maxlen > sc->hn_ndis_tso_szmax) 3632 tso_maxlen = sc->hn_ndis_tso_szmax; 3633 ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 3634 if (bootverbose) 3635 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 3636 } 3637 3638 static void 3639 hn_fixup_tx_data(struct hn_softc *sc) 3640 { 3641 uint64_t csum_assist; 3642 int i; 3643 3644 hn_set_chim_size(sc, sc->hn_chim_szmax); 3645 if (hn_tx_chimney_size > 0 && 3646 hn_tx_chimney_size < sc->hn_chim_szmax) 3647 hn_set_chim_size(sc, hn_tx_chimney_size); 3648 3649 csum_assist = 0; 3650 if (sc->hn_caps & HN_CAP_IPCS) 3651 csum_assist |= CSUM_IP; 3652 if (sc->hn_caps & HN_CAP_TCP4CS) 3653 csum_assist |= CSUM_IP_TCP; 3654 if (sc->hn_caps & HN_CAP_UDP4CS) 3655 csum_assist |= CSUM_IP_UDP; 3656 if (sc->hn_caps & HN_CAP_TCP6CS) 3657 csum_assist |= CSUM_IP6_TCP; 3658 if (sc->hn_caps & HN_CAP_UDP6CS) 3659 csum_assist |= CSUM_IP6_UDP; 3660 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 3661 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 3662 3663 if (sc->hn_caps & HN_CAP_HASHVAL) { 3664 /* 3665 * Support HASHVAL pktinfo on TX path. 
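 * With HN_TX_FLAG_HASHVAL set, hn_encap() appends the TX ring index as
 * the HASHVAL per-packet-info, which lets the host deliver the
 * send-done notification on this ring's own channel.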
3666 */ 3667 if (bootverbose) 3668 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 3669 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 3670 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 3671 } 3672 } 3673 3674 static void 3675 hn_destroy_tx_data(struct hn_softc *sc) 3676 { 3677 int i; 3678 3679 if (sc->hn_chim != NULL) { 3680 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 3681 sc->hn_chim = NULL; 3682 } 3683 3684 if (sc->hn_tx_ring_cnt == 0) 3685 return; 3686 3687 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 3688 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 3689 3690 free(sc->hn_tx_ring, M_DEVBUF); 3691 sc->hn_tx_ring = NULL; 3692 3693 sc->hn_tx_ring_cnt = 0; 3694 sc->hn_tx_ring_inuse = 0; 3695 } 3696 3697 #ifdef HN_IFSTART_SUPPORT 3698 3699 static void 3700 hn_start_taskfunc(void *xtxr, int pending __unused) 3701 { 3702 struct hn_tx_ring *txr = xtxr; 3703 3704 mtx_lock(&txr->hn_tx_lock); 3705 hn_start_locked(txr, 0); 3706 mtx_unlock(&txr->hn_tx_lock); 3707 } 3708 3709 static int 3710 hn_start_locked(struct hn_tx_ring *txr, int len) 3711 { 3712 struct hn_softc *sc = txr->hn_sc; 3713 struct ifnet *ifp = sc->hn_ifp; 3714 int sched = 0; 3715 3716 KASSERT(hn_use_if_start, 3717 ("hn_start_locked is called, when if_start is disabled")); 3718 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 3719 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 3720 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 3721 3722 if (__predict_false(txr->hn_suspended)) 3723 return (0); 3724 3725 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 3726 IFF_DRV_RUNNING) 3727 return (0); 3728 3729 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 3730 struct hn_txdesc *txd; 3731 struct mbuf *m_head; 3732 int error; 3733 3734 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 3735 if (m_head == NULL) 3736 break; 3737 3738 if (len > 0 && m_head->m_pkthdr.len > len) { 3739 /* 3740 * This sending could be time consuming; let callers 3741 * dispatch this packet sending (and sending of any 3742 * following up packets) to tx taskqueue. 
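 * The mbuf is put back at the head of if_snd and sched is returned as
 * 1, so hn_start() re-schedules the remaining work onto the TX
 * taskqueue instead of spending more time in the caller's context.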
3743 */ 3744 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 3745 sched = 1; 3746 break; 3747 } 3748 3749 #if defined(INET6) || defined(INET) 3750 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3751 m_head = hn_tso_fixup(m_head); 3752 if (__predict_false(m_head == NULL)) { 3753 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 3754 continue; 3755 } 3756 } 3757 #endif 3758 3759 txd = hn_txdesc_get(txr); 3760 if (txd == NULL) { 3761 txr->hn_no_txdescs++; 3762 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 3763 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 3764 break; 3765 } 3766 3767 error = hn_encap(ifp, txr, txd, &m_head); 3768 if (error) { 3769 /* Both txd and m_head are freed */ 3770 KASSERT(txr->hn_agg_txd == NULL, 3771 ("encap failed w/ pending aggregating txdesc")); 3772 continue; 3773 } 3774 3775 if (txr->hn_agg_pktleft == 0) { 3776 if (txr->hn_agg_txd != NULL) { 3777 KASSERT(m_head == NULL, 3778 ("pending mbuf for aggregating txdesc")); 3779 error = hn_flush_txagg(ifp, txr); 3780 if (__predict_false(error)) { 3781 atomic_set_int(&ifp->if_drv_flags, 3782 IFF_DRV_OACTIVE); 3783 break; 3784 } 3785 } else { 3786 KASSERT(m_head != NULL, ("mbuf was freed")); 3787 error = hn_txpkt(ifp, txr, txd); 3788 if (__predict_false(error)) { 3789 /* txd is freed, but m_head is not */ 3790 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 3791 atomic_set_int(&ifp->if_drv_flags, 3792 IFF_DRV_OACTIVE); 3793 break; 3794 } 3795 } 3796 } 3797 #ifdef INVARIANTS 3798 else { 3799 KASSERT(txr->hn_agg_txd != NULL, 3800 ("no aggregating txdesc")); 3801 KASSERT(m_head == NULL, 3802 ("pending mbuf for aggregating txdesc")); 3803 } 3804 #endif 3805 } 3806 3807 /* Flush pending aggerated transmission. */ 3808 if (txr->hn_agg_txd != NULL) 3809 hn_flush_txagg(ifp, txr); 3810 return (sched); 3811 } 3812 3813 static void 3814 hn_start(struct ifnet *ifp) 3815 { 3816 struct hn_softc *sc = ifp->if_softc; 3817 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 3818 3819 if (txr->hn_sched_tx) 3820 goto do_sched; 3821 3822 if (mtx_trylock(&txr->hn_tx_lock)) { 3823 int sched; 3824 3825 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 3826 mtx_unlock(&txr->hn_tx_lock); 3827 if (!sched) 3828 return; 3829 } 3830 do_sched: 3831 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 3832 } 3833 3834 static void 3835 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 3836 { 3837 struct hn_tx_ring *txr = xtxr; 3838 3839 mtx_lock(&txr->hn_tx_lock); 3840 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 3841 hn_start_locked(txr, 0); 3842 mtx_unlock(&txr->hn_tx_lock); 3843 } 3844 3845 static void 3846 hn_start_txeof(struct hn_tx_ring *txr) 3847 { 3848 struct hn_softc *sc = txr->hn_sc; 3849 struct ifnet *ifp = sc->hn_ifp; 3850 3851 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 3852 3853 if (txr->hn_sched_tx) 3854 goto do_sched; 3855 3856 if (mtx_trylock(&txr->hn_tx_lock)) { 3857 int sched; 3858 3859 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 3860 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 3861 mtx_unlock(&txr->hn_tx_lock); 3862 if (sched) { 3863 taskqueue_enqueue(txr->hn_tx_taskq, 3864 &txr->hn_tx_task); 3865 } 3866 } else { 3867 do_sched: 3868 /* 3869 * Release the OACTIVE earlier, with the hope, that 3870 * others could catch up. The task will clear the 3871 * flag again with the hn_tx_lock to avoid possible 3872 * races. 
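 * (hn_start_txeof_taskfunc() does exactly that: it re-clears OACTIVE
 * while holding hn_tx_lock and then calls hn_start_locked() to restart
 * the queue.)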
3873 */ 3874 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 3875 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 3876 } 3877 } 3878 3879 #endif /* HN_IFSTART_SUPPORT */ 3880 3881 static int 3882 hn_xmit(struct hn_tx_ring *txr, int len) 3883 { 3884 struct hn_softc *sc = txr->hn_sc; 3885 struct ifnet *ifp = sc->hn_ifp; 3886 struct mbuf *m_head; 3887 int sched = 0; 3888 3889 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 3890 #ifdef HN_IFSTART_SUPPORT 3891 KASSERT(hn_use_if_start == 0, 3892 ("hn_xmit is called, when if_start is enabled")); 3893 #endif 3894 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc")); 3895 3896 if (__predict_false(txr->hn_suspended)) 3897 return (0); 3898 3899 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 3900 return (0); 3901 3902 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 3903 struct hn_txdesc *txd; 3904 int error; 3905 3906 if (len > 0 && m_head->m_pkthdr.len > len) { 3907 /* 3908 * This sending could be time consuming; let callers 3909 * dispatch this packet sending (and sending of any 3910 * following up packets) to tx taskqueue. 3911 */ 3912 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 3913 sched = 1; 3914 break; 3915 } 3916 3917 txd = hn_txdesc_get(txr); 3918 if (txd == NULL) { 3919 txr->hn_no_txdescs++; 3920 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 3921 txr->hn_oactive = 1; 3922 break; 3923 } 3924 3925 error = hn_encap(ifp, txr, txd, &m_head); 3926 if (error) { 3927 /* Both txd and m_head are freed; discard */ 3928 KASSERT(txr->hn_agg_txd == NULL, 3929 ("encap failed w/ pending aggregating txdesc")); 3930 drbr_advance(ifp, txr->hn_mbuf_br); 3931 continue; 3932 } 3933 3934 if (txr->hn_agg_pktleft == 0) { 3935 if (txr->hn_agg_txd != NULL) { 3936 KASSERT(m_head == NULL, 3937 ("pending mbuf for aggregating txdesc")); 3938 error = hn_flush_txagg(ifp, txr); 3939 if (__predict_false(error)) { 3940 txr->hn_oactive = 1; 3941 break; 3942 } 3943 } else { 3944 KASSERT(m_head != NULL, ("mbuf was freed")); 3945 error = hn_txpkt(ifp, txr, txd); 3946 if (__predict_false(error)) { 3947 /* txd is freed, but m_head is not */ 3948 drbr_putback(ifp, txr->hn_mbuf_br, 3949 m_head); 3950 txr->hn_oactive = 1; 3951 break; 3952 } 3953 } 3954 } 3955 #ifdef INVARIANTS 3956 else { 3957 KASSERT(txr->hn_agg_txd != NULL, 3958 ("no aggregating txdesc")); 3959 KASSERT(m_head == NULL, 3960 ("pending mbuf for aggregating txdesc")); 3961 } 3962 #endif 3963 3964 /* Sent */ 3965 drbr_advance(ifp, txr->hn_mbuf_br); 3966 } 3967 3968 /* Flush pending aggerated transmission. */ 3969 if (txr->hn_agg_txd != NULL) 3970 hn_flush_txagg(ifp, txr); 3971 return (sched); 3972 } 3973 3974 static int 3975 hn_transmit(struct ifnet *ifp, struct mbuf *m) 3976 { 3977 struct hn_softc *sc = ifp->if_softc; 3978 struct hn_tx_ring *txr; 3979 int error, idx = 0; 3980 3981 #if defined(INET6) || defined(INET) 3982 /* 3983 * Perform TSO packet header fixup now, since the TSO 3984 * packet header should be cache-hot. 
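 * hn_tso_fixup() has to read and rewrite the packet's protocol
 * headers, so doing it here, while those headers are presumably still
 * warm from the caller, is cheaper than deferring it to the TX ring's
 * taskqueue.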
3985 */ 3986 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 3987 m = hn_tso_fixup(m); 3988 if (__predict_false(m == NULL)) { 3989 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 3990 return EIO; 3991 } 3992 } 3993 #endif 3994 3995 /* 3996 * Select the TX ring based on flowid 3997 */ 3998 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 3999 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 4000 txr = &sc->hn_tx_ring[idx]; 4001 4002 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 4003 if (error) { 4004 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 4005 return error; 4006 } 4007 4008 if (txr->hn_oactive) 4009 return 0; 4010 4011 if (txr->hn_sched_tx) 4012 goto do_sched; 4013 4014 if (mtx_trylock(&txr->hn_tx_lock)) { 4015 int sched; 4016 4017 sched = hn_xmit(txr, txr->hn_direct_tx_size); 4018 mtx_unlock(&txr->hn_tx_lock); 4019 if (!sched) 4020 return 0; 4021 } 4022 do_sched: 4023 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 4024 return 0; 4025 } 4026 4027 static void 4028 hn_tx_ring_qflush(struct hn_tx_ring *txr) 4029 { 4030 struct mbuf *m; 4031 4032 mtx_lock(&txr->hn_tx_lock); 4033 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 4034 m_freem(m); 4035 mtx_unlock(&txr->hn_tx_lock); 4036 } 4037 4038 static void 4039 hn_xmit_qflush(struct ifnet *ifp) 4040 { 4041 struct hn_softc *sc = ifp->if_softc; 4042 int i; 4043 4044 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 4045 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 4046 if_qflush(ifp); 4047 } 4048 4049 static void 4050 hn_xmit_txeof(struct hn_tx_ring *txr) 4051 { 4052 4053 if (txr->hn_sched_tx) 4054 goto do_sched; 4055 4056 if (mtx_trylock(&txr->hn_tx_lock)) { 4057 int sched; 4058 4059 txr->hn_oactive = 0; 4060 sched = hn_xmit(txr, txr->hn_direct_tx_size); 4061 mtx_unlock(&txr->hn_tx_lock); 4062 if (sched) { 4063 taskqueue_enqueue(txr->hn_tx_taskq, 4064 &txr->hn_tx_task); 4065 } 4066 } else { 4067 do_sched: 4068 /* 4069 * Release the oactive earlier, with the hope, that 4070 * others could catch up. The task will clear the 4071 * oactive again with the hn_tx_lock to avoid possible 4072 * races. 4073 */ 4074 txr->hn_oactive = 0; 4075 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 4076 } 4077 } 4078 4079 static void 4080 hn_xmit_taskfunc(void *xtxr, int pending __unused) 4081 { 4082 struct hn_tx_ring *txr = xtxr; 4083 4084 mtx_lock(&txr->hn_tx_lock); 4085 hn_xmit(txr, 0); 4086 mtx_unlock(&txr->hn_tx_lock); 4087 } 4088 4089 static void 4090 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) 4091 { 4092 struct hn_tx_ring *txr = xtxr; 4093 4094 mtx_lock(&txr->hn_tx_lock); 4095 txr->hn_oactive = 0; 4096 hn_xmit(txr, 0); 4097 mtx_unlock(&txr->hn_tx_lock); 4098 } 4099 4100 static int 4101 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) 4102 { 4103 struct vmbus_chan_br cbr; 4104 struct hn_rx_ring *rxr; 4105 struct hn_tx_ring *txr = NULL; 4106 int idx, error; 4107 4108 idx = vmbus_chan_subidx(chan); 4109 4110 /* 4111 * Link this channel to RX/TX ring. 
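	 * The sub-channel index maps 1:1 onto the RX ring index; a TX
	 * ring is linked only if the index is below hn_tx_ring_inuse,
	 * since there may be fewer TX rings than channels.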
	 */
	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
	    ("invalid channel index %d, should be >= 0 && < %d",
	     idx, sc->hn_rx_ring_inuse));
	rxr = &sc->hn_rx_ring[idx];
	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
	    ("RX ring %d already attached", idx));
	rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
		    idx, vmbus_chan_id(chan));
	}

	if (idx < sc->hn_tx_ring_inuse) {
		txr = &sc->hn_tx_ring[idx];
		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
		    ("TX ring %d already attached", idx));
		txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;

		txr->hn_chan = chan;
		if (bootverbose) {
			if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
			    idx, vmbus_chan_id(chan));
		}
	}

	/* Bind this channel to a proper CPU. */
	vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus);

	/*
	 * Open this channel
	 */
	cbr.cbr = rxr->hn_br;
	cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
	cbr.cbr_txsz = HN_TXBR_SIZE;
	cbr.cbr_rxsz = HN_RXBR_SIZE;
	error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
	if (error) {
		if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
		    vmbus_chan_id(chan), error);
		rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
		if (txr != NULL)
			txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
	}
	return (error);
}

static void
hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
{
	struct hn_rx_ring *rxr;
	int idx;

	idx = vmbus_chan_subidx(chan);

	/*
	 * Link this channel to RX/TX ring.
	 */
	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
	    ("invalid channel index %d, should be >= 0 && < %d",
	     idx, sc->hn_rx_ring_inuse));
	rxr = &sc->hn_rx_ring[idx];
	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
	    ("RX ring %d is not attached", idx));
	rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;

	if (idx < sc->hn_tx_ring_inuse) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];

		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
		    ("TX ring %d is not attached", idx));
		txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
	}

	/*
	 * Close this channel.
	 *
	 * NOTE:
	 * Channel closing does _not_ destroy the target channel.
	 */
	vmbus_chan_close(chan);
}

static int
hn_attach_subchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i, error = 0;

	if (subchan_cnt == 0)
		return (0);

	/* Attach the sub-channels. */
	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
	for (i = 0; i < subchan_cnt; ++i) {
		error = hn_chan_attach(sc, subchans[i]);
		if (error)
			break;
	}
	vmbus_subchan_rel(subchans, subchan_cnt);

	if (error) {
		if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n",
		    error);
	} else {
		if (bootverbose) {
			if_printf(sc->hn_ifp, "%d sub-channels attached\n",
			    subchan_cnt);
		}
	}
	return (error);
}

static void
hn_detach_allchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i;

	if (subchan_cnt == 0)
		goto back;

	/* Detach the sub-channels.
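	 * vmbus_subchan_get() returns a referenced channel array, which
	 * must be released with vmbus_subchan_rel() once the sub-channels
	 * have been detached.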
*/ 4237 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 4238 for (i = 0; i < subchan_cnt; ++i) 4239 hn_chan_detach(sc, subchans[i]); 4240 vmbus_subchan_rel(subchans, subchan_cnt); 4241 4242 back: 4243 /* 4244 * Detach the primary channel, _after_ all sub-channels 4245 * are detached. 4246 */ 4247 hn_chan_detach(sc, sc->hn_prichan); 4248 4249 /* Wait for sub-channels to be destroyed, if any. */ 4250 vmbus_subchan_drain(sc->hn_prichan); 4251 4252 #ifdef INVARIANTS 4253 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 4254 KASSERT((sc->hn_rx_ring[i].hn_rx_flags & 4255 HN_RX_FLAG_ATTACHED) == 0, 4256 ("%dth RX ring is still attached", i)); 4257 } 4258 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 4259 KASSERT((sc->hn_tx_ring[i].hn_tx_flags & 4260 HN_TX_FLAG_ATTACHED) == 0, 4261 ("%dth TX ring is still attached", i)); 4262 } 4263 #endif 4264 } 4265 4266 static int 4267 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) 4268 { 4269 struct vmbus_channel **subchans; 4270 int nchan, rxr_cnt, error; 4271 4272 nchan = *nsubch + 1; 4273 if (nchan == 1) { 4274 /* 4275 * Multiple RX/TX rings are not requested. 4276 */ 4277 *nsubch = 0; 4278 return (0); 4279 } 4280 4281 /* 4282 * Query RSS capabilities, e.g. # of RX rings, and # of indirect 4283 * table entries. 4284 */ 4285 error = hn_rndis_query_rsscaps(sc, &rxr_cnt); 4286 if (error) { 4287 /* No RSS; this is benign. */ 4288 *nsubch = 0; 4289 return (0); 4290 } 4291 if (bootverbose) { 4292 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", 4293 rxr_cnt, nchan); 4294 } 4295 4296 if (nchan > rxr_cnt) 4297 nchan = rxr_cnt; 4298 if (nchan == 1) { 4299 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); 4300 *nsubch = 0; 4301 return (0); 4302 } 4303 4304 /* 4305 * Allocate sub-channels from NVS. 4306 */ 4307 *nsubch = nchan - 1; 4308 error = hn_nvs_alloc_subchans(sc, nsubch); 4309 if (error || *nsubch == 0) { 4310 /* Failed to allocate sub-channels. */ 4311 *nsubch = 0; 4312 return (0); 4313 } 4314 4315 /* 4316 * Wait for all sub-channels to become ready before moving on. 4317 */ 4318 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); 4319 vmbus_subchan_rel(subchans, *nsubch); 4320 return (0); 4321 } 4322 4323 static int 4324 hn_synth_attach(struct hn_softc *sc, int mtu) 4325 { 4326 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 4327 int error, nsubch, nchan, i; 4328 uint32_t old_caps; 4329 4330 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, 4331 ("synthetic parts were attached")); 4332 4333 /* Save capabilities for later verification. */ 4334 old_caps = sc->hn_caps; 4335 sc->hn_caps = 0; 4336 4337 /* Clear RSS stuffs. */ 4338 sc->hn_rss_ind_size = 0; 4339 sc->hn_rss_hash = 0; 4340 4341 /* 4342 * Attach the primary channel _before_ attaching NVS and RNDIS. 4343 */ 4344 error = hn_chan_attach(sc, sc->hn_prichan); 4345 if (error) 4346 return (error); 4347 4348 /* 4349 * Attach NVS. 4350 */ 4351 error = hn_nvs_attach(sc, mtu); 4352 if (error) 4353 return (error); 4354 4355 /* 4356 * Attach RNDIS _after_ NVS is attached. 4357 */ 4358 error = hn_rndis_attach(sc, mtu); 4359 if (error) 4360 return (error); 4361 4362 /* 4363 * Make sure capabilities are not changed. 4364 */ 4365 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { 4366 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", 4367 old_caps, sc->hn_caps); 4368 /* Restore old capabilities and abort. */ 4369 sc->hn_caps = old_caps; 4370 return ENXIO; 4371 } 4372 4373 /* 4374 * Allocate sub-channels for multi-TX/RX rings. 
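	 * The initial request is based on the configured RX ring count;
	 * hn_synth_alloc_subchans() trims it according to the RSS
	 * capabilities reported by RNDIS and what NVS actually grants.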
	 *
	 * NOTE:
	 * The # of RX rings that can be used is equivalent to the # of
	 * channels to be requested.
	 */
	nsubch = sc->hn_rx_ring_cnt - 1;
	error = hn_synth_alloc_subchans(sc, &nsubch);
	if (error)
		return (error);

	nchan = nsubch + 1;
	if (nchan == 1) {
		/* Only the primary channel can be used; done */
		goto back;
	}

	/*
	 * Configure RSS key and indirect table _after_ all sub-channels
	 * are allocated.
	 */

	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
		/*
		 * RSS key is not set yet; set it to the default RSS key.
		 */
		if (bootverbose)
			if_printf(sc->hn_ifp, "setup default RSS key\n");
		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
	}

	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
		/*
		 * RSS indirect table is not set yet; set it up in round-
		 * robin fashion.
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "setup default RSS indirect "
			    "table\n");
		}
		for (i = 0; i < NDIS_HASH_INDCNT; ++i)
			rss->rss_ind[i] = i % nchan;
		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
	} else {
		/*
		 * # of usable channels may be changed, so we have to
		 * make sure that all entries in RSS indirect table
		 * are valid.
		 */
		hn_rss_ind_fixup(sc, nchan);
	}

	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error) {
		/*
		 * Failed to configure RSS key or indirect table; only
		 * the primary channel can be used.
		 */
		nchan = 1;
	}
back:
	/*
	 * Set the # of TX/RX rings that could be used according to
	 * the # of channels that NVS offered.
	 */
	hn_set_ring_inuse(sc, nchan);

	/*
	 * Attach the sub-channels, if any.
	 */
	error = hn_attach_subchans(sc);
	if (error)
		return (error);

	/*
	 * Fixup transmission aggregation setup.
	 */
	hn_set_txagg(sc);

	sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
	return (0);
}

/*
 * NOTE:
 * The interface must have been suspended through hn_suspend() before
 * this function gets called.
 */
static void
hn_synth_detach(struct hn_softc *sc)
{
	HN_LOCK_ASSERT(sc);

	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
	    ("synthetic parts were not attached"));

	/* Detach the RNDIS first. */
	hn_rndis_detach(sc);

	/* Detach NVS. */
	hn_nvs_detach(sc);

	/* Detach all of the channels. */
	hn_detach_allchans(sc);

	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}

static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{
	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}

static void
hn_chan_drain(struct vmbus_channel *chan)
{

	while (!vmbus_chan_rx_empty(chan) || !vmbus_chan_tx_empty(chan))
		pause("waitch", 1);
	vmbus_chan_intr_drain(chan);
}

static void
hn_suspend_data(struct hn_softc *sc)
{
	struct vmbus_channel **subch = NULL;
	int i, nsubch;

	HN_LOCK_ASSERT(sc);

	/*
	 * Suspend TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 1;
		mtx_unlock(&txr->hn_tx_lock);
		/* No one is able to send more packets now. */

		/* Wait for all pending sends to finish. */
		while (hn_tx_ring_pending(txr))
			pause("hnwtx", 1 /* 1 tick */);

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}

	/*
	 * Disable RX by clearing RX filter.
	 */
	sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
	hn_rndis_set_rxfilter(sc, sc->hn_rx_filter);

	/*
	 * Give RNDIS enough time to flush all pending data packets.
	 */
	pause("waitrx", (200 * hz) / 1000);

	/*
	 * Drain RX/TX bufrings and interrupts.
	 */
	nsubch = sc->hn_rx_ring_inuse - 1;
	if (nsubch > 0)
		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);

	if (subch != NULL) {
		for (i = 0; i < nsubch; ++i)
			hn_chan_drain(subch[i]);
	}
	hn_chan_drain(sc->hn_prichan);

	if (subch != NULL)
		vmbus_subchan_rel(subch, nsubch);
}

static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{

	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}

static void
hn_suspend_mgmt(struct hn_softc *sc)
{
	struct task task;

	HN_LOCK_ASSERT(sc);

	/*
	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
	 * through hn_mgmt_taskq.
	 */
	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
	vmbus_chan_run_task(sc->hn_prichan, &task);

	/*
	 * Make sure that all pending management tasks are completed.
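	 * The link status and network change tasks are drained
	 * explicitly, followed by anything else that was queued on
	 * hn_mgmt_taskq0 before hn_mgmt_taskq was cleared above.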
4588 */ 4589 taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init); 4590 taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status); 4591 taskqueue_drain_all(sc->hn_mgmt_taskq0); 4592 } 4593 4594 static void 4595 hn_suspend(struct hn_softc *sc) 4596 { 4597 4598 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 4599 hn_suspend_data(sc); 4600 hn_suspend_mgmt(sc); 4601 } 4602 4603 static void 4604 hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt) 4605 { 4606 int i; 4607 4608 KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt, 4609 ("invalid TX ring count %d", tx_ring_cnt)); 4610 4611 for (i = 0; i < tx_ring_cnt; ++i) { 4612 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 4613 4614 mtx_lock(&txr->hn_tx_lock); 4615 txr->hn_suspended = 0; 4616 mtx_unlock(&txr->hn_tx_lock); 4617 } 4618 } 4619 4620 static void 4621 hn_resume_data(struct hn_softc *sc) 4622 { 4623 int i; 4624 4625 HN_LOCK_ASSERT(sc); 4626 4627 /* 4628 * Re-enable RX. 4629 */ 4630 hn_set_rxfilter(sc); 4631 4632 /* 4633 * Make sure to clear suspend status on "all" TX rings, 4634 * since hn_tx_ring_inuse can be changed after 4635 * hn_suspend_data(). 4636 */ 4637 hn_resume_tx(sc, sc->hn_tx_ring_cnt); 4638 4639 #ifdef HN_IFSTART_SUPPORT 4640 if (!hn_use_if_start) 4641 #endif 4642 { 4643 /* 4644 * Flush unused drbrs, since hn_tx_ring_inuse may be 4645 * reduced. 4646 */ 4647 for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i) 4648 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 4649 } 4650 4651 /* 4652 * Kick start TX. 4653 */ 4654 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 4655 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 4656 4657 /* 4658 * Use txeof task, so that any pending oactive can be 4659 * cleared properly. 4660 */ 4661 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 4662 } 4663 } 4664 4665 static void 4666 hn_resume_mgmt(struct hn_softc *sc) 4667 { 4668 4669 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 4670 4671 /* 4672 * Kick off network change detection, if it was pending. 4673 * If no network change was pending, start link status 4674 * checks, which is more lightweight than network change 4675 * detection. 4676 */ 4677 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 4678 hn_change_network(sc); 4679 else 4680 hn_update_link_status(sc); 4681 } 4682 4683 static void 4684 hn_resume(struct hn_softc *sc) 4685 { 4686 4687 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 4688 hn_resume_data(sc); 4689 hn_resume_mgmt(sc); 4690 } 4691 4692 static void 4693 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) 4694 { 4695 const struct rndis_status_msg *msg; 4696 int ofs; 4697 4698 if (dlen < sizeof(*msg)) { 4699 if_printf(sc->hn_ifp, "invalid RNDIS status\n"); 4700 return; 4701 } 4702 msg = data; 4703 4704 switch (msg->rm_status) { 4705 case RNDIS_STATUS_MEDIA_CONNECT: 4706 case RNDIS_STATUS_MEDIA_DISCONNECT: 4707 hn_update_link_status(sc); 4708 break; 4709 4710 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: 4711 /* Not really useful; ignore. 
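		 * The driver already negotiates task offloads through
		 * RNDIS OIDs at attach time, so the host's current
		 * offload configuration is not needed here.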
*/ 4712 break; 4713 4714 case RNDIS_STATUS_NETWORK_CHANGE: 4715 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); 4716 if (dlen < ofs + msg->rm_stbuflen || 4717 msg->rm_stbuflen < sizeof(uint32_t)) { 4718 if_printf(sc->hn_ifp, "network changed\n"); 4719 } else { 4720 uint32_t change; 4721 4722 memcpy(&change, ((const uint8_t *)msg) + ofs, 4723 sizeof(change)); 4724 if_printf(sc->hn_ifp, "network changed, change %u\n", 4725 change); 4726 } 4727 hn_change_network(sc); 4728 break; 4729 4730 default: 4731 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", 4732 msg->rm_status); 4733 break; 4734 } 4735 } 4736 4737 static int 4738 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) 4739 { 4740 const struct rndis_pktinfo *pi = info_data; 4741 uint32_t mask = 0; 4742 4743 while (info_dlen != 0) { 4744 const void *data; 4745 uint32_t dlen; 4746 4747 if (__predict_false(info_dlen < sizeof(*pi))) 4748 return (EINVAL); 4749 if (__predict_false(info_dlen < pi->rm_size)) 4750 return (EINVAL); 4751 info_dlen -= pi->rm_size; 4752 4753 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) 4754 return (EINVAL); 4755 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) 4756 return (EINVAL); 4757 dlen = pi->rm_size - pi->rm_pktinfooffset; 4758 data = pi->rm_data; 4759 4760 switch (pi->rm_type) { 4761 case NDIS_PKTINFO_TYPE_VLAN: 4762 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) 4763 return (EINVAL); 4764 info->vlan_info = *((const uint32_t *)data); 4765 mask |= HN_RXINFO_VLAN; 4766 break; 4767 4768 case NDIS_PKTINFO_TYPE_CSUM: 4769 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) 4770 return (EINVAL); 4771 info->csum_info = *((const uint32_t *)data); 4772 mask |= HN_RXINFO_CSUM; 4773 break; 4774 4775 case HN_NDIS_PKTINFO_TYPE_HASHVAL: 4776 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) 4777 return (EINVAL); 4778 info->hash_value = *((const uint32_t *)data); 4779 mask |= HN_RXINFO_HASHVAL; 4780 break; 4781 4782 case HN_NDIS_PKTINFO_TYPE_HASHINF: 4783 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) 4784 return (EINVAL); 4785 info->hash_info = *((const uint32_t *)data); 4786 mask |= HN_RXINFO_HASHINF; 4787 break; 4788 4789 default: 4790 goto next; 4791 } 4792 4793 if (mask == HN_RXINFO_ALL) { 4794 /* All found; done */ 4795 break; 4796 } 4797 next: 4798 pi = (const struct rndis_pktinfo *) 4799 ((const uint8_t *)pi + pi->rm_size); 4800 } 4801 4802 /* 4803 * Final fixup. 4804 * - If there is no hash value, invalidate the hash info. 4805 */ 4806 if ((mask & HN_RXINFO_HASHVAL) == 0) 4807 info->hash_info = HN_NDIS_HASH_INFO_INVALID; 4808 return (0); 4809 } 4810 4811 static __inline bool 4812 hn_rndis_check_overlap(int off, int len, int check_off, int check_len) 4813 { 4814 4815 if (off < check_off) { 4816 if (__predict_true(off + len <= check_off)) 4817 return (false); 4818 } else if (off > check_off) { 4819 if (__predict_true(check_off + check_len <= off)) 4820 return (false); 4821 } 4822 return (true); 4823 } 4824 4825 static void 4826 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) 4827 { 4828 const struct rndis_packet_msg *pkt; 4829 struct hn_rxinfo info; 4830 int data_off, pktinfo_off, data_len, pktinfo_len; 4831 4832 /* 4833 * Check length. 
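	 * An RNDIS packet message carries three optional regions (data,
	 * OOB data and per-packet-info), each described by an offset and
	 * a length; every region must fit within rm_len, which in turn
	 * must fit within the received chunk (dlen).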
	 */
	if (__predict_false(dlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
		return;
	}
	pkt = data;

	if (__predict_false(dlen < pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
		return;
	}
	if (__predict_false(pkt->rm_len <
	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
		    "msglen %u, data %u, oob %u, pktinfo %u\n",
		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
		    pkt->rm_pktinfolen);
		return;
	}
	if (__predict_false(pkt->rm_datalen == 0)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
		return;
	}

	/*
	 * Check offsets.
	 */
#define IS_OFFSET_INVALID(ofs)			\
	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))

	/* XXX Hyper-V does not meet data offset alignment requirement */
	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data offset %u\n", pkt->rm_dataoffset);
		return;
	}
	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "oob offset %u\n", pkt->rm_oobdataoffset);
		return;
	}
	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
		return;
	}

#undef IS_OFFSET_INVALID

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
	data_len = pkt->rm_datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
	pktinfo_len = pkt->rm_pktinfolen;

	/*
	 * Check OOB coverage.
	 */
	if (__predict_false(pkt->rm_oobdatalen != 0)) {
		int oob_off, oob_len;

		if_printf(rxr->hn_ifp, "got oobdata\n");
		oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
		oob_len = pkt->rm_oobdatalen;

		if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overflow, msglen %u, oob abs %d len %d\n",
			    pkt->rm_len, oob_off, oob_len);
			return;
		}

		/*
		 * Check against data.
		 */
		if (hn_rndis_check_overlap(oob_off, oob_len,
		    data_off, data_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps data, oob abs %d len %d, "
			    "data abs %d len %d\n",
			    oob_off, oob_len, data_off, data_len);
			return;
		}

		/*
		 * Check against pktinfo.
		 */
		if (pktinfo_len != 0 &&
		    hn_rndis_check_overlap(oob_off, oob_len,
		    pktinfo_off, pktinfo_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps pktinfo, oob abs %d len %d, "
			    "pktinfo abs %d len %d\n",
			    oob_off, oob_len, pktinfo_off, pktinfo_len);
			return;
		}
	}

	/*
	 * Check per-packet-info coverage and find useful per-packet-info.
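	 * The per-packet-info region is a chain of rndis_pktinfo records;
	 * hn_rndis_rxinfo() walks it to extract the VLAN, RX checksum and
	 * RSS hash metadata consumed later by hn_rxpkt().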
4937 */ 4938 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; 4939 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; 4940 info.hash_info = HN_NDIS_HASH_INFO_INVALID; 4941 if (__predict_true(pktinfo_len != 0)) { 4942 bool overlap; 4943 int error; 4944 4945 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { 4946 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4947 "pktinfo overflow, msglen %u, " 4948 "pktinfo abs %d len %d\n", 4949 pkt->rm_len, pktinfo_off, pktinfo_len); 4950 return; 4951 } 4952 4953 /* 4954 * Check packet info coverage. 4955 */ 4956 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, 4957 data_off, data_len); 4958 if (__predict_false(overlap)) { 4959 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4960 "pktinfo overlap data, pktinfo abs %d len %d, " 4961 "data abs %d len %d\n", 4962 pktinfo_off, pktinfo_len, data_off, data_len); 4963 return; 4964 } 4965 4966 /* 4967 * Find useful per-packet-info. 4968 */ 4969 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, 4970 pktinfo_len, &info); 4971 if (__predict_false(error)) { 4972 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " 4973 "pktinfo\n"); 4974 return; 4975 } 4976 } 4977 4978 if (__predict_false(data_off + data_len > pkt->rm_len)) { 4979 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4980 "data overflow, msglen %u, data abs %d len %d\n", 4981 pkt->rm_len, data_off, data_len); 4982 return; 4983 } 4984 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); 4985 } 4986 4987 static __inline void 4988 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) 4989 { 4990 const struct rndis_msghdr *hdr; 4991 4992 if (__predict_false(dlen < sizeof(*hdr))) { 4993 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); 4994 return; 4995 } 4996 hdr = data; 4997 4998 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { 4999 /* Hot data path. */ 5000 hn_rndis_rx_data(rxr, data, dlen); 5001 /* Done! */ 5002 return; 5003 } 5004 5005 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) 5006 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); 5007 else 5008 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); 5009 } 5010 5011 static void 5012 hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) 5013 { 5014 const struct hn_nvs_hdr *hdr; 5015 5016 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { 5017 if_printf(sc->hn_ifp, "invalid nvs notify\n"); 5018 return; 5019 } 5020 hdr = VMBUS_CHANPKT_CONST_DATA(pkt); 5021 5022 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { 5023 /* Useless; ignore */ 5024 return; 5025 } 5026 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); 5027 } 5028 5029 static void 5030 hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, 5031 const struct vmbus_chanpkt_hdr *pkt) 5032 { 5033 struct hn_nvs_sendctx *sndc; 5034 5035 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; 5036 sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt), 5037 VMBUS_CHANPKT_DATALEN(pkt)); 5038 /* 5039 * NOTE: 5040 * 'sndc' CAN NOT be accessed anymore, since it can be freed by 5041 * its callback. 
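	 * The common case here is a transmit completion, whose callback
	 * recycles the associated TX descriptor.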
5042 */ 5043 } 5044 5045 static void 5046 hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 5047 const struct vmbus_chanpkt_hdr *pkthdr) 5048 { 5049 const struct vmbus_chanpkt_rxbuf *pkt; 5050 const struct hn_nvs_hdr *nvs_hdr; 5051 int count, i, hlen; 5052 5053 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { 5054 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); 5055 return; 5056 } 5057 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); 5058 5059 /* Make sure that this is a RNDIS message. */ 5060 if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { 5061 if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", 5062 nvs_hdr->nvs_type); 5063 return; 5064 } 5065 5066 hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); 5067 if (__predict_false(hlen < sizeof(*pkt))) { 5068 if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); 5069 return; 5070 } 5071 pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; 5072 5073 if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { 5074 if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", 5075 pkt->cp_rxbuf_id); 5076 return; 5077 } 5078 5079 count = pkt->cp_rxbuf_cnt; 5080 if (__predict_false(hlen < 5081 __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { 5082 if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); 5083 return; 5084 } 5085 5086 /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ 5087 for (i = 0; i < count; ++i) { 5088 int ofs, len; 5089 5090 ofs = pkt->cp_rxbuf[i].rb_ofs; 5091 len = pkt->cp_rxbuf[i].rb_len; 5092 if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { 5093 if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " 5094 "ofs %d, len %d\n", i, ofs, len); 5095 continue; 5096 } 5097 hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); 5098 } 5099 5100 /* 5101 * Ack the consumed RXBUF associated w/ this channel packet, 5102 * so that this RXBUF can be recycled by the hypervisor. 5103 */ 5104 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); 5105 } 5106 5107 static void 5108 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 5109 uint64_t tid) 5110 { 5111 struct hn_nvs_rndis_ack ack; 5112 int retries, error; 5113 5114 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; 5115 ack.nvs_status = HN_NVS_STATUS_OK; 5116 5117 retries = 0; 5118 again: 5119 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, 5120 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); 5121 if (__predict_false(error == EAGAIN)) { 5122 /* 5123 * NOTE: 5124 * This should _not_ happen in real world, since the 5125 * consumption of the TX bufring from the TX path is 5126 * controlled. 5127 */ 5128 if (rxr->hn_ack_failed == 0) 5129 if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); 5130 rxr->hn_ack_failed++; 5131 retries++; 5132 if (retries < 10) { 5133 DELAY(100); 5134 goto again; 5135 } 5136 /* RXBUF leaks! */ 5137 if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); 5138 } 5139 } 5140 5141 static void 5142 hn_chan_callback(struct vmbus_channel *chan, void *xrxr) 5143 { 5144 struct hn_rx_ring *rxr = xrxr; 5145 struct hn_softc *sc = rxr->hn_ifp->if_softc; 5146 5147 for (;;) { 5148 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; 5149 int error, pktlen; 5150 5151 pktlen = rxr->hn_pktbuf_len; 5152 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); 5153 if (__predict_false(error == ENOBUFS)) { 5154 void *nbuf; 5155 int nlen; 5156 5157 /* 5158 * Expand channel packet buffer. 5159 * 5160 * XXX 5161 * Use M_WAITOK here, since allocation failure 5162 * is fatal. 
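			 * The buffer is doubled, and doubled again as
			 * needed, until it can hold the reported packet
			 * length (e.g. 16KB -> 32KB with the default
			 * HN_PKTBUF_LEN_DEF).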
5163 */ 5164 nlen = rxr->hn_pktbuf_len * 2; 5165 while (nlen < pktlen) 5166 nlen *= 2; 5167 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); 5168 5169 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", 5170 rxr->hn_pktbuf_len, nlen); 5171 5172 free(rxr->hn_pktbuf, M_DEVBUF); 5173 rxr->hn_pktbuf = nbuf; 5174 rxr->hn_pktbuf_len = nlen; 5175 /* Retry! */ 5176 continue; 5177 } else if (__predict_false(error == EAGAIN)) { 5178 /* No more channel packets; done! */ 5179 break; 5180 } 5181 KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error)); 5182 5183 switch (pkt->cph_type) { 5184 case VMBUS_CHANPKT_TYPE_COMP: 5185 hn_nvs_handle_comp(sc, chan, pkt); 5186 break; 5187 5188 case VMBUS_CHANPKT_TYPE_RXBUF: 5189 hn_nvs_handle_rxbuf(rxr, chan, pkt); 5190 break; 5191 5192 case VMBUS_CHANPKT_TYPE_INBAND: 5193 hn_nvs_handle_notify(sc, pkt); 5194 break; 5195 5196 default: 5197 if_printf(rxr->hn_ifp, "unknown chan pkt %u\n", 5198 pkt->cph_type); 5199 break; 5200 } 5201 } 5202 hn_chan_rollup(rxr, rxr->hn_txr); 5203 } 5204 5205 static void 5206 hn_tx_taskq_create(void *arg __unused) 5207 { 5208 5209 if (vm_guest != VM_GUEST_HV) 5210 return; 5211 5212 if (!hn_share_tx_taskq) 5213 return; 5214 5215 hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, 5216 taskqueue_thread_enqueue, &hn_tx_taskq); 5217 if (hn_bind_tx_taskq >= 0) { 5218 int cpu = hn_bind_tx_taskq; 5219 cpuset_t cpu_set; 5220 5221 if (cpu > mp_ncpus - 1) 5222 cpu = mp_ncpus - 1; 5223 CPU_SETOF(cpu, &cpu_set); 5224 taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET, 5225 &cpu_set, "hn tx"); 5226 } else { 5227 taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); 5228 } 5229 } 5230 SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND, 5231 hn_tx_taskq_create, NULL); 5232 5233 static void 5234 hn_tx_taskq_destroy(void *arg __unused) 5235 { 5236 5237 if (hn_tx_taskq != NULL) 5238 taskqueue_free(hn_tx_taskq); 5239 } 5240 SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND, 5241 hn_tx_taskq_destroy, NULL); 5242