/*-
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet6.h"
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/buf_ring.h>

#include <machine/atomic.h>
#include <machine/in_cksum.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/rndis.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus.h>
#include <dev/hyperv/include/vmbus_xact.h>

#include <dev/hyperv/netvsc/ndis.h>
#include <dev/hyperv/netvsc/if_hnreg.h>
#include <dev/hyperv/netvsc/if_hnvar.h>
#include <dev/hyperv/netvsc/hn_nvs.h>
#include <dev/hyperv/netvsc/hn_rndis.h>

#include "vmbus_if.h"

#define HN_IFSTART_SUPPORT

#define HN_RING_CNT_DEF_MAX		8

/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT			512

#define HN_RNDIS_PKT_LEN					\
	(sizeof(struct rndis_packet_msg) +			\
	 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) +	\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
#define HN_RNDIS_PKT_BOUNDARY		PAGE_SIZE
#define HN_RNDIS_PKT_ALIGN		CACHE_LINE_SIZE

#define HN_TX_DATA_BOUNDARY		PAGE_SIZE
#define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE		PAGE_SIZE
/* -1 for RNDIS packet message */
#define HN_TX_DATA_SEGCNT_MAX		(HN_GPACNT_MAX - 1)

#define HN_DIRECT_TX_SIZE_DEF		128

#define HN_EARLY_TXEOF_THRESH		8

#define HN_PKTBUF_LEN_DEF		(16 * 1024)

#define HN_LROENT_CNT_DEF		128

#define HN_LRO_LENLIM_MULTIRX_DEF	(12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF		(25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp)		(2 * (ifp)->if_mtu)

#define HN_LRO_ACKCNT_DEF		1

#define HN_LOCK_INIT(sc)		\
	sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc)		sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc)		sx_assert(&(sc)->hn_lock, SA_XLOCKED)
#define HN_LOCK(sc)			sx_xlock(&(sc)->hn_lock)
#define HN_UNLOCK(sc)			sx_xunlock(&(sc)->hn_lock)

#define HN_CSUM_IP_MASK			(CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
#define HN_CSUM_IP6_MASK		(CSUM_IP6_TCP | CSUM_IP6_UDP)
#define HN_CSUM_IP_HWASSIST(sc)		\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
#define HN_CSUM_IP6_HWASSIST(sc)	\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)

struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
	SLIST_ENTRY(hn_txdesc)		link;
#endif
	struct mbuf			*m;
	struct hn_tx_ring		*txr;
	int				refs;
	uint32_t			flags;	/* HN_TXD_FLAG_ */
	struct hn_nvs_sendctx		send_ctx;
	uint32_t			chim_index;
	int				chim_size;

	bus_dmamap_t			data_dmap;

	bus_addr_t			rndis_pkt_paddr;
	struct rndis_packet_msg		*rndis_pkt;
	bus_dmamap_t			rndis_pkt_dmap;
};

#define HN_TXD_FLAG_ONLIST		0x0001
#define HN_TXD_FLAG_DMAMAP		0x0002

struct hn_rxinfo {
	uint32_t			vlan_info;
	uint32_t			csum_info;
	uint32_t			hash_info;
	uint32_t			hash_value;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
	(HN_RXINFO_VLAN |		\
	 HN_RXINFO_CSUM |		\
	 HN_RXINFO_HASHINF |		\
	 HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0

static int			hn_probe(device_t);
static int			hn_attach(device_t);
static int			hn_detach(device_t);
static int			hn_shutdown(device_t);
static void			hn_chan_callback(struct vmbus_channel *, void *);

static void			hn_init(void *);
static int			hn_ioctl(struct ifnet *, u_long, caddr_t);
#ifdef HN_IFSTART_SUPPORT
static void			hn_start(struct ifnet *);
#endif
static int			hn_transmit(struct ifnet *, struct mbuf *);
static void			hn_xmit_qflush(struct ifnet *);
static int			hn_ifmedia_upd(struct ifnet *);
static void			hn_ifmedia_sts(struct ifnet *, struct ifmediareq *);

static int			hn_rndis_rxinfo(const void *, int, struct hn_rxinfo *);
static void			hn_rndis_rx_data(struct hn_rx_ring *, const void *, int);
static void			hn_rndis_rx_status(struct hn_softc *, const void *, int);

static void			hn_nvs_handle_notify(struct hn_softc *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_handle_comp(struct hn_softc *,
				    struct vmbus_channel *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_handle_rxbuf(struct hn_rx_ring *,
				    struct vmbus_channel *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_ack_rxbuf(struct hn_rx_ring *,
				    struct vmbus_channel *, uint64_t);

#if __FreeBSD_version >= 1100099
static int			hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int			hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
#if __FreeBSD_version < 1100095
static int			hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
#else
static int			hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int			hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);

static void			hn_stop(struct hn_softc *);
static void			hn_init_locked(struct hn_softc *);
static int			hn_chan_attach(struct hn_softc *, struct vmbus_channel *);
static void			hn_chan_detach(struct hn_softc *, struct vmbus_channel *);
static int			hn_attach_subchans(struct hn_softc *);
static void			hn_detach_allchans(struct hn_softc *);
static void			hn_chan_rollup(struct hn_rx_ring *, struct hn_tx_ring *);
static void			hn_set_ring_inuse(struct hn_softc *, int);
static int			hn_synth_attach(struct hn_softc *, int);
static void			hn_synth_detach(struct hn_softc *);
static int			hn_synth_alloc_subchans(struct hn_softc *, int *);
static void			hn_suspend(struct hn_softc *);
static void			hn_suspend_data(struct hn_softc *);
static void			hn_suspend_mgmt(struct hn_softc *);
static void			hn_resume(struct hn_softc *);
static void			hn_resume_data(struct hn_softc *);
static void			hn_resume_mgmt(struct hn_softc *);
static void			hn_suspend_mgmt_taskfunc(void *, int);
static void			hn_chan_drain(struct vmbus_channel *);

static void			hn_update_link_status(struct hn_softc *);
static void			hn_change_network(struct hn_softc *);
static void			hn_link_taskfunc(void *, int);
static void			hn_netchg_init_taskfunc(void *, int);
static void			hn_netchg_status_taskfunc(void *, int);
static void			hn_link_status(struct hn_softc *);

static int			hn_create_rx_data(struct hn_softc *, int);
static void			hn_destroy_rx_data(struct hn_softc *);
static int			hn_check_iplen(const struct mbuf *, int);
static int			hn_set_rxfilter(struct hn_softc *);
static int			hn_rss_reconfig(struct hn_softc *);
static void			hn_rss_ind_fixup(struct hn_softc *, int);
static int			hn_rxpkt(struct hn_rx_ring *, const void *, int,
				    const struct hn_rxinfo *);

static int			hn_tx_ring_create(struct hn_softc *, int);
static void			hn_tx_ring_destroy(struct hn_tx_ring *);
static int			hn_create_tx_data(struct hn_softc *, int);
static void			hn_fixup_tx_data(struct hn_softc *);
static void			hn_destroy_tx_data(struct hn_softc *);
static void			hn_txdesc_dmamap_destroy(struct hn_txdesc *);
static int			hn_encap(struct hn_tx_ring *, struct hn_txdesc *,
				    struct mbuf **);
static int			hn_txpkt(struct ifnet *, struct hn_tx_ring *,
				    struct hn_txdesc *);
static void			hn_set_chim_size(struct hn_softc *, int);
static void			hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool			hn_tx_ring_pending(struct hn_tx_ring *);
static void			hn_tx_ring_qflush(struct hn_tx_ring *);
static void			hn_resume_tx(struct hn_softc *, int);
static int			hn_get_txswq_depth(const struct hn_tx_ring *);
static void			hn_txpkt_done(struct hn_nvs_sendctx *,
				    struct hn_softc *, struct vmbus_channel *,
				    const void *, int);
static int			hn_txpkt_sglist(struct hn_tx_ring *, struct hn_txdesc *);
static int			hn_txpkt_chim(struct hn_tx_ring *, struct hn_txdesc *);
static int			hn_xmit(struct hn_tx_ring *, int);
static void			hn_xmit_taskfunc(void *, int);
static void			hn_xmit_txeof(struct hn_tx_ring *);
static void			hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int			hn_start_locked(struct hn_tx_ring *, int);
static void			hn_start_taskfunc(void *, int);
static void			hn_start_txeof(struct hn_tx_ring *);
static void			hn_start_txeof_taskfunc(void *, int);
#endif

SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust tcp segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust tcp segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust udp datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust udp datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust ip packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust ip packet verification on host side, "
    "when csum info is missing (global setting)");

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

/* Use shared TX taskqueue */
static int hn_share_tx_taskq = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN,
    &hn_share_tx_taskq, 0, "Enable shared TX taskqueue");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

/* Bind TX taskqueue to the target CPU */
static int hn_bind_tx_taskq = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN,
    &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");
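/*
 * NOTE (sketch, not part of the original sources):
 * The CTLFLAG_RDTUN knobs in this block live under the hw.hn sysctl node
 * and are read at boot, so they would typically be set as loader tunables,
 * e.g. in /boot/loader.conf (illustrative values only):
 *
 *	hw.hn.trust_hosttcp=0
 *	hw.hn.chan_cnt=4
 *	hw.hn.tx_ring_cnt=2
 */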
/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

static u_int hn_cpu_index;		/* next CPU for channel */
static struct taskqueue *hn_tx_taskq;	/* shared TX taskqueue */

static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};

static device_method_t hn_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		hn_probe),
	DEVMETHOD(device_attach,	hn_attach),
	DEVMETHOD(device_detach,	hn_detach),
	DEVMETHOD(device_shutdown,	hn_shutdown),
	DEVMETHOD_END
};

static driver_t hn_driver = {
	"hn",
	hn_methods,
	sizeof(struct hn_softc)
};

static devclass_t hn_devclass;

DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);

#if __FreeBSD_version >= 1100099
static void
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
	int i;

	for (i = 0; i < sc->hn_rx_ring_inuse; ++i)
		sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif

static int
hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size == 0, ("invalid rndis sglist txd"));
	return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
	    &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
}

static int
hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
	struct hn_nvs_rndis rndis;

	KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size > 0, ("invalid rndis chim txd"));

	rndis.nvs_type = HN_NVS_TYPE_RNDIS;
	rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
	rndis.nvs_chim_idx = txd->chim_index;
	rndis.nvs_chim_sz = txd->chim_size;

	return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
	    &rndis, sizeof(rndis), &txd->send_ctx));
}

static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
{
	int i, bmap_cnt = sc->hn_chim_bmap_cnt;
	u_long *bmap = sc->hn_chim_bmap;
	uint32_t ret = HN_NVS_CHIM_IDX_INVALID;

	for (i = 0; i < bmap_cnt; ++i) {
		int idx;

		idx = ffsl(~bmap[i]);
		if (idx == 0)
			continue;

		--idx; /* ffsl is 1-based */
		KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
		    ("invalid i %d and idx %d", i, idx));

		if (atomic_testandset_long(&bmap[i], idx))
			continue;

		ret = i * LONG_BIT + idx;
		break;
	}
	return (ret);
}
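/*
 * NOTE (descriptive, not part of the original sources):
 * hn_chim_alloc() above scans the chimney sending-buffer bitmap for a clear
 * bit with ffsl(~bmap[i]) and claims it with atomic_testandset_long(), so no
 * lock is needed on the allocation path.  hn_chim_free() below simply clears
 * that bit again; the chimney index encodes both the bitmap word
 * (chim_idx / LONG_BIT) and the bit within that word (chim_idx % LONG_BIT).
 */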
static __inline void
hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
{
	u_long mask;
	uint32_t idx;

	idx = chim_idx / LONG_BIT;
	KASSERT(idx < sc->hn_chim_bmap_cnt,
	    ("invalid chimney index 0x%x", chim_idx));

	mask = 1UL << (chim_idx % LONG_BIT);
	KASSERT(sc->hn_chim_bmap[idx] & mask,
	    ("index bitmap 0x%lx, chimney index %u, "
	     "bitmap idx %d, bitmask 0x%lx",
	     sc->hn_chim_bmap[idx], chim_idx, idx, mask));

	atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
}

#if defined(INET6) || defined(INET)
/*
 * NOTE: If this function fails, the m_head will be freed.
 */
static __inline struct mbuf *
hn_tso_fixup(struct mbuf *m_head)
{
	struct ether_vlan_header *evl;
	struct tcphdr *th;
	int ehlen;

	KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));

#define PULLUP_HDR(m, len)				\
do {							\
	if (__predict_false((m)->m_len < (len))) {	\
		(m) = m_pullup((m), (len));		\
		if ((m) == NULL)			\
			return (NULL);			\
	}						\
} while (0)

	PULLUP_HDR(m_head, sizeof(*evl));
	evl = mtod(m_head, struct ether_vlan_header *);
	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehlen = ETHER_HDR_LEN;

#ifdef INET
	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
		struct ip *ip;
		int iphlen;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
		ip = mtodo(m_head, ehlen);
		iphlen = ip->ip_hl << 2;

		PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
		th = mtodo(m_head, ehlen + iphlen);

		ip->ip_len = 0;
		ip->ip_sum = 0;
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	}
#endif
#if defined(INET6) && defined(INET)
	else
#endif
#ifdef INET6
	{
		struct ip6_hdr *ip6;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
		ip6 = mtodo(m_head, ehlen);
		if (ip6->ip6_nxt != IPPROTO_TCP) {
			m_freem(m_head);
			return (NULL);
		}

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
		th = mtodo(m_head, ehlen + sizeof(*ip6));

		ip6->ip6_plen = 0;
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
	}
#endif
	return (m_head);

#undef PULLUP_HDR
}
#endif	/* INET6 || INET */

static int
hn_set_rxfilter(struct hn_softc *sc)
{
	struct ifnet *ifp = sc->hn_ifp;
	uint32_t filter;
	int error = 0;

	HN_LOCK_ASSERT(sc);

	if (ifp->if_flags & IFF_PROMISC) {
		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
	} else {
		filter = NDIS_PACKET_TYPE_DIRECTED;
		if (ifp->if_flags & IFF_BROADCAST)
			filter |= NDIS_PACKET_TYPE_BROADCAST;
#ifdef notyet
		/*
		 * See the comment in SIOCADDMULTI/SIOCDELMULTI.
		 */
		/* TODO: support multicast list */
		if ((ifp->if_flags & IFF_ALLMULTI) ||
		    !TAILQ_EMPTY(&ifp->if_multiaddrs))
			filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
#else
		/* Always enable ALLMULTI */
		filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
#endif
	}

	if (sc->hn_rx_filter != filter) {
		error = hn_rndis_set_rxfilter(sc, filter);
		if (!error)
			sc->hn_rx_filter = filter;
	}
	return (error);
}

static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{

	KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
	if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
		return txr->hn_txdesc_cnt;
	return hn_tx_swq_depth;
}

static int
hn_rss_reconfig(struct hn_softc *sc)
{
	int error;

	HN_LOCK_ASSERT(sc);

	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
		return (ENXIO);

	/*
	 * Disable RSS first.
	 *
	 * NOTE:
	 * Direct reconfiguration by setting the UNCHG flags does
	 * _not_ work properly.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "disable RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS disable failed\n");
		return (error);
	}

	/*
	 * Reenable the RSS w/ the updated RSS key or indirect
	 * table.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "reconfig RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS reconfig failed\n");
		return (error);
	}
	return (0);
}

static void
hn_rss_ind_fixup(struct hn_softc *sc, int nchan)
{
	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int i;

	KASSERT(nchan > 1, ("invalid # of channels %d", nchan));

	/*
	 * Check indirect table to make sure that all channels in it
	 * can be used.
	 */
	for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
		if (rss->rss_ind[i] >= nchan) {
			if_printf(sc->hn_ifp,
			    "RSS indirect table %d fixup: %u -> %d\n",
			    i, rss->rss_ind[i], nchan - 1);
			rss->rss_ind[i] = nchan - 1;
		}
	}
}

static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
{

	return EOPNOTSUPP;
}

static void
hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct hn_softc *sc = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
		ifmr->ifm_active |= IFM_NONE;
		return;
	}
	ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}

/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
static const struct hyperv_guid g_net_vsc_device_type = {
	.hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
	    0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
};

static int
hn_probe(device_t dev)
{

	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
	    &g_net_vsc_device_type) == 0) {
		device_set_desc(dev, "Hyper-V Network Interface");
		return BUS_PROBE_DEFAULT;
	}
	return ENXIO;
}

static int
hn_attach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	uint8_t eaddr[ETHER_ADDR_LEN];
	struct ifnet *ifp = NULL;
	int error, ring_cnt, tx_ring_cnt;

	sc->hn_dev = dev;
	sc->hn_prichan = vmbus_get_channel(dev);
	HN_LOCK_INIT(sc);

	/*
	 * Setup taskqueue for transmission.
	 */
	if (hn_tx_taskq == NULL) {
		sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &sc->hn_tx_taskq);
		if (hn_bind_tx_taskq >= 0) {
			int cpu = hn_bind_tx_taskq;
			cpuset_t cpu_set;

			if (cpu > mp_ncpus - 1)
				cpu = mp_ncpus - 1;
			CPU_SETOF(cpu, &cpu_set);
			taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1,
			    PI_NET, &cpu_set, "%s tx",
			    device_get_nameunit(dev));
		} else {
			taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET,
			    "%s tx", device_get_nameunit(dev));
		}
	} else {
		sc->hn_tx_taskq = hn_tx_taskq;
	}

	/*
	 * Setup taskqueue for management tasks, e.g. link status.
	 */
	sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
	taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
	TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
	TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
	    hn_netchg_status_taskfunc, sc);

	/*
	 * Allocate ifnet and setup its name earlier, so that if_printf
	 * can be used by functions which will be called after
	 * ether_ifattach().
	 */
	ifp = sc->hn_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/*
	 * Initialize ifmedia earlier so that it can be unconditionally
	 * destroyed, if an error happens later on.
	 */
	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);

	/*
	 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
	 * to use (tx_ring_cnt).
	 *
	 * NOTE:
	 * The # of RX rings to use is same as the # of channels to use.
	 */
	ring_cnt = hn_chan_cnt;
	if (ring_cnt <= 0) {
		/* Default */
		ring_cnt = mp_ncpus;
		if (ring_cnt > HN_RING_CNT_DEF_MAX)
			ring_cnt = HN_RING_CNT_DEF_MAX;
	} else if (ring_cnt > mp_ncpus) {
		ring_cnt = mp_ncpus;
	}

	tx_ring_cnt = hn_tx_ring_cnt;
	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
		tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		/* ifnet.if_start only needs one TX ring. */
		tx_ring_cnt = 1;
	}
#endif

	/*
	 * Set the leader CPU for channels.
	 */
	sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;

	/*
	 * Create enough TX/RX rings, even if only limited number of
	 * channels can be allocated.
	 */
	error = hn_create_tx_data(sc, tx_ring_cnt);
	if (error)
		goto failed;
	error = hn_create_rx_data(sc, ring_cnt);
	if (error)
		goto failed;

	/*
	 * Create transaction context for NVS and RNDIS transactions.
	 */
	sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
	    HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
	if (sc->hn_xact == NULL)
		goto failed;

	/*
	 * Attach the synthetic parts, i.e. NVS and RNDIS.
	 */
	error = hn_synth_attach(sc, ETHERMTU);
	if (error)
		goto failed;

	error = hn_rndis_get_eaddr(sc, eaddr);
	if (error)
		goto failed;

#if __FreeBSD_version >= 1100099
	if (sc->hn_rx_ring_inuse > 1) {
		/*
		 * Reduce TCP segment aggregation limit for multiple
		 * RX rings to increase ACK timeliness.
		 */
		hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
	}
#endif

	/*
	 * Fixup TX stuffs after synthetic parts are attached.
	 */
	hn_fixup_tx_data(sc);

	ctx = device_get_sysctl_ctx(dev);
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
	    &sc->hn_nvs_ver, 0, "NVS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_ndis_version_sysctl, "A", "NDIS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_caps_sysctl, "A", "capabilities");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_hwassist_sysctl, "A", "hwassist");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rxfilter_sysctl, "A", "rxfilter");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_hash_sysctl, "A", "RSS hash");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
	    CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_key_sysctl, "IU", "RSS key");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_ind_sysctl, "IU", "RSS indirect table");

	/*
	 * Setup the ifmedia, which has been initialized earlier.
	 */
	ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
	/* XXX ifmedia_set really should do this for us */
	sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;

	/*
	 * Setup the ifnet for this interface.
	 */
	ifp->if_baudrate = IF_Gbps(10);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = hn_ioctl;
	ifp->if_init = hn_init;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);

		ifp->if_start = hn_start;
		IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
		ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
		IFQ_SET_READY(&ifp->if_snd);
	} else
#endif
	{
		ifp->if_transmit = hn_transmit;
		ifp->if_qflush = hn_xmit_qflush;
	}

	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
#ifdef foo
	/* We can't diff IPv6 packets from IPv4 packets on RX path. */
	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
#endif
	if (sc->hn_caps & HN_CAP_VLAN) {
		/* XXX not sure about VLAN_MTU. */
		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	}

	ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
	if (ifp->if_hwassist & HN_CSUM_IP_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM;
	if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
	if (sc->hn_caps & HN_CAP_TSO4) {
		ifp->if_capabilities |= IFCAP_TSO4;
		ifp->if_hwassist |= CSUM_IP_TSO;
	}
	if (sc->hn_caps & HN_CAP_TSO6) {
		ifp->if_capabilities |= IFCAP_TSO6;
		ifp->if_hwassist |= CSUM_IP6_TSO;
	}

	/* Enable all available capabilities by default. */
	ifp->if_capenable = ifp->if_capabilities;

	if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
		hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
		ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
		ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
	}

	ether_ifattach(ifp, eaddr);

	if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
		if_printf(ifp, "TSO segcnt %u segsz %u\n",
		    ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
	}

	/* Inform the upper layer about the long frame support. */
	ifp->if_hdrlen = sizeof(struct ether_vlan_header);

	/*
	 * Kick off link status check.
	 */
	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
	hn_update_link_status(sc);

	return (0);
failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
		hn_synth_detach(sc);
	hn_detach(dev);
	return (error);
}

static int
hn_detach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = sc->hn_ifp;

	if (device_is_attached(dev)) {
		HN_LOCK(sc);
		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				hn_stop(sc);
			/*
			 * NOTE:
			 * hn_stop() only suspends data, so the management
			 * tasks have to be suspended manually here.
			 */
			hn_suspend_mgmt(sc);
			hn_synth_detach(sc);
		}
		HN_UNLOCK(sc);
		ether_ifdetach(ifp);
	}

	ifmedia_removeall(&sc->hn_media);
	hn_destroy_rx_data(sc);
	hn_destroy_tx_data(sc);

	if (sc->hn_tx_taskq != hn_tx_taskq)
		taskqueue_free(sc->hn_tx_taskq);
	taskqueue_free(sc->hn_mgmt_taskq0);

	if (sc->hn_xact != NULL)
		vmbus_xact_ctx_destroy(sc->hn_xact);

	if_free(ifp);

	HN_LOCK_DESTROY(sc);
	return (0);
}

static int
hn_shutdown(device_t dev)
{

	return (0);
}

static void
hn_link_status(struct hn_softc *sc)
{
	uint32_t link_status;
	int error;

	error = hn_rndis_get_linkstatus(sc, &link_status);
	if (error) {
		/* XXX what to do? */
		return;
	}

	if (link_status == NDIS_MEDIA_STATE_CONNECTED)
		sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
	else
		sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp,
	    (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
	    LINK_STATE_UP : LINK_STATE_DOWN);
}

static void
hn_link_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		return;
	hn_link_status(sc);
}

static void
hn_netchg_init_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Prevent any link status checks from running. */
	sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;

	/*
	 * Fake up a [link down --> link up] state change; 5 seconds
	 * delay is used, which closely simulates miibus reaction
	 * upon link down event.
	 */
	sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
	taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
	    &sc->hn_netchg_status, 5 * hz);
}

static void
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Re-allow link status checks. */
	sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
	hn_link_status(sc);
}

static void
hn_update_link_status(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
}

static void
hn_change_network(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
}

static __inline int
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m = *m_head;
	int error;

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));

	error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
	    m, segs, nsegs, BUS_DMA_NOWAIT);
	if (error == EFBIG) {
		struct mbuf *m_new;

		m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
		if (m_new == NULL)
			return ENOBUFS;
		else
			*m_head = m = m_new;
		txr->hn_tx_collapsed++;

		error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
		    txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
	}
	if (!error) {
		bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
		    BUS_DMASYNC_PREWRITE);
		txd->flags |= HN_TXD_FLAG_DMAMAP;
	}
	return error;
}

static __inline int
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
	    ("put an onlist txd %#x", txd->flags));

	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
		return 0;

	if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
		    ("chim txd uses dmamap"));
		hn_chim_free(txr->hn_sc, txd->chim_index);
		txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
	} else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
		bus_dmamap_sync(txr->hn_tx_data_dtag,
		    txd->data_dmap, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(txr->hn_tx_data_dtag,
		    txd->data_dmap);
		txd->flags &= ~HN_TXD_FLAG_DMAMAP;
	}

	if (txd->m != NULL) {
		m_freem(txd->m);
		txd->m = NULL;
	}

	txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	KASSERT(txr->hn_txdesc_avail >= 0 &&
	    txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
	    ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
	txr->hn_txdesc_avail++;
	SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	atomic_add_int(&txr->hn_txdesc_avail, 1);
	buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif

	return 1;
}
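/*
 * NOTE (descriptive, not part of the original sources):
 * TX descriptor life cycle: hn_txdesc_get() hands out a descriptor with
 * refs == 1, hn_txdesc_hold() takes an extra reference while a send is in
 * flight, and hn_txdesc_put() drops a reference.  Only the final put
 * releases the chimney buffer or unloads the DMA map, frees the attached
 * mbuf and returns the descriptor to the free list (or buf_ring).
 */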
static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_tx_ring *txr)
{
	struct hn_txdesc *txd;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	txd = SLIST_FIRST(&txr->hn_txlist);
	if (txd != NULL) {
		KASSERT(txr->hn_txdesc_avail > 0,
		    ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
		txr->hn_txdesc_avail--;
		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
	}
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
#endif

	if (txd != NULL) {
#ifdef HN_USE_TXDESC_BUFRING
		atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif
		KASSERT(txd->m == NULL && txd->refs == 0 &&
		    txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
		    (txd->flags & HN_TXD_FLAG_ONLIST) &&
		    (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
		txd->flags &= ~HN_TXD_FLAG_ONLIST;
		txd->refs = 1;
	}
	return txd;
}

static __inline void
hn_txdesc_hold(struct hn_txdesc *txd)
{

	/* 0->1 transition will never work */
	KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs));
	atomic_add_int(&txd->refs, 1);
}

static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
	bool pending = false;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
		pending = true;
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	if (!buf_ring_full(txr->hn_txdesc_br))
		pending = true;
#endif
	return (pending);
}

static __inline void
hn_txeof(struct hn_tx_ring *txr)
{
	txr->hn_has_txeof = 0;
	txr->hn_txeof(txr);
}

static void
hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
    struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
{
	struct hn_txdesc *txd = sndc->hn_cbarg;
	struct hn_tx_ring *txr;

	txr = txd->txr;
	KASSERT(txr->hn_chan == chan,
	    ("channel mismatch, on chan%u, should be chan%u",
	     vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan)));

	txr->hn_has_txeof = 1;
	hn_txdesc_put(txr, txd);

	++txr->hn_txdone_cnt;
	if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
		txr->hn_txdone_cnt = 0;
		if (txr->hn_oactive)
			hn_txeof(txr);
	}
}

static void
hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
{
#if defined(INET) || defined(INET6)
	tcp_lro_flush_all(&rxr->hn_lro);
#endif

	/*
	 * NOTE:
	 * 'txr' could be NULL, if multiple channels and
	 * ifnet.if_start method are enabled.
	 */
	if (txr == NULL || !txr->hn_has_txeof)
		return;

	txr->hn_txdone_cnt = 0;
	hn_txeof(txr);
}

static __inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{

	KASSERT(ofs >= sizeof(struct rndis_packet_msg),
	    ("invalid RNDIS packet msg offset %u", ofs));
	return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));
}

static __inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
    size_t pi_dlen, uint32_t pi_type)
{
	const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
	struct rndis_pktinfo *pi;

	KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
	    ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));

	/*
	 * Per-packet-info does not move; it only grows.
	 *
	 * NOTE:
	 * rm_pktinfooffset in this phase counts from the beginning
	 * of rndis_packet_msg.
	 */
	KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
	    ("%u pktinfo overflows RNDIS packet msg", pi_type));
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset +
	    pkt->rm_pktinfolen);
	pkt->rm_pktinfolen += pi_size;

	pi->rm_size = pi_size;
	pi->rm_type = pi_type;
	pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;

	/* Data immediately follow per-packet-info. */
	pkt->rm_dataoffset += pi_size;

	/* Update RNDIS packet msg length */
	pkt->rm_len += pi_size;

	return (pi->rm_data);
}
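/*
 * NOTE (descriptive, not part of the original sources):
 * The RNDIS packet message built by hn_encap() below is laid out as the
 * rndis_packet_msg header, followed by the per-packet-info records appended
 * through hn_rndis_pktinfo_append() (hash value, VLAN, LSO or checksum),
 * followed by the packet data.  While the message is being built, the
 * offsets count from the start of the message; hn_rndis_pktmsg_offset()
 * rebases them (by subtracting the offset of rm_dataoffset) to the form the
 * host expects just before the message is handed to NVS.
 */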
/*
 * NOTE:
 * If this function fails, then both txd and m_head0 will be freed.
 */
static int
hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
{
	bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
	int error, nsegs, i;
	struct mbuf *m_head = *m_head0;
	struct rndis_packet_msg *pkt;
	uint32_t *pi_data;
	void *chim = NULL;
	int pktlen;

	pkt = txd->rndis_pkt;
	if (m_head->m_pkthdr.len + HN_RNDIS_PKT_LEN < txr->hn_chim_size) {
		/*
		 * This packet is small enough to fit into a chimney sending
		 * buffer.  Try allocating one chimney sending buffer now.
		 */
		txr->hn_tx_chimney_tried++;
		txd->chim_index = hn_chim_alloc(txr->hn_sc);
		if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
			chim = txr->hn_sc->hn_chim +
			    (txd->chim_index * txr->hn_sc->hn_chim_szmax);
			/*
			 * Directly fill the chimney sending buffer w/ the
			 * RNDIS packet message.
			 */
			pkt = chim;
		}
	}

	pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
	pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len;
	pkt->rm_dataoffset = sizeof(*pkt);
	pkt->rm_datalen = m_head->m_pkthdr.len;
	pkt->rm_pktinfooffset = sizeof(*pkt);
	pkt->rm_pktinfolen = 0;

	if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
		/*
		 * Set the hash value for this packet, so that the host could
		 * dispatch the TX done event for this packet back to this TX
		 * ring's channel.
		 */
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
		*pi_data = txr->hn_tx_idx;
	}

	if (m_head->m_flags & M_VLANTAG) {
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
		*pi_data = NDIS_VLAN_INFO_MAKE(
		    EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
		    EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
		    EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));
	}

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
#if defined(INET6) || defined(INET)
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
#ifdef INET
		if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(0,
			    m_head->m_pkthdr.tso_segsz);
		}
#endif
#if defined(INET6) && defined(INET)
		else
#endif
#ifdef INET6
		{
			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(0,
			    m_head->m_pkthdr.tso_segsz);
		}
#endif
#endif	/* INET6 || INET */
	} else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
		if (m_head->m_pkthdr.csum_flags &
		    (CSUM_IP6_TCP | CSUM_IP6_UDP)) {
			*pi_data = NDIS_TXCSUM_INFO_IPV6;
		} else {
			*pi_data = NDIS_TXCSUM_INFO_IPV4;
			if (m_head->m_pkthdr.csum_flags & CSUM_IP)
				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
		}

		if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
			*pi_data |= NDIS_TXCSUM_INFO_TCPCS;
		else if (m_head->m_pkthdr.csum_flags &
		    (CSUM_IP_UDP | CSUM_IP6_UDP))
			*pi_data |= NDIS_TXCSUM_INFO_UDPCS;
	}

	pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
	/* Convert RNDIS packet message offsets */
	pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset);
	pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);

	/*
	 * Fast path: Chimney sending.
	 */
	if (chim != NULL) {
		KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
		    ("chimney buffer is not used"));
		KASSERT(pkt == chim, ("RNDIS pkt not in chimney buffer"));

		m_copydata(m_head, 0, m_head->m_pkthdr.len,
		    ((uint8_t *)chim) + pktlen);

		txd->chim_size = pkt->rm_len;
		txr->hn_gpa_cnt = 0;
		txr->hn_tx_chimney++;
		txr->hn_sendpkt = hn_txpkt_chim;
		goto done;
	}
	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
	    ("chimney buffer is used"));
	KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));

	error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
	if (error) {
		int freed;

		/*
		 * This mbuf is not linked w/ the txd yet, so free it now.
		 */
		m_freem(m_head);
		*m_head0 = NULL;

		freed = hn_txdesc_put(txr, txd);
		KASSERT(freed != 0,
		    ("fail to free txd upon txdma error"));

		txr->hn_txdma_failed++;
		if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1);
		return error;
	}
	*m_head0 = m_head;

	/* +1 RNDIS packet message */
	txr->hn_gpa_cnt = nsegs + 1;

	/* send packet with page buffer */
	txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
	txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
	txr->hn_gpa[0].gpa_len = pktlen;

	/*
	 * Fill the page buffers with mbuf info after the page
	 * buffer for RNDIS packet message.
	 */
	for (i = 0; i < nsegs; ++i) {
		struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];

		gpa->gpa_page = atop(segs[i].ds_addr);
		gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
		gpa->gpa_len = segs[i].ds_len;
	}

	txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
	txd->chim_size = 0;
	txr->hn_sendpkt = hn_txpkt_sglist;
done:
	txd->m = m_head;

	/* Set the completion routine */
	hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);

	return 0;
}

/*
 * NOTE:
 * If this function fails, then txd will be freed, but the mbuf
 * associated w/ the txd will _not_ be freed.
 */
static int
hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
	int error, send_failed = 0;

again:
	/*
	 * Make sure that txd is not freed before ETHER_BPF_MTAP.
	 */
	hn_txdesc_hold(txd);
	error = txr->hn_sendpkt(txr, txd);
	if (!error) {
		ETHER_BPF_MTAP(ifp, txd->m);
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
#ifdef HN_IFSTART_SUPPORT
		if (!hn_use_if_start)
#endif
		{
			if_inc_counter(ifp, IFCOUNTER_OBYTES,
			    txd->m->m_pkthdr.len);
			if (txd->m->m_flags & M_MCAST)
				if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		}
		txr->hn_pkts++;
	}
	hn_txdesc_put(txr, txd);

	if (__predict_false(error)) {
		int freed;

		/*
		 * This should "really rarely" happen.
		 *
		 * XXX Too many RX to be acked or too many sideband
		 * commands to run?  Ask netvsc_channel_rollup()
		 * to kick start later.
		 */
		txr->hn_has_txeof = 1;
		if (!send_failed) {
			txr->hn_send_failed++;
			send_failed = 1;
			/*
			 * Try sending again after set hn_has_txeof;
			 * in case that we missed the last
			 * netvsc_channel_rollup().
			 */
			goto again;
		}
		if_printf(ifp, "send failed\n");

		/*
		 * Caller will perform further processing on the
		 * associated mbuf, so don't free it in hn_txdesc_put();
		 * only unload it from the DMA map in hn_txdesc_put(),
		 * if it was loaded.
		 */
		txd->m = NULL;
		freed = hn_txdesc_put(txr, txd);
		KASSERT(freed != 0,
		    ("fail to free txd upon send error"));

		txr->hn_send_failed++;
	}
	return error;
}

/*
 * Append the specified data to the indicated mbuf chain.  Extend the
 * mbuf chain if the new data does not fit in existing space.
 *
 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
 * There should be an equivalent in the kernel mbuf code,
 * but there does not appear to be one yet.
 *
 * Differs from m_append() in that additional mbufs are
 * allocated with cluster size MJUMPAGESIZE, and filled
 * accordingly.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
static int
hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
	struct mbuf *m, *n;
	int remainder, space;

	for (m = m0; m->m_next != NULL; m = m->m_next)
		;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
		m->m_len += space;
		cp += space;
		remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE);
		if (n == NULL)
			break;
		n->m_len = min(MJUMPAGESIZE, remainder);
		bcopy(cp, mtod(n, caddr_t), n->m_len);
		cp += n->m_len;
		remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;

	return (remainder == 0);
}

#if defined(INET) || defined(INET6)
static __inline int
hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
{
#if __FreeBSD_version >= 1100095
	if (hn_lro_mbufq_depth) {
		tcp_lro_queue_mbuf(lc, m);
		return 0;
	}
#endif
	return tcp_lro_rx(lc, m, 0);
}
#endif

static int
hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
    const struct hn_rxinfo *info)
{
	struct ifnet *ifp = rxr->hn_ifp;
	struct mbuf *m_new;
	int size, do_lro = 0, do_csum = 1;
	int hash_type;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return (0);

	/*
	 * Bail out if packet contains more data than configured MTU.
	 */
	if (dlen > (ifp->if_mtu + ETHER_HDR_LEN)) {
		return (0);
	} else if (dlen <= MHLEN) {
		m_new = m_gethdr(M_NOWAIT, MT_DATA);
		if (m_new == NULL) {
			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
			return (0);
		}
		memcpy(mtod(m_new, void *), data, dlen);
		m_new->m_pkthdr.len = m_new->m_len = dlen;
		rxr->hn_small_pkts++;
	} else {
		/*
		 * Get an mbuf with a cluster.  For packets 2K or less,
		 * get a standard 2K cluster.  For anything larger, get a
		 * 4K cluster.
		 * Any buffers larger than 4K can cause problems
		 * if looped around to the Hyper-V TX channel, so avoid them.
		 */
		size = MCLBYTES;
		if (dlen > MCLBYTES) {
			/* 4096 */
			size = MJUMPAGESIZE;
		}

		m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
		if (m_new == NULL) {
			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
			return (0);
		}

		hv_m_append(m_new, dlen, data);
	}
	m_new->m_pkthdr.rcvif = ifp;

	if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0))
		do_csum = 0;

	/* receive side checksum offload */
	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
		/* IP csum offload */
		if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
			m_new->m_pkthdr.csum_flags |=
			    (CSUM_IP_CHECKED | CSUM_IP_VALID);
			rxr->hn_csum_ip++;
		}

		/* TCP/UDP csum offload */
		if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
		    NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
			m_new->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			m_new->m_pkthdr.csum_data = 0xffff;
			if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
				rxr->hn_csum_tcp++;
			else
				rxr->hn_csum_udp++;
		}

		/*
		 * XXX
		 * As of this writing (Oct 28th, 2016), the host side will
		 * turn on only TCPCS_OK and IPCS_OK even for UDP datagrams,
		 * so the do_lro setting here is actually _not_ accurate.  We
		 * depend on the RSS hash type check to reset do_lro.
		 */
		if ((info->csum_info &
		    (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
		    (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
			do_lro = 1;
	} else {
		const struct ether_header *eh;
		uint16_t etype;
		int hoff;

		hoff = sizeof(*eh);
		if (m_new->m_len < hoff)
			goto skip;
		eh = mtod(m_new, struct ether_header *);
		etype = ntohs(eh->ether_type);
		if (etype == ETHERTYPE_VLAN) {
			const struct ether_vlan_header *evl;

			hoff = sizeof(*evl);
			if (m_new->m_len < hoff)
				goto skip;
			evl = mtod(m_new, struct ether_vlan_header *);
			etype = ntohs(evl->evl_proto);
		}

		if (etype == ETHERTYPE_IP) {
			int pr;

			pr = hn_check_iplen(m_new, hoff);
			if (pr == IPPROTO_TCP) {
				if (do_csum &&
				    (rxr->hn_trust_hcsum &
				     HN_TRUST_HCSUM_TCP)) {
					rxr->hn_csum_trusted++;
					m_new->m_pkthdr.csum_flags |=
					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
					m_new->m_pkthdr.csum_data = 0xffff;
				}
				do_lro = 1;
			} else if (pr == IPPROTO_UDP) {
				if (do_csum &&
				    (rxr->hn_trust_hcsum &
				     HN_TRUST_HCSUM_UDP)) {
					rxr->hn_csum_trusted++;
					m_new->m_pkthdr.csum_flags |=
					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
					m_new->m_pkthdr.csum_data = 0xffff;
				}
			} else if (pr != IPPROTO_DONE && do_csum &&
			    (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
				rxr->hn_csum_trusted++;
				m_new->m_pkthdr.csum_flags |=
				    (CSUM_IP_CHECKED | CSUM_IP_VALID);
			}
		}
	}
skip:
	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
		m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
		    NDIS_VLAN_INFO_ID(info->vlan_info),
		    NDIS_VLAN_INFO_PRI(info->vlan_info),
		    NDIS_VLAN_INFO_CFI(info->vlan_info));
		m_new->m_flags |= M_VLANTAG;
	}

	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
		rxr->hn_rss_pkts++;
		m_new->m_pkthdr.flowid = info->hash_value;
		hash_type = M_HASHTYPE_OPAQUE_HASH;
= M_HASHTYPE_OPAQUE_HASH; 1855 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 1856 NDIS_HASH_FUNCTION_TOEPLITZ) { 1857 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK); 1858 1859 /* 1860 * NOTE: 1861 * do_lro is reset if the hash types are not TCP 1862 * related. See the comment in the above csum_flags 1863 * setup section. 1864 */ 1865 switch (type) { 1866 case NDIS_HASH_IPV4: 1867 hash_type = M_HASHTYPE_RSS_IPV4; 1868 do_lro = 0; 1869 break; 1870 1871 case NDIS_HASH_TCP_IPV4: 1872 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 1873 break; 1874 1875 case NDIS_HASH_IPV6: 1876 hash_type = M_HASHTYPE_RSS_IPV6; 1877 do_lro = 0; 1878 break; 1879 1880 case NDIS_HASH_IPV6_EX: 1881 hash_type = M_HASHTYPE_RSS_IPV6_EX; 1882 do_lro = 0; 1883 break; 1884 1885 case NDIS_HASH_TCP_IPV6: 1886 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 1887 break; 1888 1889 case NDIS_HASH_TCP_IPV6_EX: 1890 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 1891 break; 1892 } 1893 } 1894 } else { 1895 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 1896 hash_type = M_HASHTYPE_OPAQUE; 1897 } 1898 M_HASHTYPE_SET(m_new, hash_type); 1899 1900 /* 1901 * Note: Moved RX completion back to hv_nv_on_receive() so all 1902 * messages (not just data messages) will trigger a response. 1903 */ 1904 1905 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 1906 rxr->hn_pkts++; 1907 1908 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { 1909 #if defined(INET) || defined(INET6) 1910 struct lro_ctrl *lro = &rxr->hn_lro; 1911 1912 if (lro->lro_cnt) { 1913 rxr->hn_lro_tried++; 1914 if (hn_lro_rx(lro, m_new) == 0) { 1915 /* DONE! */ 1916 return 0; 1917 } 1918 } 1919 #endif 1920 } 1921 1922 /* We're not holding the lock here, so don't release it */ 1923 (*ifp->if_input)(ifp, m_new); 1924 1925 return (0); 1926 } 1927 1928 static int 1929 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1930 { 1931 struct hn_softc *sc = ifp->if_softc; 1932 struct ifreq *ifr = (struct ifreq *)data; 1933 int mask, error = 0; 1934 1935 switch (cmd) { 1936 case SIOCSIFMTU: 1937 if (ifr->ifr_mtu > HN_MTU_MAX) { 1938 error = EINVAL; 1939 break; 1940 } 1941 1942 HN_LOCK(sc); 1943 1944 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 1945 HN_UNLOCK(sc); 1946 break; 1947 } 1948 1949 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 1950 /* Can't change MTU */ 1951 HN_UNLOCK(sc); 1952 error = EOPNOTSUPP; 1953 break; 1954 } 1955 1956 if (ifp->if_mtu == ifr->ifr_mtu) { 1957 HN_UNLOCK(sc); 1958 break; 1959 } 1960 1961 /* 1962 * Suspend this interface before the synthetic parts 1963 * are ripped. 1964 */ 1965 hn_suspend(sc); 1966 1967 /* 1968 * Detach the synthetic parts, i.e. NVS and RNDIS. 1969 */ 1970 hn_synth_detach(sc); 1971 1972 /* 1973 * Reattach the synthetic parts, i.e. NVS and RNDIS, 1974 * with the new MTU setting. 1975 */ 1976 error = hn_synth_attach(sc, ifr->ifr_mtu); 1977 if (error) { 1978 HN_UNLOCK(sc); 1979 break; 1980 } 1981 1982 /* 1983 * Commit the requested MTU, after the synthetic parts 1984 * have been successfully attached. 1985 */ 1986 ifp->if_mtu = ifr->ifr_mtu; 1987 1988 /* 1989 * Make sure that various parameters based on MTU are 1990 * still valid, after the MTU change. 1991 */ 1992 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 1993 hn_set_chim_size(sc, sc->hn_chim_szmax); 1994 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 1995 #if __FreeBSD_version >= 1100099 1996 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < 1997 HN_LRO_LENLIM_MIN(ifp)) 1998 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 1999 #endif 2000 2001 /* 2002 * All done! Resume the interface now.
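 * hn_resume() undoes hn_suspend(): RX filtering is re-enabled and the TX rings leave their suspended state.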
2003 */ 2004 hn_resume(sc); 2005 2006 HN_UNLOCK(sc); 2007 break; 2008 2009 case SIOCSIFFLAGS: 2010 HN_LOCK(sc); 2011 2012 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2013 HN_UNLOCK(sc); 2014 break; 2015 } 2016 2017 if (ifp->if_flags & IFF_UP) { 2018 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2019 hn_set_rxfilter(sc); 2020 else 2021 hn_init_locked(sc); 2022 } else { 2023 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2024 hn_stop(sc); 2025 } 2026 sc->hn_if_flags = ifp->if_flags; 2027 2028 HN_UNLOCK(sc); 2029 break; 2030 2031 case SIOCSIFCAP: 2032 HN_LOCK(sc); 2033 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 2034 2035 if (mask & IFCAP_TXCSUM) { 2036 ifp->if_capenable ^= IFCAP_TXCSUM; 2037 if (ifp->if_capenable & IFCAP_TXCSUM) 2038 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 2039 else 2040 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 2041 } 2042 if (mask & IFCAP_TXCSUM_IPV6) { 2043 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 2044 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 2045 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 2046 else 2047 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 2048 } 2049 2050 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 2051 if (mask & IFCAP_RXCSUM) 2052 ifp->if_capenable ^= IFCAP_RXCSUM; 2053 #ifdef foo 2054 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 2055 if (mask & IFCAP_RXCSUM_IPV6) 2056 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 2057 #endif 2058 2059 if (mask & IFCAP_LRO) 2060 ifp->if_capenable ^= IFCAP_LRO; 2061 2062 if (mask & IFCAP_TSO4) { 2063 ifp->if_capenable ^= IFCAP_TSO4; 2064 if (ifp->if_capenable & IFCAP_TSO4) 2065 ifp->if_hwassist |= CSUM_IP_TSO; 2066 else 2067 ifp->if_hwassist &= ~CSUM_IP_TSO; 2068 } 2069 if (mask & IFCAP_TSO6) { 2070 ifp->if_capenable ^= IFCAP_TSO6; 2071 if (ifp->if_capenable & IFCAP_TSO6) 2072 ifp->if_hwassist |= CSUM_IP6_TSO; 2073 else 2074 ifp->if_hwassist &= ~CSUM_IP6_TSO; 2075 } 2076 2077 HN_UNLOCK(sc); 2078 break; 2079 2080 case SIOCADDMULTI: 2081 case SIOCDELMULTI: 2082 #ifdef notyet 2083 /* 2084 * XXX 2085 * Multicast uses mutex, while RNDIS RX filter setting 2086 * sleeps. We workaround this by always enabling 2087 * ALLMULTI. ALLMULTI would actually always be on, even 2088 * if we supported the SIOCADDMULTI/SIOCDELMULTI, since 2089 * we don't support multicast address list configuration 2090 * for this driver. 2091 */ 2092 HN_LOCK(sc); 2093 2094 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2095 HN_UNLOCK(sc); 2096 break; 2097 } 2098 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2099 hn_set_rxfilter(sc); 2100 2101 HN_UNLOCK(sc); 2102 #endif 2103 break; 2104 2105 case SIOCSIFMEDIA: 2106 case SIOCGIFMEDIA: 2107 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 2108 break; 2109 2110 default: 2111 error = ether_ioctl(ifp, cmd, data); 2112 break; 2113 } 2114 return (error); 2115 } 2116 2117 static void 2118 hn_stop(struct hn_softc *sc) 2119 { 2120 struct ifnet *ifp = sc->hn_ifp; 2121 int i; 2122 2123 HN_LOCK_ASSERT(sc); 2124 2125 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 2126 ("synthetic parts were not attached")); 2127 2128 /* Clear RUNNING bit _before_ hn_suspend_data() */ 2129 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 2130 hn_suspend_data(sc); 2131 2132 /* Clear OACTIVE bit. 
*/ 2133 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2134 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 2135 sc->hn_tx_ring[i].hn_oactive = 0; 2136 } 2137 2138 static void 2139 hn_init_locked(struct hn_softc *sc) 2140 { 2141 struct ifnet *ifp = sc->hn_ifp; 2142 int i; 2143 2144 HN_LOCK_ASSERT(sc); 2145 2146 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 2147 return; 2148 2149 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2150 return; 2151 2152 /* Configure RX filter */ 2153 hn_set_rxfilter(sc); 2154 2155 /* Clear OACTIVE bit. */ 2156 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2157 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 2158 sc->hn_tx_ring[i].hn_oactive = 0; 2159 2160 /* Clear TX 'suspended' bit. */ 2161 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 2162 2163 /* Everything is ready; unleash! */ 2164 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 2165 } 2166 2167 static void 2168 hn_init(void *xsc) 2169 { 2170 struct hn_softc *sc = xsc; 2171 2172 HN_LOCK(sc); 2173 hn_init_locked(sc); 2174 HN_UNLOCK(sc); 2175 } 2176 2177 #if __FreeBSD_version >= 1100099 2178 2179 static int 2180 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 2181 { 2182 struct hn_softc *sc = arg1; 2183 unsigned int lenlim; 2184 int error; 2185 2186 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 2187 error = sysctl_handle_int(oidp, &lenlim, 0, req); 2188 if (error || req->newptr == NULL) 2189 return error; 2190 2191 HN_LOCK(sc); 2192 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 2193 lenlim > TCP_LRO_LENGTH_MAX) { 2194 HN_UNLOCK(sc); 2195 return EINVAL; 2196 } 2197 hn_set_lro_lenlim(sc, lenlim); 2198 HN_UNLOCK(sc); 2199 2200 return 0; 2201 } 2202 2203 static int 2204 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 2205 { 2206 struct hn_softc *sc = arg1; 2207 int ackcnt, error, i; 2208 2209 /* 2210 * lro_ackcnt_lim is append count limit, 2211 * +1 to turn it into aggregation limit. 2212 */ 2213 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 2214 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 2215 if (error || req->newptr == NULL) 2216 return error; 2217 2218 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 2219 return EINVAL; 2220 2221 /* 2222 * Convert aggregation limit back to append 2223 * count limit. 
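 * The per-ring lro_ackcnt_lim keeps the append count, hence the decrement below.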
2224 */ 2225 --ackcnt; 2226 HN_LOCK(sc); 2227 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) 2228 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 2229 HN_UNLOCK(sc); 2230 return 0; 2231 } 2232 2233 #endif 2234 2235 static int 2236 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 2237 { 2238 struct hn_softc *sc = arg1; 2239 int hcsum = arg2; 2240 int on, error, i; 2241 2242 on = 0; 2243 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 2244 on = 1; 2245 2246 error = sysctl_handle_int(oidp, &on, 0, req); 2247 if (error || req->newptr == NULL) 2248 return error; 2249 2250 HN_LOCK(sc); 2251 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { 2252 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 2253 2254 if (on) 2255 rxr->hn_trust_hcsum |= hcsum; 2256 else 2257 rxr->hn_trust_hcsum &= ~hcsum; 2258 } 2259 HN_UNLOCK(sc); 2260 return 0; 2261 } 2262 2263 static int 2264 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 2265 { 2266 struct hn_softc *sc = arg1; 2267 int chim_size, error; 2268 2269 chim_size = sc->hn_tx_ring[0].hn_chim_size; 2270 error = sysctl_handle_int(oidp, &chim_size, 0, req); 2271 if (error || req->newptr == NULL) 2272 return error; 2273 2274 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 2275 return EINVAL; 2276 2277 HN_LOCK(sc); 2278 hn_set_chim_size(sc, chim_size); 2279 HN_UNLOCK(sc); 2280 return 0; 2281 } 2282 2283 #if __FreeBSD_version < 1100095 2284 static int 2285 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 2286 { 2287 struct hn_softc *sc = arg1; 2288 int ofs = arg2, i, error; 2289 struct hn_rx_ring *rxr; 2290 uint64_t stat; 2291 2292 stat = 0; 2293 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2294 rxr = &sc->hn_rx_ring[i]; 2295 stat += *((int *)((uint8_t *)rxr + ofs)); 2296 } 2297 2298 error = sysctl_handle_64(oidp, &stat, 0, req); 2299 if (error || req->newptr == NULL) 2300 return error; 2301 2302 /* Zero out this stat. */ 2303 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2304 rxr = &sc->hn_rx_ring[i]; 2305 *((int *)((uint8_t *)rxr + ofs)) = 0; 2306 } 2307 return 0; 2308 } 2309 #else 2310 static int 2311 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 2312 { 2313 struct hn_softc *sc = arg1; 2314 int ofs = arg2, i, error; 2315 struct hn_rx_ring *rxr; 2316 uint64_t stat; 2317 2318 stat = 0; 2319 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { 2320 rxr = &sc->hn_rx_ring[i]; 2321 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 2322 } 2323 2324 error = sysctl_handle_64(oidp, &stat, 0, req); 2325 if (error || req->newptr == NULL) 2326 return error; 2327 2328 /* Zero out this stat. */ 2329 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { 2330 rxr = &sc->hn_rx_ring[i]; 2331 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 2332 } 2333 return 0; 2334 } 2335 2336 #endif 2337 2338 static int 2339 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 2340 { 2341 struct hn_softc *sc = arg1; 2342 int ofs = arg2, i, error; 2343 struct hn_rx_ring *rxr; 2344 u_long stat; 2345 2346 stat = 0; 2347 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { 2348 rxr = &sc->hn_rx_ring[i]; 2349 stat += *((u_long *)((uint8_t *)rxr + ofs)); 2350 } 2351 2352 error = sysctl_handle_long(oidp, &stat, 0, req); 2353 if (error || req->newptr == NULL) 2354 return error; 2355 2356 /* Zero out this stat. 
*/ 2357 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { 2358 rxr = &sc->hn_rx_ring[i]; 2359 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 2360 } 2361 return 0; 2362 } 2363 2364 static int 2365 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 2366 { 2367 struct hn_softc *sc = arg1; 2368 int ofs = arg2, i, error; 2369 struct hn_tx_ring *txr; 2370 u_long stat; 2371 2372 stat = 0; 2373 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 2374 txr = &sc->hn_tx_ring[i]; 2375 stat += *((u_long *)((uint8_t *)txr + ofs)); 2376 } 2377 2378 error = sysctl_handle_long(oidp, &stat, 0, req); 2379 if (error || req->newptr == NULL) 2380 return error; 2381 2382 /* Zero out this stat. */ 2383 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 2384 txr = &sc->hn_tx_ring[i]; 2385 *((u_long *)((uint8_t *)txr + ofs)) = 0; 2386 } 2387 return 0; 2388 } 2389 2390 static int 2391 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 2392 { 2393 struct hn_softc *sc = arg1; 2394 int ofs = arg2, i, error, conf; 2395 struct hn_tx_ring *txr; 2396 2397 txr = &sc->hn_tx_ring[0]; 2398 conf = *((int *)((uint8_t *)txr + ofs)); 2399 2400 error = sysctl_handle_int(oidp, &conf, 0, req); 2401 if (error || req->newptr == NULL) 2402 return error; 2403 2404 HN_LOCK(sc); 2405 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 2406 txr = &sc->hn_tx_ring[i]; 2407 *((int *)((uint8_t *)txr + ofs)) = conf; 2408 } 2409 HN_UNLOCK(sc); 2410 2411 return 0; 2412 } 2413 2414 static int 2415 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 2416 { 2417 struct hn_softc *sc = arg1; 2418 char verstr[16]; 2419 2420 snprintf(verstr, sizeof(verstr), "%u.%u", 2421 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 2422 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 2423 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 2424 } 2425 2426 static int 2427 hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 2428 { 2429 struct hn_softc *sc = arg1; 2430 char caps_str[128]; 2431 uint32_t caps; 2432 2433 HN_LOCK(sc); 2434 caps = sc->hn_caps; 2435 HN_UNLOCK(sc); 2436 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 2437 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 2438 } 2439 2440 static int 2441 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 2442 { 2443 struct hn_softc *sc = arg1; 2444 char assist_str[128]; 2445 uint32_t hwassist; 2446 2447 HN_LOCK(sc); 2448 hwassist = sc->hn_ifp->if_hwassist; 2449 HN_UNLOCK(sc); 2450 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 2451 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 2452 } 2453 2454 static int 2455 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 2456 { 2457 struct hn_softc *sc = arg1; 2458 char filter_str[128]; 2459 uint32_t filter; 2460 2461 HN_LOCK(sc); 2462 filter = sc->hn_rx_filter; 2463 HN_UNLOCK(sc); 2464 snprintf(filter_str, sizeof(filter_str), "%b", filter, 2465 NDIS_PACKET_TYPES); 2466 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 2467 } 2468 2469 static int 2470 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 2471 { 2472 struct hn_softc *sc = arg1; 2473 int error; 2474 2475 HN_LOCK(sc); 2476 2477 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 2478 if (error || req->newptr == NULL) 2479 goto back; 2480 2481 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 2482 if (error) 2483 goto back; 2484 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 2485 2486 if (sc->hn_rx_ring_inuse > 1) { 2487 error = hn_rss_reconfig(sc); 2488 } else { 2489 /* Not RSS capable, at least for now; just save the RSS key. 
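The saved key is applied once the synthetic parts are reattached with more than one channel and hn_rndis_conf_rss() runs.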
*/ 2490 error = 0; 2491 } 2492 back: 2493 HN_UNLOCK(sc); 2494 return (error); 2495 } 2496 2497 static int 2498 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 2499 { 2500 struct hn_softc *sc = arg1; 2501 int error; 2502 2503 HN_LOCK(sc); 2504 2505 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 2506 if (error || req->newptr == NULL) 2507 goto back; 2508 2509 /* 2510 * Don't allow RSS indirect table change, if this interface is not 2511 * RSS capable currently. 2512 */ 2513 if (sc->hn_rx_ring_inuse == 1) { 2514 error = EOPNOTSUPP; 2515 goto back; 2516 } 2517 2518 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 2519 if (error) 2520 goto back; 2521 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 2522 2523 hn_rss_ind_fixup(sc, sc->hn_rx_ring_inuse); 2524 error = hn_rss_reconfig(sc); 2525 back: 2526 HN_UNLOCK(sc); 2527 return (error); 2528 } 2529 2530 static int 2531 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 2532 { 2533 struct hn_softc *sc = arg1; 2534 char hash_str[128]; 2535 uint32_t hash; 2536 2537 HN_LOCK(sc); 2538 hash = sc->hn_rss_hash; 2539 HN_UNLOCK(sc); 2540 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 2541 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 2542 } 2543 2544 static int 2545 hn_check_iplen(const struct mbuf *m, int hoff) 2546 { 2547 const struct ip *ip; 2548 int len, iphlen, iplen; 2549 const struct tcphdr *th; 2550 int thoff; /* TCP data offset */ 2551 2552 len = hoff + sizeof(struct ip); 2553 2554 /* The packet must be at least the size of an IP header. */ 2555 if (m->m_pkthdr.len < len) 2556 return IPPROTO_DONE; 2557 2558 /* The fixed IP header must reside completely in the first mbuf. */ 2559 if (m->m_len < len) 2560 return IPPROTO_DONE; 2561 2562 ip = mtodo(m, hoff); 2563 2564 /* Bound check the packet's stated IP header length. */ 2565 iphlen = ip->ip_hl << 2; 2566 if (iphlen < sizeof(struct ip)) /* minimum header length */ 2567 return IPPROTO_DONE; 2568 2569 /* The full IP header must reside completely in the one mbuf. */ 2570 if (m->m_len < hoff + iphlen) 2571 return IPPROTO_DONE; 2572 2573 iplen = ntohs(ip->ip_len); 2574 2575 /* 2576 * Check that the amount of data in the buffers is at 2577 * least as much as the IP header would have us expect. 2578 */ 2579 if (m->m_pkthdr.len < hoff + iplen) 2580 return IPPROTO_DONE; 2581 2582 /* 2583 * Ignore IP fragments. 2584 */ 2585 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 2586 return IPPROTO_DONE; 2587 2588 /* 2589 * The TCP/IP or UDP/IP header must be entirely contained within 2590 * the first fragment of a packet.
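 * Otherwise IPPROTO_DONE is returned and the caller neither trusts the host checksum nor attempts LRO for the packet.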
2591 */ 2592 switch (ip->ip_p) { 2593 case IPPROTO_TCP: 2594 if (iplen < iphlen + sizeof(struct tcphdr)) 2595 return IPPROTO_DONE; 2596 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 2597 return IPPROTO_DONE; 2598 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 2599 thoff = th->th_off << 2; 2600 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 2601 return IPPROTO_DONE; 2602 if (m->m_len < hoff + iphlen + thoff) 2603 return IPPROTO_DONE; 2604 break; 2605 case IPPROTO_UDP: 2606 if (iplen < iphlen + sizeof(struct udphdr)) 2607 return IPPROTO_DONE; 2608 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 2609 return IPPROTO_DONE; 2610 break; 2611 default: 2612 if (iplen < iphlen) 2613 return IPPROTO_DONE; 2614 break; 2615 } 2616 return ip->ip_p; 2617 } 2618 2619 static int 2620 hn_create_rx_data(struct hn_softc *sc, int ring_cnt) 2621 { 2622 struct sysctl_oid_list *child; 2623 struct sysctl_ctx_list *ctx; 2624 device_t dev = sc->hn_dev; 2625 #if defined(INET) || defined(INET6) 2626 #if __FreeBSD_version >= 1100095 2627 int lroent_cnt; 2628 #endif 2629 #endif 2630 int i; 2631 2632 /* 2633 * Create RXBUF for reception. 2634 * 2635 * NOTE: 2636 * - It is shared by all channels. 2637 * - A large enough buffer is allocated, certain version of NVSes 2638 * may further limit the usable space. 2639 */ 2640 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 2641 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 2642 BUS_DMA_WAITOK | BUS_DMA_ZERO); 2643 if (sc->hn_rxbuf == NULL) { 2644 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 2645 return (ENOMEM); 2646 } 2647 2648 sc->hn_rx_ring_cnt = ring_cnt; 2649 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 2650 2651 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 2652 M_DEVBUF, M_WAITOK | M_ZERO); 2653 2654 #if defined(INET) || defined(INET6) 2655 #if __FreeBSD_version >= 1100095 2656 lroent_cnt = hn_lro_entry_count; 2657 if (lroent_cnt < TCP_LRO_ENTRIES) 2658 lroent_cnt = TCP_LRO_ENTRIES; 2659 if (bootverbose) 2660 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 2661 #endif 2662 #endif /* INET || INET6 */ 2663 2664 ctx = device_get_sysctl_ctx(dev); 2665 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 2666 2667 /* Create dev.hn.UNIT.rx sysctl tree */ 2668 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 2669 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 2670 2671 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2672 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 2673 2674 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 2675 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 2676 &rxr->hn_br_dma, BUS_DMA_WAITOK); 2677 if (rxr->hn_br == NULL) { 2678 device_printf(dev, "allocate bufring failed\n"); 2679 return (ENOMEM); 2680 } 2681 2682 if (hn_trust_hosttcp) 2683 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 2684 if (hn_trust_hostudp) 2685 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 2686 if (hn_trust_hostip) 2687 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 2688 rxr->hn_ifp = sc->hn_ifp; 2689 if (i < sc->hn_tx_ring_cnt) 2690 rxr->hn_txr = &sc->hn_tx_ring[i]; 2691 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 2692 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 2693 rxr->hn_rx_idx = i; 2694 rxr->hn_rxbuf = sc->hn_rxbuf; 2695 2696 /* 2697 * Initialize LRO. 
2698 */ 2699 #if defined(INET) || defined(INET6) 2700 #if __FreeBSD_version >= 1100095 2701 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 2702 hn_lro_mbufq_depth); 2703 #else 2704 tcp_lro_init(&rxr->hn_lro); 2705 rxr->hn_lro.ifp = sc->hn_ifp; 2706 #endif 2707 #if __FreeBSD_version >= 1100099 2708 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 2709 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 2710 #endif 2711 #endif /* INET || INET6 */ 2712 2713 if (sc->hn_rx_sysctl_tree != NULL) { 2714 char name[16]; 2715 2716 /* 2717 * Create per RX ring sysctl tree: 2718 * dev.hn.UNIT.rx.RINGID 2719 */ 2720 snprintf(name, sizeof(name), "%d", i); 2721 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 2722 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 2723 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 2724 2725 if (rxr->hn_rx_sysctl_tree != NULL) { 2726 SYSCTL_ADD_ULONG(ctx, 2727 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 2728 OID_AUTO, "packets", CTLFLAG_RW, 2729 &rxr->hn_pkts, "# of packets received"); 2730 SYSCTL_ADD_ULONG(ctx, 2731 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 2732 OID_AUTO, "rss_pkts", CTLFLAG_RW, 2733 &rxr->hn_rss_pkts, 2734 "# of packets w/ RSS info received"); 2735 SYSCTL_ADD_INT(ctx, 2736 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 2737 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 2738 &rxr->hn_pktbuf_len, 0, 2739 "Temporary channel packet buffer length"); 2740 } 2741 } 2742 } 2743 2744 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 2745 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2746 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 2747 #if __FreeBSD_version < 1100095 2748 hn_rx_stat_int_sysctl, 2749 #else 2750 hn_rx_stat_u64_sysctl, 2751 #endif 2752 "LU", "LRO queued"); 2753 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 2754 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2755 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 2756 #if __FreeBSD_version < 1100095 2757 hn_rx_stat_int_sysctl, 2758 #else 2759 hn_rx_stat_u64_sysctl, 2760 #endif 2761 "LU", "LRO flushed"); 2762 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 2763 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2764 __offsetof(struct hn_rx_ring, hn_lro_tried), 2765 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 2766 #if __FreeBSD_version >= 1100099 2767 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 2768 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2769 hn_lro_lenlim_sysctl, "IU", 2770 "Max # of data bytes to be aggregated by LRO"); 2771 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 2772 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2773 hn_lro_ackcnt_sysctl, "I", 2774 "Max # of ACKs to be aggregated by LRO"); 2775 #endif 2776 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 2777 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 2778 hn_trust_hcsum_sysctl, "I", 2779 "Trust tcp segement verification on host side, " 2780 "when csum info is missing"); 2781 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 2782 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 2783 hn_trust_hcsum_sysctl, "I", 2784 "Trust udp datagram verification on host side, " 2785 "when csum info is missing"); 2786 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 2787 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 2788 hn_trust_hcsum_sysctl, "I", 2789 "Trust ip packet verification on host side, " 2790 "when csum info is missing"); 2791 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 2792 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2793 
__offsetof(struct hn_rx_ring, hn_csum_ip), 2794 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 2795 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 2796 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2797 __offsetof(struct hn_rx_ring, hn_csum_tcp), 2798 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 2799 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 2800 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2801 __offsetof(struct hn_rx_ring, hn_csum_udp), 2802 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 2803 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 2804 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2805 __offsetof(struct hn_rx_ring, hn_csum_trusted), 2806 hn_rx_stat_ulong_sysctl, "LU", 2807 "# of packets that we trust host's csum verification"); 2808 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 2809 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2810 __offsetof(struct hn_rx_ring, hn_small_pkts), 2811 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 2812 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 2813 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2814 __offsetof(struct hn_rx_ring, hn_ack_failed), 2815 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 2816 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 2817 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 2818 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 2819 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 2820 2821 return (0); 2822 } 2823 2824 static void 2825 hn_destroy_rx_data(struct hn_softc *sc) 2826 { 2827 int i; 2828 2829 if (sc->hn_rxbuf != NULL) { 2830 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 2831 sc->hn_rxbuf = NULL; 2832 } 2833 2834 if (sc->hn_rx_ring_cnt == 0) 2835 return; 2836 2837 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2838 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 2839 2840 if (rxr->hn_br == NULL) 2841 continue; 2842 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 2843 rxr->hn_br = NULL; 2844 2845 #if defined(INET) || defined(INET6) 2846 tcp_lro_free(&rxr->hn_lro); 2847 #endif 2848 free(rxr->hn_pktbuf, M_DEVBUF); 2849 } 2850 free(sc->hn_rx_ring, M_DEVBUF); 2851 sc->hn_rx_ring = NULL; 2852 2853 sc->hn_rx_ring_cnt = 0; 2854 sc->hn_rx_ring_inuse = 0; 2855 } 2856 2857 static int 2858 hn_tx_ring_create(struct hn_softc *sc, int id) 2859 { 2860 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 2861 device_t dev = sc->hn_dev; 2862 bus_dma_tag_t parent_dtag; 2863 int error, i; 2864 2865 txr->hn_sc = sc; 2866 txr->hn_tx_idx = id; 2867 2868 #ifndef HN_USE_TXDESC_BUFRING 2869 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 2870 #endif 2871 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 2872 2873 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 2874 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 2875 M_DEVBUF, M_WAITOK | M_ZERO); 2876 #ifndef HN_USE_TXDESC_BUFRING 2877 SLIST_INIT(&txr->hn_txlist); 2878 #else 2879 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 2880 M_WAITOK, &txr->hn_tx_lock); 2881 #endif 2882 2883 txr->hn_tx_taskq = sc->hn_tx_taskq; 2884 2885 #ifdef HN_IFSTART_SUPPORT 2886 if (hn_use_if_start) { 2887 txr->hn_txeof = hn_start_txeof; 2888 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 2889 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 2890 } else 2891 #endif 2892 { 2893 int br_depth; 2894 2895 txr->hn_txeof = hn_xmit_txeof; 2896 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 2897 TASK_INIT(&txr->hn_txeof_task, 0, 
hn_xmit_txeof_taskfunc, txr); 2898 2899 br_depth = hn_get_txswq_depth(txr); 2900 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 2901 M_WAITOK, &txr->hn_tx_lock); 2902 } 2903 2904 txr->hn_direct_tx_size = hn_direct_tx_size; 2905 2906 /* 2907 * Always schedule transmission instead of trying to do direct 2908 * transmission. This one gives the best performance so far. 2909 */ 2910 txr->hn_sched_tx = 1; 2911 2912 parent_dtag = bus_get_dma_tag(dev); 2913 2914 /* DMA tag for RNDIS packet messages. */ 2915 error = bus_dma_tag_create(parent_dtag, /* parent */ 2916 HN_RNDIS_PKT_ALIGN, /* alignment */ 2917 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 2918 BUS_SPACE_MAXADDR, /* lowaddr */ 2919 BUS_SPACE_MAXADDR, /* highaddr */ 2920 NULL, NULL, /* filter, filterarg */ 2921 HN_RNDIS_PKT_LEN, /* maxsize */ 2922 1, /* nsegments */ 2923 HN_RNDIS_PKT_LEN, /* maxsegsize */ 2924 0, /* flags */ 2925 NULL, /* lockfunc */ 2926 NULL, /* lockfuncarg */ 2927 &txr->hn_tx_rndis_dtag); 2928 if (error) { 2929 device_printf(dev, "failed to create rndis dmatag\n"); 2930 return error; 2931 } 2932 2933 /* DMA tag for data. */ 2934 error = bus_dma_tag_create(parent_dtag, /* parent */ 2935 1, /* alignment */ 2936 HN_TX_DATA_BOUNDARY, /* boundary */ 2937 BUS_SPACE_MAXADDR, /* lowaddr */ 2938 BUS_SPACE_MAXADDR, /* highaddr */ 2939 NULL, NULL, /* filter, filterarg */ 2940 HN_TX_DATA_MAXSIZE, /* maxsize */ 2941 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 2942 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 2943 0, /* flags */ 2944 NULL, /* lockfunc */ 2945 NULL, /* lockfuncarg */ 2946 &txr->hn_tx_data_dtag); 2947 if (error) { 2948 device_printf(dev, "failed to create data dmatag\n"); 2949 return error; 2950 } 2951 2952 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 2953 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 2954 2955 txd->txr = txr; 2956 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 2957 2958 /* 2959 * Allocate and load RNDIS packet message. 2960 */ 2961 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 2962 (void **)&txd->rndis_pkt, 2963 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 2964 &txd->rndis_pkt_dmap); 2965 if (error) { 2966 device_printf(dev, 2967 "failed to allocate rndis_packet_msg, %d\n", i); 2968 return error; 2969 } 2970 2971 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 2972 txd->rndis_pkt_dmap, 2973 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 2974 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 2975 BUS_DMA_NOWAIT); 2976 if (error) { 2977 device_printf(dev, 2978 "failed to load rndis_packet_msg, %d\n", i); 2979 bus_dmamem_free(txr->hn_tx_rndis_dtag, 2980 txd->rndis_pkt, txd->rndis_pkt_dmap); 2981 return error; 2982 } 2983 2984 /* DMA map for TX data. 
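One map is created per TX descriptor here; it is destroyed again in hn_txdesc_dmamap_destroy() when the ring is torn down.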
*/ 2985 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 2986 &txd->data_dmap); 2987 if (error) { 2988 device_printf(dev, 2989 "failed to allocate tx data dmamap\n"); 2990 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 2991 txd->rndis_pkt_dmap); 2992 bus_dmamem_free(txr->hn_tx_rndis_dtag, 2993 txd->rndis_pkt, txd->rndis_pkt_dmap); 2994 return error; 2995 } 2996 2997 /* All set, put it to list */ 2998 txd->flags |= HN_TXD_FLAG_ONLIST; 2999 #ifndef HN_USE_TXDESC_BUFRING 3000 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 3001 #else 3002 buf_ring_enqueue(txr->hn_txdesc_br, txd); 3003 #endif 3004 } 3005 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 3006 3007 if (sc->hn_tx_sysctl_tree != NULL) { 3008 struct sysctl_oid_list *child; 3009 struct sysctl_ctx_list *ctx; 3010 char name[16]; 3011 3012 /* 3013 * Create per TX ring sysctl tree: 3014 * dev.hn.UNIT.tx.RINGID 3015 */ 3016 ctx = device_get_sysctl_ctx(dev); 3017 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 3018 3019 snprintf(name, sizeof(name), "%d", id); 3020 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 3021 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3022 3023 if (txr->hn_tx_sysctl_tree != NULL) { 3024 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 3025 3026 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", 3027 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 3028 "# of available TX descs"); 3029 #ifdef HN_IFSTART_SUPPORT 3030 if (!hn_use_if_start) 3031 #endif 3032 { 3033 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 3034 CTLFLAG_RD, &txr->hn_oactive, 0, 3035 "over active"); 3036 } 3037 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 3038 CTLFLAG_RW, &txr->hn_pkts, 3039 "# of packets transmitted"); 3040 } 3041 } 3042 3043 return 0; 3044 } 3045 3046 static void 3047 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 3048 { 3049 struct hn_tx_ring *txr = txd->txr; 3050 3051 KASSERT(txd->m == NULL, ("still has mbuf installed")); 3052 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 3053 3054 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 3055 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 3056 txd->rndis_pkt_dmap); 3057 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 3058 } 3059 3060 static void 3061 hn_tx_ring_destroy(struct hn_tx_ring *txr) 3062 { 3063 struct hn_txdesc *txd; 3064 3065 if (txr->hn_txdesc == NULL) 3066 return; 3067 3068 #ifndef HN_USE_TXDESC_BUFRING 3069 while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { 3070 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 3071 hn_txdesc_dmamap_destroy(txd); 3072 } 3073 #else 3074 mtx_lock(&txr->hn_tx_lock); 3075 while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) 3076 hn_txdesc_dmamap_destroy(txd); 3077 mtx_unlock(&txr->hn_tx_lock); 3078 #endif 3079 3080 if (txr->hn_tx_data_dtag != NULL) 3081 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 3082 if (txr->hn_tx_rndis_dtag != NULL) 3083 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 3084 3085 #ifdef HN_USE_TXDESC_BUFRING 3086 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 3087 #endif 3088 3089 free(txr->hn_txdesc, M_DEVBUF); 3090 txr->hn_txdesc = NULL; 3091 3092 if (txr->hn_mbuf_br != NULL) 3093 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 3094 3095 #ifndef HN_USE_TXDESC_BUFRING 3096 mtx_destroy(&txr->hn_txlist_spin); 3097 #endif 3098 mtx_destroy(&txr->hn_tx_lock); 3099 } 3100 3101 static int 3102 hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 3103 { 3104 struct sysctl_oid_list *child; 3105 struct sysctl_ctx_list *ctx; 3106 int i; 3107 3108 /* 3109 * Create TXBUF for chimney 
sending. 3110 * 3111 * NOTE: It is shared by all channels. 3112 */ 3113 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 3114 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 3115 BUS_DMA_WAITOK | BUS_DMA_ZERO); 3116 if (sc->hn_chim == NULL) { 3117 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 3118 return (ENOMEM); 3119 } 3120 3121 sc->hn_tx_ring_cnt = ring_cnt; 3122 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 3123 3124 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 3125 M_DEVBUF, M_WAITOK | M_ZERO); 3126 3127 ctx = device_get_sysctl_ctx(sc->hn_dev); 3128 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 3129 3130 /* Create dev.hn.UNIT.tx sysctl tree */ 3131 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 3132 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3133 3134 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3135 int error; 3136 3137 error = hn_tx_ring_create(sc, i); 3138 if (error) 3139 return error; 3140 } 3141 3142 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 3143 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3144 __offsetof(struct hn_tx_ring, hn_no_txdescs), 3145 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 3146 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 3147 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3148 __offsetof(struct hn_tx_ring, hn_send_failed), 3149 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 3150 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 3151 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3152 __offsetof(struct hn_tx_ring, hn_txdma_failed), 3153 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 3154 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 3155 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3156 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 3157 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 3158 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 3159 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3160 __offsetof(struct hn_tx_ring, hn_tx_chimney), 3161 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 3162 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 3163 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3164 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 3165 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 3166 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 3167 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 3168 "# of total TX descs"); 3169 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 3170 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 3171 "Chimney send packet size upper boundary"); 3172 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 3173 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 3174 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 3175 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 3176 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3177 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 3178 hn_tx_conf_int_sysctl, "I", 3179 "Size of the packet for direct transmission"); 3180 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 3181 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3182 __offsetof(struct hn_tx_ring, hn_sched_tx), 3183 hn_tx_conf_int_sysctl, "I", 3184 "Always schedule transmission " 3185 "instead of doing direct transmission"); 3186 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 3187 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 3188 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 3189 
CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 3190 3191 return 0; 3192 } 3193 3194 static void 3195 hn_set_chim_size(struct hn_softc *sc, int chim_size) 3196 { 3197 int i; 3198 3199 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 3200 sc->hn_tx_ring[i].hn_chim_size = chim_size; 3201 } 3202 3203 static void 3204 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 3205 { 3206 struct ifnet *ifp = sc->hn_ifp; 3207 int tso_minlen; 3208 3209 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 3210 return; 3211 3212 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 3213 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 3214 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 3215 3216 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 3217 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 3218 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 3219 3220 if (tso_maxlen < tso_minlen) 3221 tso_maxlen = tso_minlen; 3222 else if (tso_maxlen > IP_MAXPACKET) 3223 tso_maxlen = IP_MAXPACKET; 3224 if (tso_maxlen > sc->hn_ndis_tso_szmax) 3225 tso_maxlen = sc->hn_ndis_tso_szmax; 3226 ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 3227 if (bootverbose) 3228 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 3229 } 3230 3231 static void 3232 hn_fixup_tx_data(struct hn_softc *sc) 3233 { 3234 uint64_t csum_assist; 3235 int i; 3236 3237 hn_set_chim_size(sc, sc->hn_chim_szmax); 3238 if (hn_tx_chimney_size > 0 && 3239 hn_tx_chimney_size < sc->hn_chim_szmax) 3240 hn_set_chim_size(sc, hn_tx_chimney_size); 3241 3242 csum_assist = 0; 3243 if (sc->hn_caps & HN_CAP_IPCS) 3244 csum_assist |= CSUM_IP; 3245 if (sc->hn_caps & HN_CAP_TCP4CS) 3246 csum_assist |= CSUM_IP_TCP; 3247 if (sc->hn_caps & HN_CAP_UDP4CS) 3248 csum_assist |= CSUM_IP_UDP; 3249 #ifdef notyet 3250 if (sc->hn_caps & HN_CAP_TCP6CS) 3251 csum_assist |= CSUM_IP6_TCP; 3252 if (sc->hn_caps & HN_CAP_UDP6CS) 3253 csum_assist |= CSUM_IP6_UDP; 3254 #endif 3255 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 3256 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 3257 3258 if (sc->hn_caps & HN_CAP_HASHVAL) { 3259 /* 3260 * Support HASHVAL pktinfo on TX path. 
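 * Each TX ring is flagged below, which allows the per-packet hash value to be attached when packets are encapsulated for the host.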
3261 */ 3262 if (bootverbose) 3263 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 3264 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 3265 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 3266 } 3267 } 3268 3269 static void 3270 hn_destroy_tx_data(struct hn_softc *sc) 3271 { 3272 int i; 3273 3274 if (sc->hn_chim != NULL) { 3275 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 3276 sc->hn_chim = NULL; 3277 } 3278 3279 if (sc->hn_tx_ring_cnt == 0) 3280 return; 3281 3282 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 3283 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 3284 3285 free(sc->hn_tx_ring, M_DEVBUF); 3286 sc->hn_tx_ring = NULL; 3287 3288 sc->hn_tx_ring_cnt = 0; 3289 sc->hn_tx_ring_inuse = 0; 3290 } 3291 3292 #ifdef HN_IFSTART_SUPPORT 3293 3294 static void 3295 hn_start_taskfunc(void *xtxr, int pending __unused) 3296 { 3297 struct hn_tx_ring *txr = xtxr; 3298 3299 mtx_lock(&txr->hn_tx_lock); 3300 hn_start_locked(txr, 0); 3301 mtx_unlock(&txr->hn_tx_lock); 3302 } 3303 3304 static int 3305 hn_start_locked(struct hn_tx_ring *txr, int len) 3306 { 3307 struct hn_softc *sc = txr->hn_sc; 3308 struct ifnet *ifp = sc->hn_ifp; 3309 3310 KASSERT(hn_use_if_start, 3311 ("hn_start_locked is called, when if_start is disabled")); 3312 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 3313 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 3314 3315 if (__predict_false(txr->hn_suspended)) 3316 return 0; 3317 3318 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 3319 IFF_DRV_RUNNING) 3320 return 0; 3321 3322 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 3323 struct hn_txdesc *txd; 3324 struct mbuf *m_head; 3325 int error; 3326 3327 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 3328 if (m_head == NULL) 3329 break; 3330 3331 if (len > 0 && m_head->m_pkthdr.len > len) { 3332 /* 3333 * This sending could be time consuming; let callers 3334 * dispatch this packet sending (and sending of any 3335 * following up packets) to tx taskqueue. 
3336 */ 3337 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 3338 return 1; 3339 } 3340 3341 #if defined(INET6) || defined(INET) 3342 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3343 m_head = hn_tso_fixup(m_head); 3344 if (__predict_false(m_head == NULL)) { 3345 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 3346 continue; 3347 } 3348 } 3349 #endif 3350 3351 txd = hn_txdesc_get(txr); 3352 if (txd == NULL) { 3353 txr->hn_no_txdescs++; 3354 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 3355 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 3356 break; 3357 } 3358 3359 error = hn_encap(txr, txd, &m_head); 3360 if (error) { 3361 /* Both txd and m_head are freed */ 3362 continue; 3363 } 3364 3365 error = hn_txpkt(ifp, txr, txd); 3366 if (__predict_false(error)) { 3367 /* txd is freed, but m_head is not */ 3368 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 3369 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 3370 break; 3371 } 3372 } 3373 return 0; 3374 } 3375 3376 static void 3377 hn_start(struct ifnet *ifp) 3378 { 3379 struct hn_softc *sc = ifp->if_softc; 3380 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 3381 3382 if (txr->hn_sched_tx) 3383 goto do_sched; 3384 3385 if (mtx_trylock(&txr->hn_tx_lock)) { 3386 int sched; 3387 3388 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 3389 mtx_unlock(&txr->hn_tx_lock); 3390 if (!sched) 3391 return; 3392 } 3393 do_sched: 3394 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 3395 } 3396 3397 static void 3398 hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 3399 { 3400 struct hn_tx_ring *txr = xtxr; 3401 3402 mtx_lock(&txr->hn_tx_lock); 3403 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 3404 hn_start_locked(txr, 0); 3405 mtx_unlock(&txr->hn_tx_lock); 3406 } 3407 3408 static void 3409 hn_start_txeof(struct hn_tx_ring *txr) 3410 { 3411 struct hn_softc *sc = txr->hn_sc; 3412 struct ifnet *ifp = sc->hn_ifp; 3413 3414 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 3415 3416 if (txr->hn_sched_tx) 3417 goto do_sched; 3418 3419 if (mtx_trylock(&txr->hn_tx_lock)) { 3420 int sched; 3421 3422 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 3423 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 3424 mtx_unlock(&txr->hn_tx_lock); 3425 if (sched) { 3426 taskqueue_enqueue(txr->hn_tx_taskq, 3427 &txr->hn_tx_task); 3428 } 3429 } else { 3430 do_sched: 3431 /* 3432 * Release the OACTIVE earlier, with the hope, that 3433 * others could catch up. The task will clear the 3434 * flag again with the hn_tx_lock to avoid possible 3435 * races. 
3436 */ 3437 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 3438 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 3439 } 3440 } 3441 3442 #endif /* HN_IFSTART_SUPPORT */ 3443 3444 static int 3445 hn_xmit(struct hn_tx_ring *txr, int len) 3446 { 3447 struct hn_softc *sc = txr->hn_sc; 3448 struct ifnet *ifp = sc->hn_ifp; 3449 struct mbuf *m_head; 3450 3451 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 3452 #ifdef HN_IFSTART_SUPPORT 3453 KASSERT(hn_use_if_start == 0, 3454 ("hn_xmit is called, when if_start is enabled")); 3455 #endif 3456 3457 if (__predict_false(txr->hn_suspended)) 3458 return 0; 3459 3460 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 3461 return 0; 3462 3463 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 3464 struct hn_txdesc *txd; 3465 int error; 3466 3467 if (len > 0 && m_head->m_pkthdr.len > len) { 3468 /* 3469 * This sending could be time consuming; let callers 3470 * dispatch this packet sending (and sending of any 3471 * following up packets) to tx taskqueue. 3472 */ 3473 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 3474 return 1; 3475 } 3476 3477 txd = hn_txdesc_get(txr); 3478 if (txd == NULL) { 3479 txr->hn_no_txdescs++; 3480 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 3481 txr->hn_oactive = 1; 3482 break; 3483 } 3484 3485 error = hn_encap(txr, txd, &m_head); 3486 if (error) { 3487 /* Both txd and m_head are freed; discard */ 3488 drbr_advance(ifp, txr->hn_mbuf_br); 3489 continue; 3490 } 3491 3492 error = hn_txpkt(ifp, txr, txd); 3493 if (__predict_false(error)) { 3494 /* txd is freed, but m_head is not */ 3495 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 3496 txr->hn_oactive = 1; 3497 break; 3498 } 3499 3500 /* Sent */ 3501 drbr_advance(ifp, txr->hn_mbuf_br); 3502 } 3503 return 0; 3504 } 3505 3506 static int 3507 hn_transmit(struct ifnet *ifp, struct mbuf *m) 3508 { 3509 struct hn_softc *sc = ifp->if_softc; 3510 struct hn_tx_ring *txr; 3511 int error, idx = 0; 3512 3513 #if defined(INET6) || defined(INET) 3514 /* 3515 * Perform TSO packet header fixup now, since the TSO 3516 * packet header should be cache-hot. 
3517 */ 3518 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 3519 m = hn_tso_fixup(m); 3520 if (__predict_false(m == NULL)) { 3521 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 3522 return EIO; 3523 } 3524 } 3525 #endif 3526 3527 /* 3528 * Select the TX ring based on flowid 3529 */ 3530 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 3531 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 3532 txr = &sc->hn_tx_ring[idx]; 3533 3534 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 3535 if (error) { 3536 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 3537 return error; 3538 } 3539 3540 if (txr->hn_oactive) 3541 return 0; 3542 3543 if (txr->hn_sched_tx) 3544 goto do_sched; 3545 3546 if (mtx_trylock(&txr->hn_tx_lock)) { 3547 int sched; 3548 3549 sched = hn_xmit(txr, txr->hn_direct_tx_size); 3550 mtx_unlock(&txr->hn_tx_lock); 3551 if (!sched) 3552 return 0; 3553 } 3554 do_sched: 3555 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 3556 return 0; 3557 } 3558 3559 static void 3560 hn_tx_ring_qflush(struct hn_tx_ring *txr) 3561 { 3562 struct mbuf *m; 3563 3564 mtx_lock(&txr->hn_tx_lock); 3565 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 3566 m_freem(m); 3567 mtx_unlock(&txr->hn_tx_lock); 3568 } 3569 3570 static void 3571 hn_xmit_qflush(struct ifnet *ifp) 3572 { 3573 struct hn_softc *sc = ifp->if_softc; 3574 int i; 3575 3576 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 3577 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 3578 if_qflush(ifp); 3579 } 3580 3581 static void 3582 hn_xmit_txeof(struct hn_tx_ring *txr) 3583 { 3584 3585 if (txr->hn_sched_tx) 3586 goto do_sched; 3587 3588 if (mtx_trylock(&txr->hn_tx_lock)) { 3589 int sched; 3590 3591 txr->hn_oactive = 0; 3592 sched = hn_xmit(txr, txr->hn_direct_tx_size); 3593 mtx_unlock(&txr->hn_tx_lock); 3594 if (sched) { 3595 taskqueue_enqueue(txr->hn_tx_taskq, 3596 &txr->hn_tx_task); 3597 } 3598 } else { 3599 do_sched: 3600 /* 3601 * Release the oactive earlier, with the hope, that 3602 * others could catch up. The task will clear the 3603 * oactive again with the hn_tx_lock to avoid possible 3604 * races. 3605 */ 3606 txr->hn_oactive = 0; 3607 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 3608 } 3609 } 3610 3611 static void 3612 hn_xmit_taskfunc(void *xtxr, int pending __unused) 3613 { 3614 struct hn_tx_ring *txr = xtxr; 3615 3616 mtx_lock(&txr->hn_tx_lock); 3617 hn_xmit(txr, 0); 3618 mtx_unlock(&txr->hn_tx_lock); 3619 } 3620 3621 static void 3622 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) 3623 { 3624 struct hn_tx_ring *txr = xtxr; 3625 3626 mtx_lock(&txr->hn_tx_lock); 3627 txr->hn_oactive = 0; 3628 hn_xmit(txr, 0); 3629 mtx_unlock(&txr->hn_tx_lock); 3630 } 3631 3632 static int 3633 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) 3634 { 3635 struct vmbus_chan_br cbr; 3636 struct hn_rx_ring *rxr; 3637 struct hn_tx_ring *txr = NULL; 3638 int idx, error; 3639 3640 idx = vmbus_chan_subidx(chan); 3641 3642 /* 3643 * Link this channel to RX/TX ring. 
3644 */ 3645 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 3646 ("invalid channel index %d, should > 0 && < %d", 3647 idx, sc->hn_rx_ring_inuse)); 3648 rxr = &sc->hn_rx_ring[idx]; 3649 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, 3650 ("RX ring %d already attached", idx)); 3651 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; 3652 3653 if (bootverbose) { 3654 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", 3655 idx, vmbus_chan_id(chan)); 3656 } 3657 3658 if (idx < sc->hn_tx_ring_inuse) { 3659 txr = &sc->hn_tx_ring[idx]; 3660 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, 3661 ("TX ring %d already attached", idx)); 3662 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; 3663 3664 txr->hn_chan = chan; 3665 if (bootverbose) { 3666 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", 3667 idx, vmbus_chan_id(chan)); 3668 } 3669 } 3670 3671 /* Bind this channel to a proper CPU. */ 3672 vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); 3673 3674 /* 3675 * Open this channel 3676 */ 3677 cbr.cbr = rxr->hn_br; 3678 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; 3679 cbr.cbr_txsz = HN_TXBR_SIZE; 3680 cbr.cbr_rxsz = HN_RXBR_SIZE; 3681 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); 3682 if (error) { 3683 if_printf(sc->hn_ifp, "open chan%u failed: %d\n", 3684 vmbus_chan_id(chan), error); 3685 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; 3686 if (txr != NULL) 3687 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; 3688 } 3689 return (error); 3690 } 3691 3692 static void 3693 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) 3694 { 3695 struct hn_rx_ring *rxr; 3696 int idx; 3697 3698 idx = vmbus_chan_subidx(chan); 3699 3700 /* 3701 * Link this channel to RX/TX ring. 3702 */ 3703 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 3704 ("invalid channel index %d, should > 0 && < %d", 3705 idx, sc->hn_rx_ring_inuse)); 3706 rxr = &sc->hn_rx_ring[idx]; 3707 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), 3708 ("RX ring %d is not attached", idx)); 3709 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; 3710 3711 if (idx < sc->hn_tx_ring_inuse) { 3712 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; 3713 3714 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), 3715 ("TX ring %d is not attached attached", idx)); 3716 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; 3717 } 3718 3719 /* 3720 * Close this channel. 3721 * 3722 * NOTE: 3723 * Channel closing does _not_ destroy the target channel. 3724 */ 3725 vmbus_chan_close(chan); 3726 } 3727 3728 static int 3729 hn_attach_subchans(struct hn_softc *sc) 3730 { 3731 struct vmbus_channel **subchans; 3732 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 3733 int i, error = 0; 3734 3735 if (subchan_cnt == 0) 3736 return (0); 3737 3738 /* Attach the sub-channels. */ 3739 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 3740 for (i = 0; i < subchan_cnt; ++i) { 3741 error = hn_chan_attach(sc, subchans[i]); 3742 if (error) 3743 break; 3744 } 3745 vmbus_subchan_rel(subchans, subchan_cnt); 3746 3747 if (error) { 3748 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); 3749 } else { 3750 if (bootverbose) { 3751 if_printf(sc->hn_ifp, "%d sub-channels attached\n", 3752 subchan_cnt); 3753 } 3754 } 3755 return (error); 3756 } 3757 3758 static void 3759 hn_detach_allchans(struct hn_softc *sc) 3760 { 3761 struct vmbus_channel **subchans; 3762 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 3763 int i; 3764 3765 if (subchan_cnt == 0) 3766 goto back; 3767 3768 /* Detach the sub-channels. 
*/ 3769 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 3770 for (i = 0; i < subchan_cnt; ++i) 3771 hn_chan_detach(sc, subchans[i]); 3772 vmbus_subchan_rel(subchans, subchan_cnt); 3773 3774 back: 3775 /* 3776 * Detach the primary channel, _after_ all sub-channels 3777 * are detached. 3778 */ 3779 hn_chan_detach(sc, sc->hn_prichan); 3780 3781 /* Wait for sub-channels to be destroyed, if any. */ 3782 vmbus_subchan_drain(sc->hn_prichan); 3783 3784 #ifdef INVARIANTS 3785 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3786 KASSERT((sc->hn_rx_ring[i].hn_rx_flags & 3787 HN_RX_FLAG_ATTACHED) == 0, 3788 ("%dth RX ring is still attached", i)); 3789 } 3790 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3791 KASSERT((sc->hn_tx_ring[i].hn_tx_flags & 3792 HN_TX_FLAG_ATTACHED) == 0, 3793 ("%dth TX ring is still attached", i)); 3794 } 3795 #endif 3796 } 3797 3798 static int 3799 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) 3800 { 3801 struct vmbus_channel **subchans; 3802 int nchan, rxr_cnt, error; 3803 3804 nchan = *nsubch + 1; 3805 if (nchan == 1) { 3806 /* 3807 * Multiple RX/TX rings are not requested. 3808 */ 3809 *nsubch = 0; 3810 return (0); 3811 } 3812 3813 /* 3814 * Query RSS capabilities, e.g. # of RX rings, and # of indirect 3815 * table entries. 3816 */ 3817 error = hn_rndis_query_rsscaps(sc, &rxr_cnt); 3818 if (error) { 3819 /* No RSS; this is benign. */ 3820 *nsubch = 0; 3821 return (0); 3822 } 3823 if (bootverbose) { 3824 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", 3825 rxr_cnt, nchan); 3826 } 3827 3828 if (nchan > rxr_cnt) 3829 nchan = rxr_cnt; 3830 if (nchan == 1) { 3831 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); 3832 *nsubch = 0; 3833 return (0); 3834 } 3835 3836 /* 3837 * Allocate sub-channels from NVS. 3838 */ 3839 *nsubch = nchan - 1; 3840 error = hn_nvs_alloc_subchans(sc, nsubch); 3841 if (error || *nsubch == 0) { 3842 /* Failed to allocate sub-channels. */ 3843 *nsubch = 0; 3844 return (0); 3845 } 3846 3847 /* 3848 * Wait for all sub-channels to become ready before moving on. 3849 */ 3850 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); 3851 vmbus_subchan_rel(subchans, *nsubch); 3852 return (0); 3853 } 3854 3855 static int 3856 hn_synth_attach(struct hn_softc *sc, int mtu) 3857 { 3858 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 3859 int error, nsubch, nchan, i; 3860 uint32_t old_caps; 3861 3862 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, 3863 ("synthetic parts were attached")); 3864 3865 /* Save capabilities for later verification. */ 3866 old_caps = sc->hn_caps; 3867 sc->hn_caps = 0; 3868 3869 /* Clear RSS stuffs. */ 3870 sc->hn_rss_ind_size = 0; 3871 sc->hn_rss_hash = 0; 3872 3873 /* 3874 * Attach the primary channel _before_ attaching NVS and RNDIS. 3875 */ 3876 error = hn_chan_attach(sc, sc->hn_prichan); 3877 if (error) 3878 return (error); 3879 3880 /* 3881 * Attach NVS. 3882 */ 3883 error = hn_nvs_attach(sc, mtu); 3884 if (error) 3885 return (error); 3886 3887 /* 3888 * Attach RNDIS _after_ NVS is attached. 3889 */ 3890 error = hn_rndis_attach(sc, mtu); 3891 if (error) 3892 return (error); 3893 3894 /* 3895 * Make sure capabilities are not changed. 3896 */ 3897 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { 3898 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", 3899 old_caps, sc->hn_caps); 3900 /* Restore old capabilities and abort. */ 3901 sc->hn_caps = old_caps; 3902 return ENXIO; 3903 } 3904 3905 /* 3906 * Allocate sub-channels for multi-TX/RX rings. 
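 * The host may grant fewer sub-channels than requested; hn_synth_alloc_subchans() trims the count accordingly.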
	nsubch = sc->hn_rx_ring_cnt - 1;
	error = hn_synth_alloc_subchans(sc, &nsubch);
	if (error)
		return (error);

	nchan = nsubch + 1;
	if (nchan == 1) {
		/* Only the primary channel can be used; done */
		goto back;
	}

	/*
	 * Configure RSS key and indirect table _after_ all sub-channels
	 * are allocated.
	 */

	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
		/*
		 * RSS key is not set yet; set it to the default RSS key.
		 */
		if (bootverbose)
			if_printf(sc->hn_ifp, "setup default RSS key\n");
		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
	}

	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
		/*
		 * RSS indirect table is not set yet; set it up in round-
		 * robin fashion.
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "setup default RSS indirect "
			    "table\n");
		}
		for (i = 0; i < NDIS_HASH_INDCNT; ++i)
			rss->rss_ind[i] = i % nchan;
		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
	} else {
		/*
		 * # of usable channels may be changed, so we have to
		 * make sure that all entries in RSS indirect table
		 * are valid.
		 */
		hn_rss_ind_fixup(sc, nchan);
	}

	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error) {
		/*
		 * Failed to configure RSS key or indirect table; only
		 * the primary channel can be used.
		 */
		nchan = 1;
	}
back:
	/*
	 * Set the # of TX/RX rings that could be used according to
	 * the # of channels that NVS offered.
	 */
	hn_set_ring_inuse(sc, nchan);

	/*
	 * Attach the sub-channels, if any.
	 */
	error = hn_attach_subchans(sc);
	if (error)
		return (error);

	sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
	return (0);
}

/*
 * NOTE:
 * The interface must have been suspended through hn_suspend(), before
 * this function gets called.
 */
static void
hn_synth_detach(struct hn_softc *sc)
{
	HN_LOCK_ASSERT(sc);

	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
	    ("synthetic parts were not attached"));

	/* Detach the RNDIS first. */
	hn_rndis_detach(sc);

	/* Detach NVS. */
	hn_nvs_detach(sc);

	/* Detach all of the channels. */
	hn_detach_allchans(sc);

	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}

static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{
	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}

static void
hn_chan_drain(struct vmbus_channel *chan)
{

	while (!vmbus_chan_rx_empty(chan) || !vmbus_chan_tx_empty(chan))
		pause("waitch", 1);
	vmbus_chan_intr_drain(chan);
}

static void
hn_suspend_data(struct hn_softc *sc)
{
	struct vmbus_channel **subch = NULL;
	int i, nsubch;

	HN_LOCK_ASSERT(sc);
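
	/*
	 * Data path suspension is done in stages:
	 * 1) Mark all in-use TX rings suspended and wait for the
	 *    pending sends to drain.
	 * 2) Clear the RX filter, so that the host stops delivering
	 *    data packets, and give RNDIS time to flush what is
	 *    already in flight.
	 * 3) Drain the RX/TX bufrings and channel interrupts of the
	 *    primary channel and all sub-channels.
	 */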

	/*
	 * Suspend TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 1;
		mtx_unlock(&txr->hn_tx_lock);
		/* No one is able to send more packets now. */

		/* Wait for all pending sends to finish. */
		while (hn_tx_ring_pending(txr))
			pause("hnwtx", 1 /* 1 tick */);

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}

	/*
	 * Disable RX by clearing RX filter.
	 */
	sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
	hn_rndis_set_rxfilter(sc, sc->hn_rx_filter);

	/*
	 * Give RNDIS enough time to flush all pending data packets.
	 */
	pause("waitrx", (200 * hz) / 1000);

	/*
	 * Drain RX/TX bufrings and interrupts.
	 */
	nsubch = sc->hn_rx_ring_inuse - 1;
	if (nsubch > 0)
		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);

	if (subch != NULL) {
		for (i = 0; i < nsubch; ++i)
			hn_chan_drain(subch[i]);
	}
	hn_chan_drain(sc->hn_prichan);

	if (subch != NULL)
		vmbus_subchan_rel(subch, nsubch);
}

static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{

	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}

static void
hn_suspend_mgmt(struct hn_softc *sc)
{
	struct task task;

	HN_LOCK_ASSERT(sc);

	/*
	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
	 * through hn_mgmt_taskq.
	 */
	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
	vmbus_chan_run_task(sc->hn_prichan, &task);

	/*
	 * Make sure that all pending management tasks are completed.
	 */
	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
	taskqueue_drain_all(sc->hn_mgmt_taskq0);
}

static void
hn_suspend(struct hn_softc *sc)
{

	if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
		hn_suspend_data(sc);
	hn_suspend_mgmt(sc);
}

static void
hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
{
	int i;

	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
	    ("invalid TX ring count %d", tx_ring_cnt));

	for (i = 0; i < tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 0;
		mtx_unlock(&txr->hn_tx_lock);
	}
}

static void
hn_resume_data(struct hn_softc *sc)
{
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Re-enable RX.
	 */
	hn_set_rxfilter(sc);

	/*
	 * Make sure to clear suspend status on "all" TX rings,
	 * since hn_tx_ring_inuse can be changed after
	 * hn_suspend_data().
	 */
	hn_resume_tx(sc, sc->hn_tx_ring_cnt);

#ifdef HN_IFSTART_SUPPORT
	if (!hn_use_if_start)
#endif
	{
		/*
		 * Flush unused drbrs, since hn_tx_ring_inuse may be
		 * reduced.
		 */
		for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
			hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
	}
4187 */ 4188 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 4189 } 4190 } 4191 4192 static void 4193 hn_resume_mgmt(struct hn_softc *sc) 4194 { 4195 4196 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 4197 4198 /* 4199 * Kick off network change detection, if it was pending. 4200 * If no network change was pending, start link status 4201 * checks, which is more lightweight than network change 4202 * detection. 4203 */ 4204 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 4205 hn_change_network(sc); 4206 else 4207 hn_update_link_status(sc); 4208 } 4209 4210 static void 4211 hn_resume(struct hn_softc *sc) 4212 { 4213 4214 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 4215 hn_resume_data(sc); 4216 hn_resume_mgmt(sc); 4217 } 4218 4219 static void 4220 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) 4221 { 4222 const struct rndis_status_msg *msg; 4223 int ofs; 4224 4225 if (dlen < sizeof(*msg)) { 4226 if_printf(sc->hn_ifp, "invalid RNDIS status\n"); 4227 return; 4228 } 4229 msg = data; 4230 4231 switch (msg->rm_status) { 4232 case RNDIS_STATUS_MEDIA_CONNECT: 4233 case RNDIS_STATUS_MEDIA_DISCONNECT: 4234 hn_update_link_status(sc); 4235 break; 4236 4237 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: 4238 /* Not really useful; ignore. */ 4239 break; 4240 4241 case RNDIS_STATUS_NETWORK_CHANGE: 4242 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); 4243 if (dlen < ofs + msg->rm_stbuflen || 4244 msg->rm_stbuflen < sizeof(uint32_t)) { 4245 if_printf(sc->hn_ifp, "network changed\n"); 4246 } else { 4247 uint32_t change; 4248 4249 memcpy(&change, ((const uint8_t *)msg) + ofs, 4250 sizeof(change)); 4251 if_printf(sc->hn_ifp, "network changed, change %u\n", 4252 change); 4253 } 4254 hn_change_network(sc); 4255 break; 4256 4257 default: 4258 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", 4259 msg->rm_status); 4260 break; 4261 } 4262 } 4263 4264 static int 4265 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) 4266 { 4267 const struct rndis_pktinfo *pi = info_data; 4268 uint32_t mask = 0; 4269 4270 while (info_dlen != 0) { 4271 const void *data; 4272 uint32_t dlen; 4273 4274 if (__predict_false(info_dlen < sizeof(*pi))) 4275 return (EINVAL); 4276 if (__predict_false(info_dlen < pi->rm_size)) 4277 return (EINVAL); 4278 info_dlen -= pi->rm_size; 4279 4280 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) 4281 return (EINVAL); 4282 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) 4283 return (EINVAL); 4284 dlen = pi->rm_size - pi->rm_pktinfooffset; 4285 data = pi->rm_data; 4286 4287 switch (pi->rm_type) { 4288 case NDIS_PKTINFO_TYPE_VLAN: 4289 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) 4290 return (EINVAL); 4291 info->vlan_info = *((const uint32_t *)data); 4292 mask |= HN_RXINFO_VLAN; 4293 break; 4294 4295 case NDIS_PKTINFO_TYPE_CSUM: 4296 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) 4297 return (EINVAL); 4298 info->csum_info = *((const uint32_t *)data); 4299 mask |= HN_RXINFO_CSUM; 4300 break; 4301 4302 case HN_NDIS_PKTINFO_TYPE_HASHVAL: 4303 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) 4304 return (EINVAL); 4305 info->hash_value = *((const uint32_t *)data); 4306 mask |= HN_RXINFO_HASHVAL; 4307 break; 4308 4309 case HN_NDIS_PKTINFO_TYPE_HASHINF: 4310 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) 4311 return (EINVAL); 4312 info->hash_info = *((const uint32_t *)data); 4313 mask |= HN_RXINFO_HASHINF; 4314 break; 4315 4316 default: 4317 goto next; 4318 } 4319 4320 if (mask == HN_RXINFO_ALL) { 4321 /* All 
		if (mask == HN_RXINFO_ALL) {
			/* All found; done */
			break;
		}
next:
		pi = (const struct rndis_pktinfo *)
		    ((const uint8_t *)pi + pi->rm_size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if ((mask & HN_RXINFO_HASHVAL) == 0)
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return (0);
}
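
/*
 * Return true if the region [off, off + len) overlaps
 * [check_off, check_off + check_len); both regions are expected to
 * have a non-zero length, which the callers below guarantee.  This is
 * used to make sure that the data, OOB and pktinfo regions of an
 * RNDIS packet message do not run into each other.
 */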
4438 */ 4439 if (hn_rndis_check_overlap(oob_off, oob_len, 4440 data_off, data_len)) { 4441 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4442 "oob overlaps data, oob abs %d len %d, " 4443 "data abs %d len %d\n", 4444 oob_off, oob_len, data_off, data_len); 4445 return; 4446 } 4447 4448 /* 4449 * Check against pktinfo. 4450 */ 4451 if (pktinfo_len != 0 && 4452 hn_rndis_check_overlap(oob_off, oob_len, 4453 pktinfo_off, pktinfo_len)) { 4454 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4455 "oob overlaps pktinfo, oob abs %d len %d, " 4456 "pktinfo abs %d len %d\n", 4457 oob_off, oob_len, pktinfo_off, pktinfo_len); 4458 return; 4459 } 4460 } 4461 4462 /* 4463 * Check per-packet-info coverage and find useful per-packet-info. 4464 */ 4465 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; 4466 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; 4467 info.hash_info = HN_NDIS_HASH_INFO_INVALID; 4468 if (__predict_true(pktinfo_len != 0)) { 4469 bool overlap; 4470 int error; 4471 4472 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { 4473 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4474 "pktinfo overflow, msglen %u, " 4475 "pktinfo abs %d len %d\n", 4476 pkt->rm_len, pktinfo_off, pktinfo_len); 4477 return; 4478 } 4479 4480 /* 4481 * Check packet info coverage. 4482 */ 4483 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, 4484 data_off, data_len); 4485 if (__predict_false(overlap)) { 4486 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4487 "pktinfo overlap data, pktinfo abs %d len %d, " 4488 "data abs %d len %d\n", 4489 pktinfo_off, pktinfo_len, data_off, data_len); 4490 return; 4491 } 4492 4493 /* 4494 * Find useful per-packet-info. 4495 */ 4496 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, 4497 pktinfo_len, &info); 4498 if (__predict_false(error)) { 4499 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " 4500 "pktinfo\n"); 4501 return; 4502 } 4503 } 4504 4505 if (__predict_false(data_off + data_len > pkt->rm_len)) { 4506 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4507 "data overflow, msglen %u, data abs %d len %d\n", 4508 pkt->rm_len, data_off, data_len); 4509 return; 4510 } 4511 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); 4512 } 4513 4514 static __inline void 4515 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) 4516 { 4517 const struct rndis_msghdr *hdr; 4518 4519 if (__predict_false(dlen < sizeof(*hdr))) { 4520 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); 4521 return; 4522 } 4523 hdr = data; 4524 4525 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { 4526 /* Hot data path. */ 4527 hn_rndis_rx_data(rxr, data, dlen); 4528 /* Done! 
static __inline void
hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_msghdr *hdr;

	if (__predict_false(dlen < sizeof(*hdr))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
		return;
	}
	hdr = data;

	if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
		/* Hot data path. */
		hn_rndis_rx_data(rxr, data, dlen);
		/* Done! */
		return;
	}

	if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
		hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
	else
		hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
}

static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{
	const struct hn_nvs_hdr *hdr;

	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
		if_printf(sc->hn_ifp, "invalid nvs notify\n");
		return;
	}
	hdr = VMBUS_CHANPKT_CONST_DATA(pkt);

	if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
		/* Useless; ignore */
		return;
	}
	if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
}

static void
hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkt)
{
	struct hn_nvs_sendctx *sndc;

	sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
	sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
	    VMBUS_CHANPKT_DATALEN(pkt));
	/*
	 * NOTE:
	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
	 * its callback.
	 */
}

static void
hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkthdr)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr;
	int count, i, hlen;

	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
		return;
	}
	nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);

	/* Make sure that this is a RNDIS message. */
	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
		    nvs_hdr->nvs_type);
		return;
	}

	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
	if (__predict_false(hlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
		return;
	}
	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;

	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
		    pkt->cp_rxbuf_id);
		return;
	}

	count = pkt->cp_rxbuf_cnt;
	if (__predict_false(hlen <
	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
		return;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		int ofs, len;

		ofs = pkt->cp_rxbuf[i].rb_ofs;
		len = pkt->cp_rxbuf[i].rb_len;
		if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
			    "ofs %d, len %d\n", i, ofs, len);
			continue;
		}
		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
	}
4630 */ 4631 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); 4632 } 4633 4634 static void 4635 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 4636 uint64_t tid) 4637 { 4638 struct hn_nvs_rndis_ack ack; 4639 int retries, error; 4640 4641 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; 4642 ack.nvs_status = HN_NVS_STATUS_OK; 4643 4644 retries = 0; 4645 again: 4646 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, 4647 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); 4648 if (__predict_false(error == EAGAIN)) { 4649 /* 4650 * NOTE: 4651 * This should _not_ happen in real world, since the 4652 * consumption of the TX bufring from the TX path is 4653 * controlled. 4654 */ 4655 if (rxr->hn_ack_failed == 0) 4656 if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); 4657 rxr->hn_ack_failed++; 4658 retries++; 4659 if (retries < 10) { 4660 DELAY(100); 4661 goto again; 4662 } 4663 /* RXBUF leaks! */ 4664 if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); 4665 } 4666 } 4667 4668 static void 4669 hn_chan_callback(struct vmbus_channel *chan, void *xrxr) 4670 { 4671 struct hn_rx_ring *rxr = xrxr; 4672 struct hn_softc *sc = rxr->hn_ifp->if_softc; 4673 4674 for (;;) { 4675 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; 4676 int error, pktlen; 4677 4678 pktlen = rxr->hn_pktbuf_len; 4679 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); 4680 if (__predict_false(error == ENOBUFS)) { 4681 void *nbuf; 4682 int nlen; 4683 4684 /* 4685 * Expand channel packet buffer. 4686 * 4687 * XXX 4688 * Use M_WAITOK here, since allocation failure 4689 * is fatal. 4690 */ 4691 nlen = rxr->hn_pktbuf_len * 2; 4692 while (nlen < pktlen) 4693 nlen *= 2; 4694 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); 4695 4696 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", 4697 rxr->hn_pktbuf_len, nlen); 4698 4699 free(rxr->hn_pktbuf, M_DEVBUF); 4700 rxr->hn_pktbuf = nbuf; 4701 rxr->hn_pktbuf_len = nlen; 4702 /* Retry! */ 4703 continue; 4704 } else if (__predict_false(error == EAGAIN)) { 4705 /* No more channel packets; done! 
static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
	struct hn_rx_ring *rxr = xrxr;
	struct hn_softc *sc = rxr->hn_ifp->if_softc;

	for (;;) {
		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
		int error, pktlen;

		pktlen = rxr->hn_pktbuf_len;
		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
		if (__predict_false(error == ENOBUFS)) {
			void *nbuf;
			int nlen;

			/*
			 * Expand channel packet buffer.
			 *
			 * XXX
			 * Use M_WAITOK here, since allocation failure
			 * is fatal.
			 */
			nlen = rxr->hn_pktbuf_len * 2;
			while (nlen < pktlen)
				nlen *= 2;
			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);

			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
			    rxr->hn_pktbuf_len, nlen);

			free(rxr->hn_pktbuf, M_DEVBUF);
			rxr->hn_pktbuf = nbuf;
			rxr->hn_pktbuf_len = nlen;
			/* Retry! */
			continue;
		} else if (__predict_false(error == EAGAIN)) {
			/* No more channel packets; done! */
			break;
		}
		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			hn_nvs_handle_comp(sc, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(rxr, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(sc, pkt);
			break;

		default:
			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
			    pkt->cph_type);
			break;
		}
	}
	hn_chan_rollup(rxr, rxr->hn_txr);
}

static void
hn_tx_taskq_create(void *arg __unused)
{

	if (vm_guest != VM_GUEST_HV)
		return;

	if (!hn_share_tx_taskq)
		return;

	hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
	    taskqueue_thread_enqueue, &hn_tx_taskq);
	if (hn_bind_tx_taskq >= 0) {
		int cpu = hn_bind_tx_taskq;
		cpuset_t cpu_set;

		if (cpu > mp_ncpus - 1)
			cpu = mp_ncpus - 1;
		CPU_SETOF(cpu, &cpu_set);
		taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET,
		    &cpu_set, "hn tx");
	} else {
		taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx");
	}
}
SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_create, NULL);

static void
hn_tx_taskq_destroy(void *arg __unused)
{

	if (hn_tx_taskq != NULL)
		taskqueue_free(hn_tx_taskq);
}
SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_destroy, NULL);