xref: /freebsd/sys/dev/hyperv/hvsock/hv_sock.c (revision a0da2f73b6ef12db95d5391d380b0b04507b4778)
1a560f3ebSWei Hu /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
3a560f3ebSWei Hu  *
4a560f3ebSWei Hu  * Copyright (c) 2020 Microsoft Corp.
5a560f3ebSWei Hu  * All rights reserved.
6a560f3ebSWei Hu  *
7a560f3ebSWei Hu  * Redistribution and use in source and binary forms, with or without
8a560f3ebSWei Hu  * modification, are permitted provided that the following conditions
9a560f3ebSWei Hu  * are met:
10a560f3ebSWei Hu  * 1. Redistributions of source code must retain the above copyright
11a560f3ebSWei Hu  *    notice unmodified, this list of conditions, and the following
12a560f3ebSWei Hu  *    disclaimer.
13a560f3ebSWei Hu  * 2. Redistributions in binary form must reproduce the above copyright
14a560f3ebSWei Hu  *    notice, this list of conditions and the following disclaimer in the
15a560f3ebSWei Hu  *    documentation and/or other materials provided with the distribution.
16a560f3ebSWei Hu  *
17a560f3ebSWei Hu  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18a560f3ebSWei Hu  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19a560f3ebSWei Hu  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20a560f3ebSWei Hu  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21a560f3ebSWei Hu  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22a560f3ebSWei Hu  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23a560f3ebSWei Hu  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24a560f3ebSWei Hu  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25a560f3ebSWei Hu  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26a560f3ebSWei Hu  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27a560f3ebSWei Hu  */
28a560f3ebSWei Hu 
29a560f3ebSWei Hu #include <sys/param.h>
30a560f3ebSWei Hu #include <sys/bus.h>
31a560f3ebSWei Hu #include <sys/domain.h>
32a560f3ebSWei Hu #include <sys/lock.h>
33a560f3ebSWei Hu #include <sys/kernel.h>
34a560f3ebSWei Hu #include <sys/types.h>
35a560f3ebSWei Hu #include <sys/malloc.h>
36a560f3ebSWei Hu #include <sys/module.h>
37a560f3ebSWei Hu #include <sys/mutex.h>
38a560f3ebSWei Hu #include <sys/proc.h>
39a560f3ebSWei Hu #include <sys/protosw.h>
40a560f3ebSWei Hu #include <sys/socket.h>
41a560f3ebSWei Hu #include <sys/sysctl.h>
42a560f3ebSWei Hu #include <sys/sysproto.h>
43a560f3ebSWei Hu #include <sys/systm.h>
44a560f3ebSWei Hu #include <sys/sockbuf.h>
45a560f3ebSWei Hu #include <sys/sx.h>
46a560f3ebSWei Hu #include <sys/uio.h>
47a560f3ebSWei Hu 
48a560f3ebSWei Hu #include <net/vnet.h>
49a560f3ebSWei Hu 
50a560f3ebSWei Hu #include <dev/hyperv/vmbus/vmbus_reg.h>
51a560f3ebSWei Hu 
52a560f3ebSWei Hu #include "hv_sock.h"
53a560f3ebSWei Hu 
54a560f3ebSWei Hu #define HVSOCK_DBG_NONE			0x0
55a560f3ebSWei Hu #define HVSOCK_DBG_INFO			0x1
56a560f3ebSWei Hu #define HVSOCK_DBG_ERR			0x2
57a560f3ebSWei Hu #define HVSOCK_DBG_VERBOSE		0x3
58a560f3ebSWei Hu 
59a560f3ebSWei Hu 
60a560f3ebSWei Hu SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket");
61a560f3ebSWei Hu 
62a560f3ebSWei Hu static int hvs_dbg_level;
63a560f3ebSWei Hu SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level,
64a560f3ebSWei Hu     0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose");
65a560f3ebSWei Hu 
66a560f3ebSWei Hu 
67a560f3ebSWei Hu #define HVSOCK_DBG(level, ...) do {					\
68a560f3ebSWei Hu 	if (hvs_dbg_level >= (level))					\
69a560f3ebSWei Hu 		printf(__VA_ARGS__);					\
70a560f3ebSWei Hu 	} while (0)
71a560f3ebSWei Hu 
72a560f3ebSWei Hu MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures");
73a560f3ebSWei Hu 
74625932c9SKyle Evans static int hvs_dom_probe(void);
75625932c9SKyle Evans 
76a560f3ebSWei Hu /* The MTU is 16KB per host side's design */
77a560f3ebSWei Hu #define HVSOCK_MTU_SIZE		(1024 * 16)
78a560f3ebSWei Hu #define HVSOCK_SEND_BUF_SZ	(PAGE_SIZE - sizeof(struct vmpipe_proto_header))
79a560f3ebSWei Hu 
80a560f3ebSWei Hu #define HVSOCK_HEADER_LEN	(sizeof(struct hvs_pkt_header))
81a560f3ebSWei Hu 
82a560f3ebSWei Hu #define HVSOCK_PKT_LEN(payload_len)	(HVSOCK_HEADER_LEN + \
83a560f3ebSWei Hu 					 roundup2(payload_len, 8) + \
84a560f3ebSWei Hu 					 sizeof(uint64_t))
85a560f3ebSWei Hu 
86a560f3ebSWei Hu /*
87a560f3ebSWei Hu  * HyperV Transport sockets
88a560f3ebSWei Hu  */
89e7d02be1SGleb Smirnoff static struct protosw hv_socket_protosw = {
90a560f3ebSWei Hu 	.pr_type =		SOCK_STREAM,
91a560f3ebSWei Hu 	.pr_protocol =		HYPERV_SOCK_PROTO_TRANS,
92a560f3ebSWei Hu 	.pr_flags =		PR_CONNREQUIRED,
93e7d02be1SGleb Smirnoff 	.pr_attach =		hvs_trans_attach,
94e7d02be1SGleb Smirnoff 	.pr_bind =		hvs_trans_bind,
95e7d02be1SGleb Smirnoff 	.pr_listen =		hvs_trans_listen,
96e7d02be1SGleb Smirnoff 	.pr_accept =		hvs_trans_accept,
97e7d02be1SGleb Smirnoff 	.pr_connect =		hvs_trans_connect,
98e7d02be1SGleb Smirnoff 	.pr_peeraddr =		hvs_trans_peeraddr,
99e7d02be1SGleb Smirnoff 	.pr_sockaddr =		hvs_trans_sockaddr,
100e7d02be1SGleb Smirnoff 	.pr_soreceive =		hvs_trans_soreceive,
101e7d02be1SGleb Smirnoff 	.pr_sosend =		hvs_trans_sosend,
102e7d02be1SGleb Smirnoff 	.pr_disconnect =	hvs_trans_disconnect,
103e7d02be1SGleb Smirnoff 	.pr_close =		hvs_trans_close,
104e7d02be1SGleb Smirnoff 	.pr_detach =		hvs_trans_detach,
105e7d02be1SGleb Smirnoff 	.pr_shutdown =		hvs_trans_shutdown,
106e7d02be1SGleb Smirnoff 	.pr_abort =		hvs_trans_abort,
107a560f3ebSWei Hu };
108a560f3ebSWei Hu 
109a560f3ebSWei Hu static struct domain		hv_socket_domain = {
110a560f3ebSWei Hu 	.dom_family =		AF_HYPERV,
111a560f3ebSWei Hu 	.dom_name =		"hyperv",
112625932c9SKyle Evans 	.dom_probe =		hvs_dom_probe,
113e7d02be1SGleb Smirnoff 	.dom_nprotosw =		1,
114e7d02be1SGleb Smirnoff 	.dom_protosw =		{ &hv_socket_protosw },
115a560f3ebSWei Hu };
116a560f3ebSWei Hu 
117644ca084SGleb Smirnoff DOMAIN_SET(hv_socket_);
118a560f3ebSWei Hu 
119a560f3ebSWei Hu #define MAX_PORT			((uint32_t)0xFFFFFFFF)
120a560f3ebSWei Hu #define MIN_PORT			((uint32_t)0x0)
121a560f3ebSWei Hu 
122a560f3ebSWei Hu /* 00000000-facb-11e6-bd58-64006a7986d3 */
123a560f3ebSWei Hu static const struct hyperv_guid srv_id_template = {
124a560f3ebSWei Hu 	.hv_guid = {
125a560f3ebSWei Hu 	    0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11,
126a560f3ebSWei Hu 	    0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 }
127a560f3ebSWei Hu };
128a560f3ebSWei Hu 
129a560f3ebSWei Hu static int		hvsock_br_callback(void *, int, void *);
130a560f3ebSWei Hu static uint32_t		hvsock_canread_check(struct hvs_pcb *);
131a560f3ebSWei Hu static uint32_t		hvsock_canwrite_check(struct hvs_pcb *);
132a560f3ebSWei Hu static int		hvsock_send_data(struct vmbus_channel *chan,
133a560f3ebSWei Hu     struct uio *uio, uint32_t to_write, struct sockbuf *sb);
134a560f3ebSWei Hu 
135a560f3ebSWei Hu 
136a560f3ebSWei Hu 
137a560f3ebSWei Hu /* Globals */
138a560f3ebSWei Hu static struct sx		hvs_trans_socks_sx;
139a560f3ebSWei Hu static struct mtx		hvs_trans_socks_mtx;
140a560f3ebSWei Hu static LIST_HEAD(, hvs_pcb)	hvs_trans_bound_socks;
141a560f3ebSWei Hu static LIST_HEAD(, hvs_pcb)	hvs_trans_connected_socks;
142a560f3ebSWei Hu static uint32_t			previous_auto_bound_port;
143a560f3ebSWei Hu 
144a560f3ebSWei Hu static void
hvsock_print_guid(struct hyperv_guid * guid)145a560f3ebSWei Hu hvsock_print_guid(struct hyperv_guid *guid)
146a560f3ebSWei Hu {
147a560f3ebSWei Hu 	unsigned char *p = (unsigned char *)guid;
148a560f3ebSWei Hu 
149a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_INFO,
150a560f3ebSWei Hu 	    "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n",
151a560f3ebSWei Hu 	    *(unsigned int *)p,
152a560f3ebSWei Hu 	    *((unsigned short *) &p[4]),
153a560f3ebSWei Hu 	    *((unsigned short *) &p[6]),
154a560f3ebSWei Hu 	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
155a560f3ebSWei Hu }
156a560f3ebSWei Hu 
157a560f3ebSWei Hu static bool
is_valid_srv_id(const struct hyperv_guid * id)158a560f3ebSWei Hu is_valid_srv_id(const struct hyperv_guid *id)
159a560f3ebSWei Hu {
160a560f3ebSWei Hu 	return !memcmp(&id->hv_guid[4],
161a560f3ebSWei Hu 	    &srv_id_template.hv_guid[4], sizeof(struct hyperv_guid) - 4);
162a560f3ebSWei Hu }
163a560f3ebSWei Hu 
/*
 * Return the port number stored in the first sizeof(unsigned int) bytes of
 * a service ID, in native byte order.
 *
 * The value is copied out with memcpy() instead of being loaded through a
 * casted pointer, so no alignment beyond byte alignment is required of
 * *srv_id and the GUID storage is not type-punned.
 */
static unsigned int
get_port_by_srv_id(const struct hyperv_guid *srv_id)
{
	unsigned int port;

	memcpy(&port, srv_id, sizeof(port));
	return (port);
}
169a560f3ebSWei Hu 
/*
 * Store `port` into the first sizeof(unsigned int) bytes of a service ID,
 * in native byte order.  The remaining bytes are left untouched.
 *
 * The value is written with memcpy() instead of through a casted pointer,
 * so no alignment beyond byte alignment is required of *srv_id and the
 * GUID storage is not type-punned.
 */
static void
set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port)
{
	memcpy(srv_id, &port, sizeof(port));
}
175a560f3ebSWei Hu 
176a560f3ebSWei Hu 
177a560f3ebSWei Hu static void
__hvs_remove_pcb_from_list(struct hvs_pcb * pcb,unsigned char list)178a560f3ebSWei Hu __hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list)
179a560f3ebSWei Hu {
180a560f3ebSWei Hu 	struct hvs_pcb *p = NULL;
181a560f3ebSWei Hu 
182a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
183a560f3ebSWei Hu 
184a560f3ebSWei Hu 	if (!pcb)
185a560f3ebSWei Hu 		return;
186a560f3ebSWei Hu 
187a560f3ebSWei Hu 	if (list & HVS_LIST_BOUND) {
188a560f3ebSWei Hu 		LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
189a560f3ebSWei Hu 			if  (p == pcb)
190a560f3ebSWei Hu 				LIST_REMOVE(p, bound_next);
191a560f3ebSWei Hu 	}
192a560f3ebSWei Hu 
193a560f3ebSWei Hu 	if (list & HVS_LIST_CONNECTED) {
194a560f3ebSWei Hu 		LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
195a560f3ebSWei Hu 			if (p == pcb)
196a560f3ebSWei Hu 				LIST_REMOVE(pcb, connected_next);
197a560f3ebSWei Hu 	}
198a560f3ebSWei Hu }
199a560f3ebSWei Hu 
200a560f3ebSWei Hu static void
__hvs_remove_socket_from_list(struct socket * so,unsigned char list)201a560f3ebSWei Hu __hvs_remove_socket_from_list(struct socket *so, unsigned char list)
202a560f3ebSWei Hu {
203a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
204a560f3ebSWei Hu 
205a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
206a560f3ebSWei Hu 
207a560f3ebSWei Hu 	__hvs_remove_pcb_from_list(pcb, list);
208a560f3ebSWei Hu }
209a560f3ebSWei Hu 
210a560f3ebSWei Hu static void
__hvs_insert_socket_on_list(struct socket * so,unsigned char list)211a560f3ebSWei Hu __hvs_insert_socket_on_list(struct socket *so, unsigned char list)
212a560f3ebSWei Hu {
213a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
214a560f3ebSWei Hu 
215a560f3ebSWei Hu 	if (list & HVS_LIST_BOUND)
216a560f3ebSWei Hu 		LIST_INSERT_HEAD(&hvs_trans_bound_socks,
217a560f3ebSWei Hu 		   pcb, bound_next);
218a560f3ebSWei Hu 
219a560f3ebSWei Hu 	if (list & HVS_LIST_CONNECTED)
220a560f3ebSWei Hu 		LIST_INSERT_HEAD(&hvs_trans_connected_socks,
221a560f3ebSWei Hu 		   pcb, connected_next);
222a560f3ebSWei Hu }
223a560f3ebSWei Hu 
224a560f3ebSWei Hu void
hvs_remove_socket_from_list(struct socket * so,unsigned char list)225a560f3ebSWei Hu hvs_remove_socket_from_list(struct socket *so, unsigned char list)
226a560f3ebSWei Hu {
227a560f3ebSWei Hu 	if (!so || !so->so_pcb) {
228a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
229a560f3ebSWei Hu 		    "%s: socket or so_pcb is null\n", __func__);
230a560f3ebSWei Hu 		return;
231a560f3ebSWei Hu 	}
232a560f3ebSWei Hu 
233a560f3ebSWei Hu 	mtx_lock(&hvs_trans_socks_mtx);
234a560f3ebSWei Hu 	__hvs_remove_socket_from_list(so, list);
235a560f3ebSWei Hu 	mtx_unlock(&hvs_trans_socks_mtx);
236a560f3ebSWei Hu }
237a560f3ebSWei Hu 
238a560f3ebSWei Hu static void
hvs_insert_socket_on_list(struct socket * so,unsigned char list)239a560f3ebSWei Hu hvs_insert_socket_on_list(struct socket *so, unsigned char list)
240a560f3ebSWei Hu {
241a560f3ebSWei Hu 	if (!so || !so->so_pcb) {
242a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
243a560f3ebSWei Hu 		    "%s: socket or so_pcb is null\n", __func__);
244a560f3ebSWei Hu 		return;
245a560f3ebSWei Hu 	}
246a560f3ebSWei Hu 
247a560f3ebSWei Hu 	mtx_lock(&hvs_trans_socks_mtx);
248a560f3ebSWei Hu 	__hvs_insert_socket_on_list(so, list);
249a560f3ebSWei Hu 	mtx_unlock(&hvs_trans_socks_mtx);
250a560f3ebSWei Hu }
251a560f3ebSWei Hu 
252a560f3ebSWei Hu static struct socket *
__hvs_find_socket_on_list(struct sockaddr_hvs * addr,unsigned char list)253a560f3ebSWei Hu __hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
254a560f3ebSWei Hu {
255a560f3ebSWei Hu 	struct hvs_pcb *p = NULL;
256a560f3ebSWei Hu 
257a560f3ebSWei Hu 	if (list & HVS_LIST_BOUND)
258a560f3ebSWei Hu 		LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
259a560f3ebSWei Hu 			if (p->so != NULL &&
260a560f3ebSWei Hu 			    addr->hvs_port == p->local_addr.hvs_port)
261a560f3ebSWei Hu 				return p->so;
262a560f3ebSWei Hu 
263a560f3ebSWei Hu 	if (list & HVS_LIST_CONNECTED)
264a560f3ebSWei Hu 		LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
265a560f3ebSWei Hu 			if (p->so != NULL &&
266a560f3ebSWei Hu 			    addr->hvs_port == p->local_addr.hvs_port)
267a560f3ebSWei Hu 				return p->so;
268a560f3ebSWei Hu 
269a560f3ebSWei Hu 	return NULL;
270a560f3ebSWei Hu }
271a560f3ebSWei Hu 
272a560f3ebSWei Hu static struct socket *
hvs_find_socket_on_list(struct sockaddr_hvs * addr,unsigned char list)273a560f3ebSWei Hu hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
274a560f3ebSWei Hu {
275a560f3ebSWei Hu 	struct socket *s = NULL;
276a560f3ebSWei Hu 
277a560f3ebSWei Hu 	mtx_lock(&hvs_trans_socks_mtx);
278a560f3ebSWei Hu 	s = __hvs_find_socket_on_list(addr, list);
279a560f3ebSWei Hu 	mtx_unlock(&hvs_trans_socks_mtx);
280a560f3ebSWei Hu 
281a560f3ebSWei Hu 	return s;
282a560f3ebSWei Hu }
283a560f3ebSWei Hu 
284a560f3ebSWei Hu static inline void
hvs_addr_set(struct sockaddr_hvs * addr,unsigned int port)285a560f3ebSWei Hu hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port)
286a560f3ebSWei Hu {
287a560f3ebSWei Hu 	memset(addr, 0, sizeof(*addr));
288a560f3ebSWei Hu 	addr->sa_family = AF_HYPERV;
289f161d294SMark Johnston 	addr->sa_len = sizeof(*addr);
290a560f3ebSWei Hu 	addr->hvs_port = port;
291a560f3ebSWei Hu }
292a560f3ebSWei Hu 
/*
 * Initialize *addr as an AF_HYPERV address whose port is the one encoded
 * in the service ID *svr_id.
 */
void
hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id)
{
	unsigned int port;

	port = get_port_by_srv_id(svr_id);
	hvs_addr_set(addr, port);
}
298a560f3ebSWei Hu 
299a560f3ebSWei Hu int
hvs_trans_lock(void)300a560f3ebSWei Hu hvs_trans_lock(void)
301a560f3ebSWei Hu {
302a560f3ebSWei Hu 	sx_xlock(&hvs_trans_socks_sx);
303a560f3ebSWei Hu 	return (0);
304a560f3ebSWei Hu }
305a560f3ebSWei Hu 
306a560f3ebSWei Hu void
hvs_trans_unlock(void)307a560f3ebSWei Hu hvs_trans_unlock(void)
308a560f3ebSWei Hu {
309a560f3ebSWei Hu 	sx_xunlock(&hvs_trans_socks_sx);
310a560f3ebSWei Hu }
311a560f3ebSWei Hu 
312625932c9SKyle Evans static int
hvs_dom_probe(void)313625932c9SKyle Evans hvs_dom_probe(void)
314625932c9SKyle Evans {
315625932c9SKyle Evans 
316625932c9SKyle Evans 	/* Don't even give us a chance to attach on non-HyperV. */
317625932c9SKyle Evans 	if (vm_guest != VM_GUEST_HV)
318625932c9SKyle Evans 		return (ENXIO);
319625932c9SKyle Evans 	return (0);
320625932c9SKyle Evans }
321625932c9SKyle Evans 
32289128ff3SGleb Smirnoff static void
hvs_trans_init(void * arg __unused)32389128ff3SGleb Smirnoff hvs_trans_init(void *arg __unused)
324a560f3ebSWei Hu {
325a560f3ebSWei Hu 
326a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
327a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_init called\n", __func__);
328a560f3ebSWei Hu 
329a560f3ebSWei Hu 	/* Initialize Globals */
330a560f3ebSWei Hu 	previous_auto_bound_port = MAX_PORT;
331a560f3ebSWei Hu 	sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx");
332a560f3ebSWei Hu 	mtx_init(&hvs_trans_socks_mtx,
333a560f3ebSWei Hu 	    "hvs_trans_socks_mtx", NULL, MTX_DEF);
334a560f3ebSWei Hu 	LIST_INIT(&hvs_trans_bound_socks);
335a560f3ebSWei Hu 	LIST_INIT(&hvs_trans_connected_socks);
336a560f3ebSWei Hu }
33789128ff3SGleb Smirnoff SYSINIT(hvs_trans_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
33889128ff3SGleb Smirnoff     hvs_trans_init, NULL);
339a560f3ebSWei Hu 
340a560f3ebSWei Hu /*
341a560f3ebSWei Hu  * Called in two cases:
342a560f3ebSWei Hu  * 1) When user calls socket();
343a560f3ebSWei Hu  * 2) When we accept new incoming conneciton and call sonewconn().
344a560f3ebSWei Hu  */
345a560f3ebSWei Hu int
hvs_trans_attach(struct socket * so,int proto,struct thread * td)346a560f3ebSWei Hu hvs_trans_attach(struct socket *so, int proto, struct thread *td)
347a560f3ebSWei Hu {
348a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
349a560f3ebSWei Hu 
350a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
351a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_attach called\n", __func__);
352a560f3ebSWei Hu 
353a560f3ebSWei Hu 	if (so->so_type != SOCK_STREAM)
354a560f3ebSWei Hu 		return (ESOCKTNOSUPPORT);
355a560f3ebSWei Hu 
356a560f3ebSWei Hu 	if (proto != 0 && proto != HYPERV_SOCK_PROTO_TRANS)
357a560f3ebSWei Hu 		return (EPROTONOSUPPORT);
358a560f3ebSWei Hu 
359a560f3ebSWei Hu 	if (pcb != NULL)
360a560f3ebSWei Hu 		return (EISCONN);
361a560f3ebSWei Hu 	pcb = malloc(sizeof(struct hvs_pcb), M_HVSOCK, M_NOWAIT | M_ZERO);
362a560f3ebSWei Hu 	if (pcb == NULL)
363a560f3ebSWei Hu 		return (ENOMEM);
364a560f3ebSWei Hu 
365a560f3ebSWei Hu 	pcb->so = so;
366a560f3ebSWei Hu 	so->so_pcb = (void *)pcb;
367a560f3ebSWei Hu 
368a560f3ebSWei Hu 	return (0);
369a560f3ebSWei Hu }
370a560f3ebSWei Hu 
371a560f3ebSWei Hu void
hvs_trans_detach(struct socket * so)372a560f3ebSWei Hu hvs_trans_detach(struct socket *so)
373a560f3ebSWei Hu {
374a560f3ebSWei Hu 	struct hvs_pcb *pcb;
375a560f3ebSWei Hu 
376a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
377a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_detach called\n", __func__);
378a560f3ebSWei Hu 
379a560f3ebSWei Hu 	(void) hvs_trans_lock();
380a560f3ebSWei Hu 	pcb = so2hvspcb(so);
381a560f3ebSWei Hu 	if (pcb == NULL) {
382a560f3ebSWei Hu 		hvs_trans_unlock();
383a560f3ebSWei Hu 		return;
384a560f3ebSWei Hu 	}
385a560f3ebSWei Hu 
386a560f3ebSWei Hu 	if (SOLISTENING(so)) {
387a560f3ebSWei Hu 		bzero(pcb, sizeof(*pcb));
388a560f3ebSWei Hu 		free(pcb, M_HVSOCK);
389a560f3ebSWei Hu 	}
390a560f3ebSWei Hu 
391a560f3ebSWei Hu 	so->so_pcb = NULL;
392a560f3ebSWei Hu 
393a560f3ebSWei Hu 	hvs_trans_unlock();
394a560f3ebSWei Hu }
395a560f3ebSWei Hu 
396a560f3ebSWei Hu int
hvs_trans_bind(struct socket * so,struct sockaddr * addr,struct thread * td)397a560f3ebSWei Hu hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td)
398a560f3ebSWei Hu {
399a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
400a560f3ebSWei Hu 	struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr;
401a560f3ebSWei Hu 	int error = 0;
402a560f3ebSWei Hu 
403a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
404a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_bind called\n", __func__);
405a560f3ebSWei Hu 
406a560f3ebSWei Hu 	if (sa == NULL) {
407a560f3ebSWei Hu 		return (EINVAL);
408a560f3ebSWei Hu 	}
409a560f3ebSWei Hu 
410a560f3ebSWei Hu 	if (pcb == NULL) {
411a560f3ebSWei Hu 		return (EINVAL);
412a560f3ebSWei Hu 	}
413a560f3ebSWei Hu 
414a560f3ebSWei Hu 	if (sa->sa_family != AF_HYPERV) {
415a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_ERR,
416a560f3ebSWei Hu 		    "%s: Not supported, sa_family is %u\n",
417a560f3ebSWei Hu 		    __func__, sa->sa_family);
418a560f3ebSWei Hu 		return (EAFNOSUPPORT);
419a560f3ebSWei Hu 	}
420f161d294SMark Johnston 	if (sa->sa_len != sizeof(*sa)) {
421f161d294SMark Johnston 		HVSOCK_DBG(HVSOCK_DBG_ERR,
422f161d294SMark Johnston 		    "%s: Not supported, sa_len is %u\n",
423f161d294SMark Johnston 		    __func__, sa->sa_len);
424f161d294SMark Johnston 		return (EINVAL);
425f161d294SMark Johnston 	}
426a560f3ebSWei Hu 
427a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
428a560f3ebSWei Hu 	    "%s: binding port = 0x%x\n", __func__, sa->hvs_port);
429a560f3ebSWei Hu 
430a560f3ebSWei Hu 	mtx_lock(&hvs_trans_socks_mtx);
431a560f3ebSWei Hu 	if (__hvs_find_socket_on_list(sa,
432a560f3ebSWei Hu 	    HVS_LIST_BOUND | HVS_LIST_CONNECTED)) {
433a560f3ebSWei Hu 		error = EADDRINUSE;
434a560f3ebSWei Hu 	} else {
435a560f3ebSWei Hu 		/*
436a560f3ebSWei Hu 		 * The address is available for us to bind.
437a560f3ebSWei Hu 		 * Add socket to the bound list.
438a560f3ebSWei Hu 		 */
439a560f3ebSWei Hu 		hvs_addr_set(&pcb->local_addr, sa->hvs_port);
440a560f3ebSWei Hu 		hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY);
441a560f3ebSWei Hu 		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
442a560f3ebSWei Hu 	}
443a560f3ebSWei Hu 	mtx_unlock(&hvs_trans_socks_mtx);
444a560f3ebSWei Hu 
445a560f3ebSWei Hu 	return (error);
446a560f3ebSWei Hu }
447a560f3ebSWei Hu 
448a560f3ebSWei Hu int
hvs_trans_listen(struct socket * so,int backlog,struct thread * td)449a560f3ebSWei Hu hvs_trans_listen(struct socket *so, int backlog, struct thread *td)
450a560f3ebSWei Hu {
451a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
452a560f3ebSWei Hu 	struct socket *bound_so;
453a560f3ebSWei Hu 	int error;
454a560f3ebSWei Hu 
455a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
456a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_listen called\n", __func__);
457a560f3ebSWei Hu 
458a560f3ebSWei Hu 	if (pcb == NULL)
459a560f3ebSWei Hu 		return (EINVAL);
460a560f3ebSWei Hu 
461a560f3ebSWei Hu 	/* Check if the address is already bound and it was by us. */
462a560f3ebSWei Hu 	bound_so = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND);
463a560f3ebSWei Hu 	if (bound_so == NULL || bound_so != so) {
464a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_ERR,
465a560f3ebSWei Hu 		    "%s: Address not bound or not by us.\n", __func__);
466a560f3ebSWei Hu 		return (EADDRNOTAVAIL);
467a560f3ebSWei Hu 	}
468a560f3ebSWei Hu 
469a560f3ebSWei Hu 	SOCK_LOCK(so);
470a560f3ebSWei Hu 	error = solisten_proto_check(so);
471a560f3ebSWei Hu 	if (error == 0)
472a560f3ebSWei Hu 		solisten_proto(so, backlog);
473a560f3ebSWei Hu 	SOCK_UNLOCK(so);
474a560f3ebSWei Hu 
475a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
476a560f3ebSWei Hu 	    "%s: HyperV Socket listen error = %d\n", __func__, error);
477a560f3ebSWei Hu 	return (error);
478a560f3ebSWei Hu }
479a560f3ebSWei Hu 
480a560f3ebSWei Hu int
hvs_trans_accept(struct socket * so,struct sockaddr * sa)481cfb1e929SGleb Smirnoff hvs_trans_accept(struct socket *so, struct sockaddr *sa)
482a560f3ebSWei Hu {
483a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
484a560f3ebSWei Hu 
485a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
486a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_accept called\n", __func__);
487a560f3ebSWei Hu 
488a560f3ebSWei Hu 	if (pcb == NULL)
489a560f3ebSWei Hu 		return (EINVAL);
490a560f3ebSWei Hu 
491cfb1e929SGleb Smirnoff 	memcpy(sa, &pcb->remote_addr, pcb->remote_addr.sa_len);
492a560f3ebSWei Hu 
493cfb1e929SGleb Smirnoff 	return (0);
494a560f3ebSWei Hu }
495a560f3ebSWei Hu 
496a560f3ebSWei Hu int
hvs_trans_connect(struct socket * so,struct sockaddr * nam,struct thread * td)497a560f3ebSWei Hu hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
498a560f3ebSWei Hu {
499a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
500a560f3ebSWei Hu 	struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam;
501a560f3ebSWei Hu 	bool found_auto_bound_port = false;
502a560f3ebSWei Hu 	int i, error = 0;
503a560f3ebSWei Hu 
504a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
505a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n",
506a560f3ebSWei Hu 	    __func__, raddr->hvs_port);
507a560f3ebSWei Hu 
508a560f3ebSWei Hu 	if (pcb == NULL)
509a560f3ebSWei Hu 		return (EINVAL);
510a560f3ebSWei Hu 
511a560f3ebSWei Hu 	/* Verify the remote address */
512a560f3ebSWei Hu 	if (raddr == NULL)
513a560f3ebSWei Hu 		return (EINVAL);
514a560f3ebSWei Hu 	if (raddr->sa_family != AF_HYPERV)
515a560f3ebSWei Hu 		return (EAFNOSUPPORT);
516f161d294SMark Johnston 	if (raddr->sa_len != sizeof(*raddr))
517f161d294SMark Johnston 		return (EINVAL);
518a560f3ebSWei Hu 
519a560f3ebSWei Hu 	mtx_lock(&hvs_trans_socks_mtx);
520a560f3ebSWei Hu 	if (so->so_state &
521a560f3ebSWei Hu 	    (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) {
522a560f3ebSWei Hu 			HVSOCK_DBG(HVSOCK_DBG_ERR,
523a560f3ebSWei Hu 			    "%s: socket connect in progress\n",
524a560f3ebSWei Hu 			    __func__);
525a560f3ebSWei Hu 			error = EINPROGRESS;
526a560f3ebSWei Hu 			goto out;
527a560f3ebSWei Hu 	}
528a560f3ebSWei Hu 
529a560f3ebSWei Hu 	/*
530a560f3ebSWei Hu 	 * Find an available port for us to auto bind the local
531a560f3ebSWei Hu 	 * address.
532a560f3ebSWei Hu 	 */
533a560f3ebSWei Hu 	hvs_addr_set(&pcb->local_addr, 0);
534a560f3ebSWei Hu 
535a560f3ebSWei Hu 	for (i = previous_auto_bound_port - 1;
536a560f3ebSWei Hu 	    i != previous_auto_bound_port; i --) {
537a560f3ebSWei Hu 		if (i == MIN_PORT)
538a560f3ebSWei Hu 			i = MAX_PORT;
539a560f3ebSWei Hu 
540a560f3ebSWei Hu 		pcb->local_addr.hvs_port = i;
541a560f3ebSWei Hu 
542a560f3ebSWei Hu 		if (__hvs_find_socket_on_list(&pcb->local_addr,
543a560f3ebSWei Hu 		    HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) {
544a560f3ebSWei Hu 			found_auto_bound_port = true;
545a560f3ebSWei Hu 			previous_auto_bound_port = i;
546a560f3ebSWei Hu 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
547a560f3ebSWei Hu 			    "%s: found local bound port is %x\n",
548a560f3ebSWei Hu 			    __func__, pcb->local_addr.hvs_port);
549a560f3ebSWei Hu 			break;
550a560f3ebSWei Hu 		}
551a560f3ebSWei Hu 	}
552a560f3ebSWei Hu 
553a560f3ebSWei Hu 	if (found_auto_bound_port == true) {
554a560f3ebSWei Hu 		/* Found available port for auto bound, put on list */
555a560f3ebSWei Hu 		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
556a560f3ebSWei Hu 		/* Set VM service ID */
557a560f3ebSWei Hu 		pcb->vm_srv_id = srv_id_template;
558a560f3ebSWei Hu 		set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port);
559a560f3ebSWei Hu 		/* Set host service ID and remote port */
560a560f3ebSWei Hu 		pcb->host_srv_id = srv_id_template;
561a560f3ebSWei Hu 		set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port);
562a560f3ebSWei Hu 		hvs_addr_set(&pcb->remote_addr, raddr->hvs_port);
563a560f3ebSWei Hu 
564a560f3ebSWei Hu 		/* Change the socket state to SS_ISCONNECTING */
565a560f3ebSWei Hu 		soisconnecting(so);
566a560f3ebSWei Hu 	} else {
567a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_ERR,
568a560f3ebSWei Hu 		    "%s: No local port available for auto bound\n",
569a560f3ebSWei Hu 		    __func__);
570a560f3ebSWei Hu 		error = EADDRINUSE;
571a560f3ebSWei Hu 	}
572a560f3ebSWei Hu 
573a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is ");
574a560f3ebSWei Hu 	hvsock_print_guid(&pcb->vm_srv_id);
575a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is ");
576a560f3ebSWei Hu 	hvsock_print_guid(&pcb->host_srv_id);
577a560f3ebSWei Hu 
578a560f3ebSWei Hu out:
579a560f3ebSWei Hu 	mtx_unlock(&hvs_trans_socks_mtx);
580a560f3ebSWei Hu 
581a560f3ebSWei Hu 	if (found_auto_bound_port == true)
582a560f3ebSWei Hu 		 vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id);
583a560f3ebSWei Hu 
584a560f3ebSWei Hu 	return (error);
585a560f3ebSWei Hu }
586a560f3ebSWei Hu 
587a560f3ebSWei Hu int
hvs_trans_disconnect(struct socket * so)588a560f3ebSWei Hu hvs_trans_disconnect(struct socket *so)
589a560f3ebSWei Hu {
590a560f3ebSWei Hu 	struct hvs_pcb *pcb;
591a560f3ebSWei Hu 
592a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
593a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_disconnect called\n", __func__);
594a560f3ebSWei Hu 
595a560f3ebSWei Hu 	(void) hvs_trans_lock();
596a560f3ebSWei Hu 	pcb = so2hvspcb(so);
597a560f3ebSWei Hu 	if (pcb == NULL) {
598a560f3ebSWei Hu 		hvs_trans_unlock();
599a560f3ebSWei Hu 		return (EINVAL);
600a560f3ebSWei Hu 	}
601a560f3ebSWei Hu 
602a560f3ebSWei Hu 	/* If socket is already disconnected, skip this */
603a560f3ebSWei Hu 	if ((so->so_state & SS_ISDISCONNECTED) == 0)
604a560f3ebSWei Hu 		soisdisconnecting(so);
605a560f3ebSWei Hu 
606a560f3ebSWei Hu 	hvs_trans_unlock();
607a560f3ebSWei Hu 
608a560f3ebSWei Hu 	return (0);
609a560f3ebSWei Hu }
610a560f3ebSWei Hu 
/*
 * Argument bundle handed (as a void pointer) to hvsock_br_callback() by
 * the receive path.
 */
struct hvs_callback_arg {
	struct uio *uio;	/* caller's uio for the receive */
	struct sockbuf *sb;	/* the socket's receive sockbuf */
};
615a560f3ebSWei Hu 
616a560f3ebSWei Hu int
hvs_trans_soreceive(struct socket * so,struct sockaddr ** paddr,struct uio * uio,struct mbuf ** mp0,struct mbuf ** controlp,int * flagsp)617a560f3ebSWei Hu hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
618a560f3ebSWei Hu     struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
619a560f3ebSWei Hu {
620a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
621a560f3ebSWei Hu 	struct sockbuf *sb;
622a560f3ebSWei Hu 	ssize_t orig_resid;
623a560f3ebSWei Hu 	uint32_t canread, to_read;
624a560f3ebSWei Hu 	int flags, error = 0;
625a560f3ebSWei Hu 	struct hvs_callback_arg cbarg;
626a560f3ebSWei Hu 
627a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
628a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_soreceive called\n", __func__);
629a560f3ebSWei Hu 
630a560f3ebSWei Hu 	if (so->so_type != SOCK_STREAM)
631a560f3ebSWei Hu 		return (EINVAL);
632a560f3ebSWei Hu 	if (pcb == NULL)
633a560f3ebSWei Hu 		return (EINVAL);
634a560f3ebSWei Hu 
635a560f3ebSWei Hu 	if (flagsp != NULL)
636a560f3ebSWei Hu 		flags = *flagsp &~ MSG_EOR;
637a560f3ebSWei Hu 	else
638a560f3ebSWei Hu 		flags = 0;
639a560f3ebSWei Hu 
640a560f3ebSWei Hu 	if (flags & MSG_PEEK)
641a560f3ebSWei Hu 		return (EOPNOTSUPP);
642a560f3ebSWei Hu 
643a560f3ebSWei Hu 	/* If no space to copy out anything */
644a560f3ebSWei Hu 	if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ)
645a560f3ebSWei Hu 		return (EINVAL);
646a560f3ebSWei Hu 
647a560f3ebSWei Hu 	orig_resid = uio->uio_resid;
648a560f3ebSWei Hu 
649a560f3ebSWei Hu 	/* Prevent other readers from entering the socket. */
650f94acf52SMark Johnston 	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
651a560f3ebSWei Hu 	if (error) {
652a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_ERR,
653f94acf52SMark Johnston 		    "%s: soiolock returned error = %d\n", __func__, error);
654a560f3ebSWei Hu 		return (error);
655a560f3ebSWei Hu 	}
656a560f3ebSWei Hu 
657f94acf52SMark Johnston 	sb = &so->so_rcv;
658a560f3ebSWei Hu 	SOCKBUF_LOCK(sb);
659a560f3ebSWei Hu 
660a560f3ebSWei Hu 	cbarg.uio = uio;
661a560f3ebSWei Hu 	cbarg.sb = sb;
662a560f3ebSWei Hu 	/*
663a560f3ebSWei Hu 	 * If the socket is closing, there might still be some data
664a560f3ebSWei Hu 	 * in rx br to read. However we need to make sure
665a560f3ebSWei Hu 	 * the channel is still open.
666a560f3ebSWei Hu 	 */
667a560f3ebSWei Hu 	if ((sb->sb_state & SBS_CANTRCVMORE) &&
668a560f3ebSWei Hu 	    (so->so_state & SS_ISDISCONNECTED)) {
669a560f3ebSWei Hu 		/* Other thread already closed the channel */
670a560f3ebSWei Hu 		error = EPIPE;
671a560f3ebSWei Hu 		goto out;
672a560f3ebSWei Hu 	}
673a560f3ebSWei Hu 
674a560f3ebSWei Hu 	while (true) {
675a560f3ebSWei Hu 		while (uio->uio_resid > 0 &&
676a560f3ebSWei Hu 		    (canread = hvsock_canread_check(pcb)) > 0) {
677a560f3ebSWei Hu 			to_read = MIN(canread, uio->uio_resid);
678a560f3ebSWei Hu 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
679a560f3ebSWei Hu 			    "%s: to_read = %u, skip = %u\n", __func__, to_read,
680a560f3ebSWei Hu 			    (unsigned int)(sizeof(struct hvs_pkt_header) +
681a560f3ebSWei Hu 			    pcb->recv_data_off));
682a560f3ebSWei Hu 
683a560f3ebSWei Hu 			error = vmbus_chan_recv_peek_call(pcb->chan, to_read,
684a560f3ebSWei Hu 			    sizeof(struct hvs_pkt_header) + pcb->recv_data_off,
685a560f3ebSWei Hu 			    hvsock_br_callback, (void *)&cbarg);
686a560f3ebSWei Hu 			/*
687a560f3ebSWei Hu 			 * It is possible socket is disconnected because
688a560f3ebSWei Hu 			 * we released lock in hvsock_br_callback. So we
689a560f3ebSWei Hu 			 * need to check the state to make sure it is not
690a560f3ebSWei Hu 			 * disconnected.
691a560f3ebSWei Hu 			 */
692a560f3ebSWei Hu 			if (error || so->so_state & SS_ISDISCONNECTED) {
693a560f3ebSWei Hu 				break;
694a560f3ebSWei Hu 			}
695a560f3ebSWei Hu 
696a560f3ebSWei Hu 			pcb->recv_data_len -= to_read;
697a560f3ebSWei Hu 			pcb->recv_data_off += to_read;
698a560f3ebSWei Hu 		}
699a560f3ebSWei Hu 
700a560f3ebSWei Hu 		if (error)
701a560f3ebSWei Hu 			break;
702a560f3ebSWei Hu 
703a560f3ebSWei Hu 		/* Abort if socket has reported problems. */
704a560f3ebSWei Hu 		if (so->so_error) {
705a560f3ebSWei Hu 			if (so->so_error == ESHUTDOWN &&
706a560f3ebSWei Hu 			    orig_resid > uio->uio_resid) {
707a560f3ebSWei Hu 				/*
708a560f3ebSWei Hu 				 * Although we got a FIN, we also received
709a560f3ebSWei Hu 				 * some data in this round. Delivery it
710a560f3ebSWei Hu 				 * to user.
711a560f3ebSWei Hu 				 */
712a560f3ebSWei Hu 				error = 0;
713a560f3ebSWei Hu 			} else {
714a560f3ebSWei Hu 				if (so->so_error != ESHUTDOWN)
715a560f3ebSWei Hu 					error = so->so_error;
716a560f3ebSWei Hu 			}
717a560f3ebSWei Hu 
718a560f3ebSWei Hu 			break;
719a560f3ebSWei Hu 		}
720a560f3ebSWei Hu 
721a560f3ebSWei Hu 		/* Cannot received more. */
722a560f3ebSWei Hu 		if (sb->sb_state & SBS_CANTRCVMORE)
723a560f3ebSWei Hu 			break;
724a560f3ebSWei Hu 
725a560f3ebSWei Hu 		/* We are done if buffer has been filled */
726a560f3ebSWei Hu 		if (uio->uio_resid == 0)
727a560f3ebSWei Hu 			break;
728a560f3ebSWei Hu 
729a560f3ebSWei Hu 		if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid)
730a560f3ebSWei Hu 			break;
731a560f3ebSWei Hu 
732a560f3ebSWei Hu 		/* Buffer ring is empty and we shall not block */
733a560f3ebSWei Hu 		if ((so->so_state & SS_NBIO) ||
734a560f3ebSWei Hu 		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
735a560f3ebSWei Hu 			if (orig_resid == uio->uio_resid) {
736a560f3ebSWei Hu 				/* We have not read anything */
737a560f3ebSWei Hu 				error = EAGAIN;
738a560f3ebSWei Hu 			}
739a560f3ebSWei Hu 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
740a560f3ebSWei Hu 			    "%s: non blocked read return, error %d.\n",
741a560f3ebSWei Hu 			    __func__, error);
742a560f3ebSWei Hu 			break;
743a560f3ebSWei Hu 		}
744a560f3ebSWei Hu 
745a560f3ebSWei Hu 		/*
746a560f3ebSWei Hu 		 * Wait and block until (more) data comes in.
747a560f3ebSWei Hu 		 * Note: Drops the sockbuf lock during wait.
748a560f3ebSWei Hu 		 */
74943283184SGleb Smirnoff 		error = sbwait(so, SO_RCV);
750a560f3ebSWei Hu 
751a560f3ebSWei Hu 		if (error)
752a560f3ebSWei Hu 			break;
753a560f3ebSWei Hu 
754a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
755a560f3ebSWei Hu 		    "%s: wake up from sbwait, read available is %u\n",
756a560f3ebSWei Hu 		    __func__, vmbus_chan_read_available(pcb->chan));
757a560f3ebSWei Hu 	}
758a560f3ebSWei Hu 
759a560f3ebSWei Hu out:
760a560f3ebSWei Hu 	SOCKBUF_UNLOCK(sb);
761f94acf52SMark Johnston 	SOCK_IO_RECV_UNLOCK(so);
762a560f3ebSWei Hu 
7636dc7bf0cSGordon Bergling 	/* We received a FIN in this call */
764a560f3ebSWei Hu 	if (so->so_error == ESHUTDOWN) {
765a560f3ebSWei Hu 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
766a560f3ebSWei Hu 			/* Send has already closed */
767a560f3ebSWei Hu 			soisdisconnecting(so);
768a560f3ebSWei Hu 		} else {
769a560f3ebSWei Hu 			/* Just close the receive side */
770a560f3ebSWei Hu 			socantrcvmore(so);
771a560f3ebSWei Hu 		}
772a560f3ebSWei Hu 	}
773a560f3ebSWei Hu 
774a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
775a560f3ebSWei Hu 	    "%s: returning error = %d, so_error = %d\n",
776a560f3ebSWei Hu 	    __func__, error, so->so_error);
777a560f3ebSWei Hu 
778a560f3ebSWei Hu 	return (error);
779a560f3ebSWei Hu }
780a560f3ebSWei Hu 
781a560f3ebSWei Hu int
hvs_trans_sosend(struct socket * so,struct sockaddr * addr,struct uio * uio,struct mbuf * top,struct mbuf * controlp,int flags,struct thread * td)782a560f3ebSWei Hu hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
783a560f3ebSWei Hu     struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td)
784a560f3ebSWei Hu {
785a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
786a560f3ebSWei Hu 	struct sockbuf *sb;
787a560f3ebSWei Hu 	ssize_t orig_resid;
788a560f3ebSWei Hu 	uint32_t canwrite, to_write;
789a560f3ebSWei Hu 	int error = 0;
790a560f3ebSWei Hu 
791a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
792db7ec3c3SLi-Wen Hsu 	    "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %zd\n",
793a560f3ebSWei Hu 	    __func__, uio->uio_resid);
794a560f3ebSWei Hu 
795a560f3ebSWei Hu 	if (so->so_type != SOCK_STREAM)
796a560f3ebSWei Hu 		return (EINVAL);
797a560f3ebSWei Hu 	if (pcb == NULL)
798a560f3ebSWei Hu 		return (EINVAL);
799a560f3ebSWei Hu 
800a560f3ebSWei Hu 	/* If nothing to send */
801a560f3ebSWei Hu 	if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE)
802a560f3ebSWei Hu 		return (EINVAL);
803a560f3ebSWei Hu 
804a560f3ebSWei Hu 	orig_resid = uio->uio_resid;
805a560f3ebSWei Hu 
806a560f3ebSWei Hu 	/* Prevent other writers from entering the socket. */
807f94acf52SMark Johnston 	error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
808a560f3ebSWei Hu 	if (error) {
809a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_ERR,
810f94acf52SMark Johnston 		    "%s: soiolocak returned error = %d\n", __func__, error);
811a560f3ebSWei Hu 		return (error);
812a560f3ebSWei Hu 	}
813a560f3ebSWei Hu 
814f94acf52SMark Johnston 	sb = &so->so_snd;
815a560f3ebSWei Hu 	SOCKBUF_LOCK(sb);
816a560f3ebSWei Hu 
817a560f3ebSWei Hu 	if ((sb->sb_state & SBS_CANTSENDMORE) ||
818a560f3ebSWei Hu 	    so->so_error == ESHUTDOWN) {
819a560f3ebSWei Hu 		error = EPIPE;
820a560f3ebSWei Hu 		goto out;
821a560f3ebSWei Hu 	}
822a560f3ebSWei Hu 
823a560f3ebSWei Hu 	while (uio->uio_resid > 0) {
824a560f3ebSWei Hu 		canwrite = hvsock_canwrite_check(pcb);
825a560f3ebSWei Hu 		if (canwrite == 0) {
826a560f3ebSWei Hu 			/* We have sent some data */
827a560f3ebSWei Hu 			if (orig_resid > uio->uio_resid)
828a560f3ebSWei Hu 				break;
829a560f3ebSWei Hu 			/*
830a560f3ebSWei Hu 			 * We have not sent any data and it is
831a560f3ebSWei Hu 			 * non-blocked io
832a560f3ebSWei Hu 			 */
833a560f3ebSWei Hu 			if (so->so_state & SS_NBIO ||
834a560f3ebSWei Hu 			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
835a560f3ebSWei Hu 				error = EWOULDBLOCK;
836a560f3ebSWei Hu 				break;
837a560f3ebSWei Hu 			} else {
838a560f3ebSWei Hu 				/*
839a560f3ebSWei Hu 				 * We are here because there is no space on
840a560f3ebSWei Hu 				 * send buffer ring. Signal the other side
841a560f3ebSWei Hu 				 * to read and free more space.
842a560f3ebSWei Hu 				 * Sleep wait until space avaiable to send
843a560f3ebSWei Hu 				 * Note: Drops the sockbuf lock during wait.
844a560f3ebSWei Hu 				 */
84543283184SGleb Smirnoff 				error = sbwait(so, SO_SND);
846a560f3ebSWei Hu 
847a560f3ebSWei Hu 				if (error)
848a560f3ebSWei Hu 					break;
849a560f3ebSWei Hu 
850a560f3ebSWei Hu 				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
851a560f3ebSWei Hu 				    "%s: wake up from sbwait, space avail on "
852a560f3ebSWei Hu 				    "tx ring is %u\n",
853a560f3ebSWei Hu 				    __func__,
854a560f3ebSWei Hu 				    vmbus_chan_write_available(pcb->chan));
855a560f3ebSWei Hu 
856a560f3ebSWei Hu 				continue;
857a560f3ebSWei Hu 			}
858a560f3ebSWei Hu 		}
859a560f3ebSWei Hu 		to_write = MIN(canwrite, uio->uio_resid);
860a560f3ebSWei Hu 		to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ);
861a560f3ebSWei Hu 
862a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
863a560f3ebSWei Hu 		    "%s: canwrite is %u, to_write = %u\n", __func__,
864a560f3ebSWei Hu 		    canwrite, to_write);
865a560f3ebSWei Hu 		error = hvsock_send_data(pcb->chan, uio, to_write, sb);
866a560f3ebSWei Hu 
867a560f3ebSWei Hu 		if (error)
868a560f3ebSWei Hu 			break;
869a560f3ebSWei Hu 	}
870a560f3ebSWei Hu 
871a560f3ebSWei Hu out:
872a560f3ebSWei Hu 	SOCKBUF_UNLOCK(sb);
873f94acf52SMark Johnston 	SOCK_IO_SEND_UNLOCK(so);
874a560f3ebSWei Hu 
875a560f3ebSWei Hu 	return (error);
876a560f3ebSWei Hu }
877a560f3ebSWei Hu 
878a560f3ebSWei Hu int
hvs_trans_peeraddr(struct socket * so,struct sockaddr * sa)8790fac350cSGleb Smirnoff hvs_trans_peeraddr(struct socket *so, struct sockaddr *sa)
880a560f3ebSWei Hu {
881a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
882a560f3ebSWei Hu 
883a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
884a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__);
885a560f3ebSWei Hu 
886a560f3ebSWei Hu 	if (pcb == NULL)
887a560f3ebSWei Hu 		return (EINVAL);
888a560f3ebSWei Hu 
8890fac350cSGleb Smirnoff 	memcpy(sa, &pcb->remote_addr, pcb->remote_addr.sa_len);
890a560f3ebSWei Hu 
8910fac350cSGleb Smirnoff 	return (0);
892a560f3ebSWei Hu }
893a560f3ebSWei Hu 
894a560f3ebSWei Hu int
hvs_trans_sockaddr(struct socket * so,struct sockaddr * sa)8950fac350cSGleb Smirnoff hvs_trans_sockaddr(struct socket *so, struct sockaddr *sa)
896a560f3ebSWei Hu {
897a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
898a560f3ebSWei Hu 
899a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
900a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__);
901a560f3ebSWei Hu 
902a560f3ebSWei Hu 	if (pcb == NULL)
903a560f3ebSWei Hu 		return (EINVAL);
904a560f3ebSWei Hu 
9050fac350cSGleb Smirnoff 	memcpy(sa, &pcb->local_addr, pcb->local_addr.sa_len);
906a560f3ebSWei Hu 
9070fac350cSGleb Smirnoff 	return (0);
908a560f3ebSWei Hu }
909a560f3ebSWei Hu 
910a560f3ebSWei Hu void
hvs_trans_close(struct socket * so)911a560f3ebSWei Hu hvs_trans_close(struct socket *so)
912a560f3ebSWei Hu {
913a560f3ebSWei Hu 	struct hvs_pcb *pcb;
914a560f3ebSWei Hu 
915a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
916a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_close called\n", __func__);
917a560f3ebSWei Hu 
918a560f3ebSWei Hu 	(void) hvs_trans_lock();
919a560f3ebSWei Hu 	pcb = so2hvspcb(so);
920a560f3ebSWei Hu 	if (!pcb) {
921a560f3ebSWei Hu 		hvs_trans_unlock();
922a560f3ebSWei Hu 		return;
923a560f3ebSWei Hu 	}
924a560f3ebSWei Hu 
925a560f3ebSWei Hu 	if (so->so_state & SS_ISCONNECTED) {
926a560f3ebSWei Hu 		/* Send a FIN to peer */
927a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
928a560f3ebSWei Hu 		    "%s: hvs_trans_close sending a FIN to host\n", __func__);
929a560f3ebSWei Hu 		(void) hvsock_send_data(pcb->chan, NULL, 0, NULL);
930a560f3ebSWei Hu 	}
931a560f3ebSWei Hu 
932a560f3ebSWei Hu 	if (so->so_state &
933a560f3ebSWei Hu 	    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
934a560f3ebSWei Hu 		soisdisconnected(so);
935a560f3ebSWei Hu 
936a560f3ebSWei Hu 	pcb->chan = NULL;
937a560f3ebSWei Hu 	pcb->so = NULL;
938a560f3ebSWei Hu 
939a560f3ebSWei Hu 	if (SOLISTENING(so)) {
940a560f3ebSWei Hu 		mtx_lock(&hvs_trans_socks_mtx);
941a560f3ebSWei Hu 		/* Remove from bound list */
942a560f3ebSWei Hu 		__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
943a560f3ebSWei Hu 		mtx_unlock(&hvs_trans_socks_mtx);
944a560f3ebSWei Hu 	}
945a560f3ebSWei Hu 
946a560f3ebSWei Hu 	hvs_trans_unlock();
947a560f3ebSWei Hu 
948a560f3ebSWei Hu 	return;
949a560f3ebSWei Hu }
950a560f3ebSWei Hu 
951a560f3ebSWei Hu void
hvs_trans_abort(struct socket * so)952a560f3ebSWei Hu hvs_trans_abort(struct socket *so)
953a560f3ebSWei Hu {
954a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
955a560f3ebSWei Hu 
956a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
957a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_abort called\n", __func__);
958a560f3ebSWei Hu 
959a560f3ebSWei Hu 	(void) hvs_trans_lock();
960a560f3ebSWei Hu 	if (pcb == NULL) {
961a560f3ebSWei Hu 		hvs_trans_unlock();
962a560f3ebSWei Hu 		return;
963a560f3ebSWei Hu 	}
964a560f3ebSWei Hu 
965a560f3ebSWei Hu 	if (SOLISTENING(so)) {
966a560f3ebSWei Hu 		mtx_lock(&hvs_trans_socks_mtx);
967a560f3ebSWei Hu 		/* Remove from bound list */
968a560f3ebSWei Hu 		__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
969a560f3ebSWei Hu 		mtx_unlock(&hvs_trans_socks_mtx);
970a560f3ebSWei Hu 	}
971a560f3ebSWei Hu 
972a560f3ebSWei Hu 	if (so->so_state & SS_ISCONNECTED) {
973a560f3ebSWei Hu 		(void) sodisconnect(so);
974a560f3ebSWei Hu 	}
975a560f3ebSWei Hu 	hvs_trans_unlock();
976a560f3ebSWei Hu 
977a560f3ebSWei Hu 	return;
978a560f3ebSWei Hu }
979a560f3ebSWei Hu 
980a560f3ebSWei Hu int
hvs_trans_shutdown(struct socket * so,enum shutdown_how how)9815bba2728SGleb Smirnoff hvs_trans_shutdown(struct socket *so, enum shutdown_how how)
982a560f3ebSWei Hu {
983a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
984a560f3ebSWei Hu 
985a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
986a560f3ebSWei Hu 	    "%s: HyperV Socket hvs_trans_shutdown called\n", __func__);
987a560f3ebSWei Hu 
9885bba2728SGleb Smirnoff 	SOCK_LOCK(so);
9895bba2728SGleb Smirnoff 	if ((so->so_state &
9905bba2728SGleb Smirnoff 	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
9915bba2728SGleb Smirnoff 		SOCK_UNLOCK(so);
9925bba2728SGleb Smirnoff 		return (ENOTCONN);
9935bba2728SGleb Smirnoff 	}
9945bba2728SGleb Smirnoff 	SOCK_UNLOCK(so);
9955bba2728SGleb Smirnoff 
996a560f3ebSWei Hu 	if (pcb == NULL)
997a560f3ebSWei Hu 		return (EINVAL);
998a560f3ebSWei Hu 
9995bba2728SGleb Smirnoff 	switch (how) {
10005bba2728SGleb Smirnoff 	case SHUT_RD:
10015bba2728SGleb Smirnoff 		socantrcvmore(so);
10025bba2728SGleb Smirnoff 		break;
10035bba2728SGleb Smirnoff 	case SHUT_RDWR:
10045bba2728SGleb Smirnoff 		socantrcvmore(so);
1005a560f3ebSWei Hu 		if (so->so_state & SS_ISCONNECTED) {
1006a560f3ebSWei Hu 			/* Send a FIN to peer */
10075bba2728SGleb Smirnoff 			SOCK_SENDBUF_LOCK(so);
10085bba2728SGleb Smirnoff 			(void) hvsock_send_data(pcb->chan, NULL, 0,
10095bba2728SGleb Smirnoff 			    &so->so_snd);
10105bba2728SGleb Smirnoff 			SOCK_SENDBUF_UNLOCK(so);
1011a560f3ebSWei Hu 			soisdisconnecting(so);
1012a560f3ebSWei Hu 		}
10135bba2728SGleb Smirnoff 		/* FALLTHROUGH */
10145bba2728SGleb Smirnoff 	case SHUT_WR:
10155bba2728SGleb Smirnoff 		socantsendmore(so);
1016a560f3ebSWei Hu 	}
10175bba2728SGleb Smirnoff 	wakeup(&so->so_timeo);
1018a560f3ebSWei Hu 
1019a560f3ebSWei Hu 	return (0);
1020a560f3ebSWei Hu }
1021a560f3ebSWei Hu 
1022a560f3ebSWei Hu /* In the VM, we support Hyper-V Sockets with AF_HYPERV, and the endpoint is
1023a560f3ebSWei Hu  * <port> (see struct sockaddr_hvs).
1024a560f3ebSWei Hu  *
1025a560f3ebSWei Hu  * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
1026a560f3ebSWei Hu  * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
1027a560f3ebSWei Hu  * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
1028a560f3ebSWei Hu  * the below sockaddr:
1029a560f3ebSWei Hu  *
1030a560f3ebSWei Hu  * struct SOCKADDR_HV
1031a560f3ebSWei Hu  * {
1032a560f3ebSWei Hu  *    ADDRESS_FAMILY Family;
1033a560f3ebSWei Hu  *    USHORT Reserved;
1034a560f3ebSWei Hu  *    GUID VmId;
1035a560f3ebSWei Hu  *    GUID ServiceId;
1036a560f3ebSWei Hu  * };
1037a560f3ebSWei Hu  * Note: VmID is not used by FreeBSD VM and actually it isn't transmitted via
1038a560f3ebSWei Hu  * VMBus, because here it's obvious the host and the VM can easily identify
1039a560f3ebSWei Hu  * each other. Though the VmID is useful on the host, especially in the case
1040a560f3ebSWei Hu  * of Windows container, FreeBSD VM doesn't need it at all.
1041a560f3ebSWei Hu  *
1042a560f3ebSWei Hu  * To be compatible with similar infrastructure in Linux VMs, we have
1043a560f3ebSWei Hu  * to limit the available GUID space of SOCKADDR_HV so that we can create
1044a560f3ebSWei Hu  * a mapping between FreeBSD AF_HYPERV port and SOCKADDR_HV Service GUID.
1045a560f3ebSWei Hu  * The rule of writing Hyper-V Sockets apps on the host and in FreeBSD VM is:
1046a560f3ebSWei Hu  *
1047a560f3ebSWei Hu  ****************************************************************************
1048a560f3ebSWei Hu  * The only valid Service GUIDs, from the perspectives of both the host and *
1049a560f3ebSWei Hu  * FreeBSD VM, that can be connected by the other end, must conform to this *
1050a560f3ebSWei Hu  * format: <port>-facb-11e6-bd58-64006a7986d3.                              *
1051a560f3ebSWei Hu  ****************************************************************************
1052a560f3ebSWei Hu  *
1053a560f3ebSWei Hu  * When we write apps on the host to connect(), the GUID ServiceID is used.
1054a560f3ebSWei Hu  * When we write apps in FreeBSD VM to connect(), we only need to specify the
1055a560f3ebSWei Hu  * port and the driver will form the GUID and use that to request the host.
1056a560f3ebSWei Hu  *
 * From the perspective of the FreeBSD VM, the remote ephemeral port (i.e. the
 * auto-generated remote port for a connect request initiated by the host's
 * connect()) is set to HVADDR_PORT_UNKNOWN, which is not really used on the
 * FreeBSD guest.
1061a560f3ebSWei Hu  */
1062a560f3ebSWei Hu 
/*
 * Older Hyper-V hosts (vmbus version 'VMBUS_VERSION_WIN10' or before)
 * restrict the Hyper-V socket ring buffer size to six 4K pages. Newer
 * Hyper-V hosts don't have this limit.
 */
#define	HVS_RINGBUF_RCV_SIZE	(PAGE_SIZE * 6)
#define	HVS_RINGBUF_SND_SIZE	(PAGE_SIZE * 6)
#define	HVS_RINGBUF_MAX_SIZE	(PAGE_SIZE * 64)
1071a560f3ebSWei Hu 
1072a560f3ebSWei Hu struct hvsock_sc {
1073a560f3ebSWei Hu 	device_t		dev;
1074a560f3ebSWei Hu 	struct hvs_pcb		*pcb;
1075a560f3ebSWei Hu 	struct vmbus_channel	*channel;
1076a560f3ebSWei Hu };
1077a560f3ebSWei Hu 
1078a560f3ebSWei Hu static bool
hvsock_chan_readable(struct vmbus_channel * chan)1079a560f3ebSWei Hu hvsock_chan_readable(struct vmbus_channel *chan)
1080a560f3ebSWei Hu {
1081a560f3ebSWei Hu 	uint32_t readable = vmbus_chan_read_available(chan);
1082a560f3ebSWei Hu 
1083a560f3ebSWei Hu 	return (readable >= HVSOCK_PKT_LEN(0));
1084a560f3ebSWei Hu }
1085a560f3ebSWei Hu 
1086a560f3ebSWei Hu static void
hvsock_chan_cb(struct vmbus_channel * chan,void * context)1087a560f3ebSWei Hu hvsock_chan_cb(struct vmbus_channel *chan, void *context)
1088a560f3ebSWei Hu {
1089a560f3ebSWei Hu 	struct hvs_pcb *pcb = (struct hvs_pcb *) context;
1090a560f3ebSWei Hu 	struct socket *so;
1091a560f3ebSWei Hu 	uint32_t canwrite;
1092a560f3ebSWei Hu 
1093a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1094a560f3ebSWei Hu 	    "%s: host send us a wakeup on rb data, pcb = %p\n",
1095a560f3ebSWei Hu 	    __func__, pcb);
1096a560f3ebSWei Hu 
1097a560f3ebSWei Hu 	/*
1098a560f3ebSWei Hu 	 * Check if the socket is still attached and valid.
1099a560f3ebSWei Hu 	 * Here we know channel is still open. Need to make
1100a560f3ebSWei Hu 	 * sure the socket has not been closed or freed.
1101a560f3ebSWei Hu 	 */
1102a560f3ebSWei Hu 	(void) hvs_trans_lock();
1103a560f3ebSWei Hu 	so = hsvpcb2so(pcb);
1104a560f3ebSWei Hu 
1105a560f3ebSWei Hu 	if (pcb->chan != NULL && so != NULL) {
1106a560f3ebSWei Hu 		/*
1107a560f3ebSWei Hu 		 * Wake up reader if there are data to read.
1108a560f3ebSWei Hu 		 */
1109a560f3ebSWei Hu 		SOCKBUF_LOCK(&(so)->so_rcv);
1110a560f3ebSWei Hu 
1111a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1112a560f3ebSWei Hu 		    "%s: read available = %u\n", __func__,
1113a560f3ebSWei Hu 		    vmbus_chan_read_available(pcb->chan));
1114a560f3ebSWei Hu 
1115a560f3ebSWei Hu 		if (hvsock_chan_readable(pcb->chan))
1116a560f3ebSWei Hu 			sorwakeup_locked(so);
1117a560f3ebSWei Hu 		else
1118a560f3ebSWei Hu 			SOCKBUF_UNLOCK(&(so)->so_rcv);
1119a560f3ebSWei Hu 
1120a560f3ebSWei Hu 		/*
1121a560f3ebSWei Hu 		 * Wake up sender if space becomes available to write.
1122a560f3ebSWei Hu 		 */
1123a560f3ebSWei Hu 		SOCKBUF_LOCK(&(so)->so_snd);
1124a560f3ebSWei Hu 		canwrite = hvsock_canwrite_check(pcb);
1125a560f3ebSWei Hu 
1126a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1127a560f3ebSWei Hu 		    "%s: canwrite = %u\n", __func__, canwrite);
1128a560f3ebSWei Hu 
1129a560f3ebSWei Hu 		if (canwrite > 0) {
1130a560f3ebSWei Hu 			sowwakeup_locked(so);
1131a560f3ebSWei Hu 		} else {
1132a560f3ebSWei Hu 			SOCKBUF_UNLOCK(&(so)->so_snd);
1133a560f3ebSWei Hu 		}
1134a560f3ebSWei Hu 	}
1135a560f3ebSWei Hu 
1136a560f3ebSWei Hu 	hvs_trans_unlock();
1137a560f3ebSWei Hu 
1138a560f3ebSWei Hu 	return;
1139a560f3ebSWei Hu }
1140a560f3ebSWei Hu 
1141a560f3ebSWei Hu static int
hvsock_br_callback(void * datap,int cplen,void * cbarg)1142a560f3ebSWei Hu hvsock_br_callback(void *datap, int cplen, void *cbarg)
1143a560f3ebSWei Hu {
1144a560f3ebSWei Hu 	struct hvs_callback_arg *arg = (struct hvs_callback_arg *)cbarg;
1145a560f3ebSWei Hu 	struct uio *uio = arg->uio;
1146a560f3ebSWei Hu 	struct sockbuf *sb = arg->sb;
1147a560f3ebSWei Hu 	int error = 0;
1148a560f3ebSWei Hu 
1149a560f3ebSWei Hu 	if (cbarg == NULL || datap == NULL)
1150a560f3ebSWei Hu 		return (EINVAL);
1151a560f3ebSWei Hu 
1152a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1153db7ec3c3SLi-Wen Hsu 	    "%s: called, uio_rw = %s, uio_resid = %zd, cplen = %u, "
1154a560f3ebSWei Hu 	    "datap = %p\n",
1155a560f3ebSWei Hu 	    __func__, (uio->uio_rw == UIO_READ) ? "read from br":"write to br",
1156a560f3ebSWei Hu 	    uio->uio_resid, cplen, datap);
1157a560f3ebSWei Hu 
1158a560f3ebSWei Hu 	if (sb)
1159a560f3ebSWei Hu 		SOCKBUF_UNLOCK(sb);
1160a560f3ebSWei Hu 
1161a560f3ebSWei Hu 	error = uiomove(datap, cplen, uio);
1162a560f3ebSWei Hu 
1163a560f3ebSWei Hu 	if (sb)
1164a560f3ebSWei Hu 		SOCKBUF_LOCK(sb);
1165a560f3ebSWei Hu 
1166a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1167db7ec3c3SLi-Wen Hsu 	    "%s: after uiomove, uio_resid = %zd, error = %d\n",
1168a560f3ebSWei Hu 	    __func__, uio->uio_resid, error);
1169a560f3ebSWei Hu 
1170a560f3ebSWei Hu 	return (error);
1171a560f3ebSWei Hu }
1172a560f3ebSWei Hu 
1173a560f3ebSWei Hu static int
hvsock_send_data(struct vmbus_channel * chan,struct uio * uio,uint32_t to_write,struct sockbuf * sb)1174a560f3ebSWei Hu hvsock_send_data(struct vmbus_channel *chan, struct uio *uio,
1175a560f3ebSWei Hu     uint32_t to_write, struct sockbuf *sb)
1176a560f3ebSWei Hu {
1177a560f3ebSWei Hu 	struct hvs_pkt_header hvs_pkt;
1178a560f3ebSWei Hu 	int hvs_pkthlen, hvs_pktlen, pad_pktlen, hlen, error = 0;
1179a560f3ebSWei Hu 	uint64_t pad = 0;
1180a560f3ebSWei Hu 	struct iovec iov[3];
1181a560f3ebSWei Hu 	struct hvs_callback_arg cbarg;
1182a560f3ebSWei Hu 
1183a560f3ebSWei Hu 	if (chan == NULL)
1184a560f3ebSWei Hu 		return (ENOTCONN);
1185a560f3ebSWei Hu 
1186a560f3ebSWei Hu 	hlen = sizeof(struct vmbus_chanpkt_hdr);
1187a560f3ebSWei Hu 	hvs_pkthlen = sizeof(struct hvs_pkt_header);
1188a560f3ebSWei Hu 	hvs_pktlen = hvs_pkthlen + to_write;
1189a560f3ebSWei Hu 	pad_pktlen = VMBUS_CHANPKT_TOTLEN(hvs_pktlen);
1190a560f3ebSWei Hu 
1191a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1192a560f3ebSWei Hu 	    "%s: hlen = %u, hvs_pkthlen = %u, hvs_pktlen = %u, "
1193a560f3ebSWei Hu 	    "pad_pktlen = %u, data_len = %u\n",
1194a560f3ebSWei Hu 	    __func__, hlen, hvs_pkthlen, hvs_pktlen, pad_pktlen, to_write);
1195a560f3ebSWei Hu 
1196a560f3ebSWei Hu 	hvs_pkt.chan_pkt_hdr.cph_type = VMBUS_CHANPKT_TYPE_INBAND;
1197a560f3ebSWei Hu 	hvs_pkt.chan_pkt_hdr.cph_flags = 0;
1198a560f3ebSWei Hu 	VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_hlen, hlen);
1199a560f3ebSWei Hu 	VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_tlen, pad_pktlen);
1200a560f3ebSWei Hu 	hvs_pkt.chan_pkt_hdr.cph_xactid = 0;
1201a560f3ebSWei Hu 
1202a560f3ebSWei Hu 	hvs_pkt.vmpipe_pkt_hdr.vmpipe_pkt_type = 1;
1203a560f3ebSWei Hu 	hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size = to_write;
1204a560f3ebSWei Hu 
1205a560f3ebSWei Hu 	cbarg.uio = uio;
1206a560f3ebSWei Hu 	cbarg.sb = sb;
1207a560f3ebSWei Hu 
1208a560f3ebSWei Hu 	if (uio && to_write > 0) {
1209a560f3ebSWei Hu 		iov[0].iov_base = &hvs_pkt;
1210a560f3ebSWei Hu 		iov[0].iov_len = hvs_pkthlen;
1211a560f3ebSWei Hu 		iov[1].iov_base = NULL;
1212a560f3ebSWei Hu 		iov[1].iov_len = to_write;
1213a560f3ebSWei Hu 		iov[2].iov_base = &pad;
1214a560f3ebSWei Hu 		iov[2].iov_len = pad_pktlen - hvs_pktlen;
1215a560f3ebSWei Hu 
1216a560f3ebSWei Hu 		error = vmbus_chan_iov_send(chan, iov, 3,
1217a560f3ebSWei Hu 		    hvsock_br_callback, &cbarg);
1218a560f3ebSWei Hu 	} else {
1219a560f3ebSWei Hu 		if (to_write == 0) {
1220a560f3ebSWei Hu 			iov[0].iov_base = &hvs_pkt;
1221a560f3ebSWei Hu 			iov[0].iov_len = hvs_pkthlen;
1222a560f3ebSWei Hu 			iov[1].iov_base = &pad;
1223a560f3ebSWei Hu 			iov[1].iov_len = pad_pktlen - hvs_pktlen;
1224a560f3ebSWei Hu 			error = vmbus_chan_iov_send(chan, iov, 2, NULL, NULL);
1225a560f3ebSWei Hu 		}
1226a560f3ebSWei Hu 	}
1227a560f3ebSWei Hu 
1228a560f3ebSWei Hu 	if (error) {
1229a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1230a560f3ebSWei Hu 		    "%s: error = %d\n", __func__, error);
1231a560f3ebSWei Hu 	}
1232a560f3ebSWei Hu 
1233a560f3ebSWei Hu 	return (error);
1234a560f3ebSWei Hu }
1235a560f3ebSWei Hu 
1236a560f3ebSWei Hu /*
1237a560f3ebSWei Hu  * Check if we have data on current ring buffer to read
1238a560f3ebSWei Hu  * or not. If not, advance the ring buffer read index to
1239a560f3ebSWei Hu  * next packet. Update the recev_data_len and recev_data_off
1240a560f3ebSWei Hu  * to new value.
1241a560f3ebSWei Hu  * Return the number of bytes can read.
1242a560f3ebSWei Hu  */
1243a560f3ebSWei Hu static uint32_t
hvsock_canread_check(struct hvs_pcb * pcb)1244a560f3ebSWei Hu hvsock_canread_check(struct hvs_pcb *pcb)
1245a560f3ebSWei Hu {
1246a560f3ebSWei Hu 	uint32_t advance;
1247a560f3ebSWei Hu 	uint32_t tlen, hlen, dlen;
1248a560f3ebSWei Hu 	uint32_t bytes_canread = 0;
1249a560f3ebSWei Hu 	int error;
1250a560f3ebSWei Hu 
1251a560f3ebSWei Hu 	if (pcb == NULL || pcb->chan == NULL) {
1252a560f3ebSWei Hu 		pcb->so->so_error = EIO;
1253a560f3ebSWei Hu 		return (0);
1254a560f3ebSWei Hu 	}
1255a560f3ebSWei Hu 
1256a560f3ebSWei Hu 	/* Still have data not read yet on current packet */
1257a560f3ebSWei Hu 	if (pcb->recv_data_len > 0)
1258a560f3ebSWei Hu 		return (pcb->recv_data_len);
1259a560f3ebSWei Hu 
1260a560f3ebSWei Hu 	if (pcb->rb_init)
1261a560f3ebSWei Hu 		advance =
1262a560f3ebSWei Hu 		    VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
1263a560f3ebSWei Hu 	else
1264a560f3ebSWei Hu 		advance = 0;
1265a560f3ebSWei Hu 
1266a560f3ebSWei Hu 	bytes_canread = vmbus_chan_read_available(pcb->chan);
1267a560f3ebSWei Hu 
1268a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1269a560f3ebSWei Hu 	    "%s: bytes_canread on br = %u, advance = %u\n",
1270a560f3ebSWei Hu 	    __func__, bytes_canread, advance);
1271a560f3ebSWei Hu 
1272a560f3ebSWei Hu 	if (pcb->rb_init && bytes_canread == (advance + sizeof(uint64_t))) {
1273a560f3ebSWei Hu 		/*
1274a560f3ebSWei Hu 		 * Nothing to read. Need to advance the rindex before
1275a560f3ebSWei Hu 		 * calling sbwait, so host knows to wake us up when data
1276a560f3ebSWei Hu 		 * is available to read on rb.
1277a560f3ebSWei Hu 		 */
1278a560f3ebSWei Hu 		error = vmbus_chan_recv_idxadv(pcb->chan, advance);
1279a560f3ebSWei Hu 		if (error) {
1280a560f3ebSWei Hu 			HVSOCK_DBG(HVSOCK_DBG_ERR,
1281a560f3ebSWei Hu 			    "%s: after calling vmbus_chan_recv_idxadv, "
1282a560f3ebSWei Hu 			    "got error = %d\n",  __func__, error);
1283a560f3ebSWei Hu 			return (0);
1284a560f3ebSWei Hu 		} else {
1285a560f3ebSWei Hu 			pcb->rb_init = false;
1286a560f3ebSWei Hu 			pcb->recv_data_len = 0;
1287a560f3ebSWei Hu 			pcb->recv_data_off = 0;
1288a560f3ebSWei Hu 			bytes_canread = vmbus_chan_read_available(pcb->chan);
1289a560f3ebSWei Hu 
1290a560f3ebSWei Hu 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1291a560f3ebSWei Hu 			    "%s: advanced %u bytes, "
1292a560f3ebSWei Hu 			    " bytes_canread on br now = %u\n",
1293a560f3ebSWei Hu 			    __func__, advance, bytes_canread);
1294a560f3ebSWei Hu 
1295a560f3ebSWei Hu 			if (bytes_canread == 0)
1296a560f3ebSWei Hu 				return (0);
1297a560f3ebSWei Hu 			else
1298a560f3ebSWei Hu 				advance = 0;
1299a560f3ebSWei Hu 		}
1300a560f3ebSWei Hu 	}
1301a560f3ebSWei Hu 
1302a560f3ebSWei Hu 	if (bytes_canread <
1303a560f3ebSWei Hu 	    advance + (sizeof(struct hvs_pkt_header) + sizeof(uint64_t)))
1304a560f3ebSWei Hu 		return (0);
1305a560f3ebSWei Hu 
1306a560f3ebSWei Hu 	error = vmbus_chan_recv_peek(pcb->chan, &pcb->hvs_pkt,
1307a560f3ebSWei Hu 	    sizeof(struct hvs_pkt_header), advance);
1308a560f3ebSWei Hu 
1309a560f3ebSWei Hu 	/* Don't have anything to read */
1310a560f3ebSWei Hu 	if (error) {
1311a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1312a560f3ebSWei Hu 		    "%s: after calling vmbus_chan_recv_peek, got error = %d\n",
1313a560f3ebSWei Hu 		    __func__, error);
1314a560f3ebSWei Hu 		return (0);
1315a560f3ebSWei Hu 	}
1316a560f3ebSWei Hu 
1317a560f3ebSWei Hu 	/*
1318a560f3ebSWei Hu 	 * We just read in a new packet header. Do some sanity checks.
1319a560f3ebSWei Hu 	 */
1320a560f3ebSWei Hu 	tlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
1321a560f3ebSWei Hu 	hlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_hlen);
1322a560f3ebSWei Hu 	dlen = pcb->hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size;
1323a560f3ebSWei Hu 	if (__predict_false(hlen < sizeof(struct vmbus_chanpkt_hdr)) ||
1324a560f3ebSWei Hu 	    __predict_false(hlen > tlen) ||
1325a560f3ebSWei Hu 	    __predict_false(tlen < dlen + sizeof(struct hvs_pkt_header))) {
1326a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1327a560f3ebSWei Hu 		    "invalid tlen(%u), hlen(%u) or dlen(%u)\n",
1328a560f3ebSWei Hu 		    tlen, hlen, dlen);
1329a560f3ebSWei Hu 		pcb->so->so_error = EIO;
1330a560f3ebSWei Hu 		return (0);
1331a560f3ebSWei Hu 	}
1332a560f3ebSWei Hu 	if (pcb->rb_init == false)
1333a560f3ebSWei Hu 		pcb->rb_init = true;
1334a560f3ebSWei Hu 
1335a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1336a560f3ebSWei Hu 	    "Got new pkt tlen(%u), hlen(%u) or dlen(%u)\n",
1337a560f3ebSWei Hu 	    tlen, hlen, dlen);
1338a560f3ebSWei Hu 
1339a560f3ebSWei Hu 	/* The other side has sent a close FIN */
1340a560f3ebSWei Hu 	if (dlen == 0) {
1341a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1342a560f3ebSWei Hu 		    "%s: Received FIN from other side\n", __func__);
1343a560f3ebSWei Hu 		/* inform the caller by seting so_error to ESHUTDOWN */
1344a560f3ebSWei Hu 		pcb->so->so_error = ESHUTDOWN;
1345a560f3ebSWei Hu 	}
1346a560f3ebSWei Hu 
1347a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1348a560f3ebSWei Hu 	    "%s: canread on receive ring is %u \n", __func__, dlen);
1349a560f3ebSWei Hu 
1350a560f3ebSWei Hu 	pcb->recv_data_len = dlen;
1351a560f3ebSWei Hu 	pcb->recv_data_off = 0;
1352a560f3ebSWei Hu 
1353a560f3ebSWei Hu 	return (pcb->recv_data_len);
1354a560f3ebSWei Hu }
1355a560f3ebSWei Hu 
1356a560f3ebSWei Hu static uint32_t
hvsock_canwrite_check(struct hvs_pcb * pcb)1357a560f3ebSWei Hu hvsock_canwrite_check(struct hvs_pcb *pcb)
1358a560f3ebSWei Hu {
1359a560f3ebSWei Hu 	uint32_t writeable;
1360a560f3ebSWei Hu 	uint32_t ret;
1361a560f3ebSWei Hu 
1362a560f3ebSWei Hu 	if (pcb == NULL || pcb->chan == NULL)
1363a560f3ebSWei Hu 		return (0);
1364a560f3ebSWei Hu 
1365a560f3ebSWei Hu 	writeable = vmbus_chan_write_available(pcb->chan);
1366a560f3ebSWei Hu 
1367a560f3ebSWei Hu 	/*
1368a560f3ebSWei Hu 	 * We must always reserve a 0-length-payload packet for the FIN.
1369a560f3ebSWei Hu 	 */
1370a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1371db7ec3c3SLi-Wen Hsu 	    "%s: writeable is %u, should be greater than %ju\n",
1372db7ec3c3SLi-Wen Hsu 	    __func__, writeable,
1373db7ec3c3SLi-Wen Hsu 	    (uintmax_t)(HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)));
1374a560f3ebSWei Hu 
1375a560f3ebSWei Hu 	if (writeable < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) {
1376a560f3ebSWei Hu 		/*
1377a560f3ebSWei Hu 		 * The Tx ring seems full.
1378a560f3ebSWei Hu 		 */
1379a560f3ebSWei Hu 		return (0);
1380a560f3ebSWei Hu 	}
1381a560f3ebSWei Hu 
1382a560f3ebSWei Hu 	ret = writeable - HVSOCK_PKT_LEN(0) - HVSOCK_PKT_LEN(0);
1383a560f3ebSWei Hu 
1384a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1385a560f3ebSWei Hu 	    "%s: available size is %u\n", __func__, rounddown2(ret, 8));
1386a560f3ebSWei Hu 
1387a560f3ebSWei Hu 	return (rounddown2(ret, 8));
1388a560f3ebSWei Hu }
1389a560f3ebSWei Hu 
1390a560f3ebSWei Hu static void
hvsock_set_chan_pending_send_size(struct vmbus_channel * chan)1391a560f3ebSWei Hu hvsock_set_chan_pending_send_size(struct vmbus_channel *chan)
1392a560f3ebSWei Hu {
1393a560f3ebSWei Hu 	vmbus_chan_set_pending_send_size(chan,
1394a560f3ebSWei Hu 	    HVSOCK_PKT_LEN(HVSOCK_SEND_BUF_SZ));
1395a560f3ebSWei Hu }
1396a560f3ebSWei Hu 
1397a560f3ebSWei Hu static int
hvsock_open_channel(struct vmbus_channel * chan,struct socket * so)1398a560f3ebSWei Hu hvsock_open_channel(struct vmbus_channel *chan, struct socket *so)
1399a560f3ebSWei Hu {
1400a560f3ebSWei Hu 	unsigned int rcvbuf, sndbuf;
1401a560f3ebSWei Hu 	struct hvs_pcb *pcb = so2hvspcb(so);
1402a560f3ebSWei Hu 	int ret;
1403a560f3ebSWei Hu 
1404a560f3ebSWei Hu 	if (vmbus_current_version < VMBUS_VERSION_WIN10_V5) {
1405a560f3ebSWei Hu 		sndbuf = HVS_RINGBUF_SND_SIZE;
1406a560f3ebSWei Hu 		rcvbuf = HVS_RINGBUF_RCV_SIZE;
1407a560f3ebSWei Hu 	} else {
1408a560f3ebSWei Hu 		sndbuf = MAX(so->so_snd.sb_hiwat, HVS_RINGBUF_SND_SIZE);
1409a560f3ebSWei Hu 		sndbuf = MIN(sndbuf, HVS_RINGBUF_MAX_SIZE);
1410a560f3ebSWei Hu 		sndbuf = rounddown2(sndbuf, PAGE_SIZE);
1411a560f3ebSWei Hu 		rcvbuf = MAX(so->so_rcv.sb_hiwat, HVS_RINGBUF_RCV_SIZE);
1412a560f3ebSWei Hu 		rcvbuf = MIN(rcvbuf, HVS_RINGBUF_MAX_SIZE);
1413a560f3ebSWei Hu 		rcvbuf = rounddown2(rcvbuf, PAGE_SIZE);
1414a560f3ebSWei Hu 	}
1415a560f3ebSWei Hu 
1416a560f3ebSWei Hu 	/*
1417a560f3ebSWei Hu 	 * Can only read whatever user provided size of data
1418a560f3ebSWei Hu 	 * from ring buffer. Turn off batched reading.
1419a560f3ebSWei Hu 	 */
1420a560f3ebSWei Hu 	vmbus_chan_set_readbatch(chan, false);
1421a560f3ebSWei Hu 
1422a560f3ebSWei Hu 	ret = vmbus_chan_open(chan, sndbuf, rcvbuf, NULL, 0,
1423a560f3ebSWei Hu 	    hvsock_chan_cb, pcb);
1424a560f3ebSWei Hu 
1425a560f3ebSWei Hu 	if (ret != 0) {
1426a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_ERR,
1427a560f3ebSWei Hu 		    "%s: failed to open hvsock channel, sndbuf = %u, "
1428a560f3ebSWei Hu 		    "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
1429a560f3ebSWei Hu 	} else {
1430a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_INFO,
1431a560f3ebSWei Hu 		    "%s: hvsock channel opened, sndbuf = %u, i"
1432a560f3ebSWei Hu 		    "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
1433a560f3ebSWei Hu 		/*
1434a560f3ebSWei Hu 		 * Se the pending send size so to receive wakeup
1435a560f3ebSWei Hu 		 * signals from host when there is enough space on
1436a560f3ebSWei Hu 		 * rx buffer ring to write.
1437a560f3ebSWei Hu 		 */
1438a560f3ebSWei Hu 		hvsock_set_chan_pending_send_size(chan);
1439a560f3ebSWei Hu 	}
1440a560f3ebSWei Hu 
1441a560f3ebSWei Hu 	return ret;
1442a560f3ebSWei Hu }
1443a560f3ebSWei Hu 
1444a560f3ebSWei Hu /*
1445a560f3ebSWei Hu  * Guest is listening passively on the socket. Open channel and
1446a560f3ebSWei Hu  * create a new socket for the conneciton.
1447a560f3ebSWei Hu  */
1448a560f3ebSWei Hu static void
hvsock_open_conn_passive(struct vmbus_channel * chan,struct socket * so,struct hvsock_sc * sc)1449a560f3ebSWei Hu hvsock_open_conn_passive(struct vmbus_channel *chan, struct socket *so,
1450a560f3ebSWei Hu     struct hvsock_sc *sc)
1451a560f3ebSWei Hu {
1452a560f3ebSWei Hu 	struct socket *new_so;
1453a560f3ebSWei Hu 	struct hvs_pcb *new_pcb, *pcb;
1454a560f3ebSWei Hu 	int error;
1455a560f3ebSWei Hu 
1456a560f3ebSWei Hu 	/* Do nothing if socket is not listening */
1457f4bb1869SMark Johnston 	if (!SOLISTENING(so)) {
1458a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_ERR,
1459a560f3ebSWei Hu 		    "%s: socket is not a listening one\n", __func__);
1460a560f3ebSWei Hu 		return;
1461a560f3ebSWei Hu 	}
1462a560f3ebSWei Hu 
1463a560f3ebSWei Hu 	/*
1464*a0da2f73SDag-Erling Smørgrav 	 * Create a new socket. This will call pr_attach() to complete
1465a560f3ebSWei Hu 	 * the socket initialization and put the new socket onto
1466a560f3ebSWei Hu 	 * listening socket's sol_incomp list, waiting to be promoted
1467a560f3ebSWei Hu 	 * to sol_comp list.
1468a560f3ebSWei Hu 	 * The new socket created has ref count 0. There is no other
1469a560f3ebSWei Hu 	 * thread that changes the state of this new one at the
1470a560f3ebSWei Hu 	 * moment, so we don't need to hold its lock while opening
1471a560f3ebSWei Hu 	 * channel and filling out its pcb information.
1472a560f3ebSWei Hu 	 */
1473a560f3ebSWei Hu 	new_so = sonewconn(so, 0);
1474a560f3ebSWei Hu 	if (!new_so)
1475a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_ERR,
1476a560f3ebSWei Hu 		    "%s: creating new socket failed\n", __func__);
1477a560f3ebSWei Hu 
1478a560f3ebSWei Hu 	/*
1479a560f3ebSWei Hu 	 * Now open the vmbus channel. If it fails, the socket will be
1480a560f3ebSWei Hu 	 * on the listening socket's sol_incomp queue until it is
1481a560f3ebSWei Hu 	 * replaced and aborted.
1482a560f3ebSWei Hu 	 */
1483a560f3ebSWei Hu 	error = hvsock_open_channel(chan, new_so);
1484a560f3ebSWei Hu 	if (error) {
1485a560f3ebSWei Hu 		new_so->so_error = error;
1486a560f3ebSWei Hu 		return;
1487a560f3ebSWei Hu 	}
1488a560f3ebSWei Hu 
1489a560f3ebSWei Hu 	pcb = so->so_pcb;
1490a560f3ebSWei Hu 	new_pcb = new_so->so_pcb;
1491a560f3ebSWei Hu 
1492a560f3ebSWei Hu 	hvs_addr_set(&(new_pcb->local_addr), pcb->local_addr.hvs_port);
1493a560f3ebSWei Hu 	/* Remote port is unknown to guest in this type of conneciton */
1494a560f3ebSWei Hu 	hvs_addr_set(&(new_pcb->remote_addr), HVADDR_PORT_UNKNOWN);
1495a560f3ebSWei Hu 	new_pcb->chan = chan;
1496a560f3ebSWei Hu 	new_pcb->recv_data_len = 0;
1497a560f3ebSWei Hu 	new_pcb->recv_data_off = 0;
1498a560f3ebSWei Hu 	new_pcb->rb_init = false;
1499a560f3ebSWei Hu 
1500a560f3ebSWei Hu 	new_pcb->vm_srv_id = *vmbus_chan_guid_type(chan);
1501a560f3ebSWei Hu 	new_pcb->host_srv_id = *vmbus_chan_guid_inst(chan);
1502a560f3ebSWei Hu 
1503a560f3ebSWei Hu 	hvs_insert_socket_on_list(new_so, HVS_LIST_CONNECTED);
1504a560f3ebSWei Hu 
1505a560f3ebSWei Hu 	sc->pcb = new_pcb;
1506a560f3ebSWei Hu 
1507a560f3ebSWei Hu 	/*
1508a560f3ebSWei Hu 	 * Change the socket state to SS_ISCONNECTED. This will promote
1509a560f3ebSWei Hu 	 * the socket to sol_comp queue and wake up the thread which
1510a560f3ebSWei Hu 	 * is accepting connection.
1511a560f3ebSWei Hu 	 */
1512a560f3ebSWei Hu 	soisconnected(new_so);
1513a560f3ebSWei Hu }
1514a560f3ebSWei Hu 
1515a560f3ebSWei Hu 
1516a560f3ebSWei Hu /*
1517a560f3ebSWei Hu  * Guest is actively connecting to host.
1518a560f3ebSWei Hu  */
1519a560f3ebSWei Hu static void
hvsock_open_conn_active(struct vmbus_channel * chan,struct socket * so)1520a560f3ebSWei Hu hvsock_open_conn_active(struct vmbus_channel *chan, struct socket *so)
1521a560f3ebSWei Hu {
1522a560f3ebSWei Hu 	struct hvs_pcb *pcb;
1523a560f3ebSWei Hu 	int error;
1524a560f3ebSWei Hu 
1525a560f3ebSWei Hu 	error = hvsock_open_channel(chan, so);
1526a560f3ebSWei Hu 	if (error) {
1527a560f3ebSWei Hu 		so->so_error = error;
1528a560f3ebSWei Hu 		return;
1529a560f3ebSWei Hu 	}
1530a560f3ebSWei Hu 
1531a560f3ebSWei Hu 	pcb = so->so_pcb;
1532a560f3ebSWei Hu 	pcb->chan = chan;
1533a560f3ebSWei Hu 	pcb->recv_data_len = 0;
1534a560f3ebSWei Hu 	pcb->recv_data_off = 0;
1535a560f3ebSWei Hu 	pcb->rb_init = false;
1536a560f3ebSWei Hu 
1537a560f3ebSWei Hu 	mtx_lock(&hvs_trans_socks_mtx);
1538a560f3ebSWei Hu 	__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
1539a560f3ebSWei Hu 	__hvs_insert_socket_on_list(so, HVS_LIST_CONNECTED);
1540a560f3ebSWei Hu 	mtx_unlock(&hvs_trans_socks_mtx);
1541a560f3ebSWei Hu 
1542a560f3ebSWei Hu 	/*
1543a560f3ebSWei Hu 	 * Change the socket state to SS_ISCONNECTED. This will wake up
1544a560f3ebSWei Hu 	 * the thread sleeping in connect call.
1545a560f3ebSWei Hu 	 */
1546a560f3ebSWei Hu 	soisconnected(so);
1547a560f3ebSWei Hu }
1548a560f3ebSWei Hu 
1549a560f3ebSWei Hu static void
hvsock_open_connection(struct vmbus_channel * chan,struct hvsock_sc * sc)1550a560f3ebSWei Hu hvsock_open_connection(struct vmbus_channel *chan, struct hvsock_sc *sc)
1551a560f3ebSWei Hu {
1552a560f3ebSWei Hu 	struct hyperv_guid *inst_guid, *type_guid;
1553a560f3ebSWei Hu 	bool conn_from_host;
1554a560f3ebSWei Hu 	struct sockaddr_hvs addr;
1555a560f3ebSWei Hu 	struct socket *so;
1556a560f3ebSWei Hu 	struct hvs_pcb *pcb;
1557a560f3ebSWei Hu 
1558a560f3ebSWei Hu 	type_guid = (struct hyperv_guid *) vmbus_chan_guid_type(chan);
1559a560f3ebSWei Hu 	inst_guid = (struct hyperv_guid *) vmbus_chan_guid_inst(chan);
1560a560f3ebSWei Hu 	conn_from_host = vmbus_chan_is_hvs_conn_from_host(chan);
1561a560f3ebSWei Hu 
1562a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_INFO, "type_guid is ");
1563a560f3ebSWei Hu 	hvsock_print_guid(type_guid);
1564a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_INFO, "inst_guid is ");
1565a560f3ebSWei Hu 	hvsock_print_guid(inst_guid);
1566a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_INFO, "connection %s host\n",
1567a560f3ebSWei Hu 	    (conn_from_host == true ) ? "from" : "to");
1568a560f3ebSWei Hu 
1569a560f3ebSWei Hu 	/*
1570a560f3ebSWei Hu 	 * The listening port should be in [0, MAX_LISTEN_PORT]
1571a560f3ebSWei Hu 	 */
1572a560f3ebSWei Hu 	if (!is_valid_srv_id(type_guid))
1573a560f3ebSWei Hu 		return;
1574a560f3ebSWei Hu 
1575a560f3ebSWei Hu 	/*
1576a560f3ebSWei Hu 	 * There should be a bound socket already created no matter
1577a560f3ebSWei Hu 	 * it is a passive or active connection.
1578a560f3ebSWei Hu 	 * For host initiated connection (passive on guest side),
1579a560f3ebSWei Hu 	 * the  type_guid contains the port which guest is bound and
1580a560f3ebSWei Hu 	 * listening.
1581a560f3ebSWei Hu 	 * For the guest initiated connection (active on guest side),
1582a560f3ebSWei Hu 	 * the inst_guid contains the port that guest has auto bound
1583a560f3ebSWei Hu 	 * to.
1584a560f3ebSWei Hu 	 */
1585a560f3ebSWei Hu 	hvs_addr_init(&addr, conn_from_host ? type_guid : inst_guid);
1586a560f3ebSWei Hu 	so = hvs_find_socket_on_list(&addr, HVS_LIST_BOUND);
1587a560f3ebSWei Hu 	if (!so) {
1588a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_ERR,
1589a560f3ebSWei Hu 		    "%s: no bound socket found for port %u\n",
1590a560f3ebSWei Hu 		    __func__, addr.hvs_port);
1591a560f3ebSWei Hu 		return;
1592a560f3ebSWei Hu 	}
1593a560f3ebSWei Hu 
1594a560f3ebSWei Hu 	if (conn_from_host) {
1595a560f3ebSWei Hu 		hvsock_open_conn_passive(chan, so, sc);
1596a560f3ebSWei Hu 	} else {
1597a560f3ebSWei Hu 		(void) hvs_trans_lock();
1598a560f3ebSWei Hu 		pcb = so->so_pcb;
1599a560f3ebSWei Hu 		if (pcb && pcb->so) {
1600a560f3ebSWei Hu 			sc->pcb = so2hvspcb(so);
1601a560f3ebSWei Hu 			hvsock_open_conn_active(chan, so);
1602a560f3ebSWei Hu 		} else {
1603a560f3ebSWei Hu 			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1604a560f3ebSWei Hu 			    "%s: channel detached before open\n", __func__);
1605a560f3ebSWei Hu 		}
1606a560f3ebSWei Hu 		hvs_trans_unlock();
1607a560f3ebSWei Hu 	}
1608a560f3ebSWei Hu 
1609a560f3ebSWei Hu }
1610a560f3ebSWei Hu 
1611a560f3ebSWei Hu static int
hvsock_probe(device_t dev)1612a560f3ebSWei Hu hvsock_probe(device_t dev)
1613a560f3ebSWei Hu {
1614a560f3ebSWei Hu 	struct vmbus_channel *channel = vmbus_get_channel(dev);
1615a560f3ebSWei Hu 
1616a560f3ebSWei Hu 	if (!channel || !vmbus_chan_is_hvs(channel)) {
1617a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1618a560f3ebSWei Hu 		    "hvsock_probe called but not a hvsock channel id %u\n",
1619a560f3ebSWei Hu 		    vmbus_chan_id(channel));
1620a560f3ebSWei Hu 
1621a560f3ebSWei Hu 		return ENXIO;
1622a560f3ebSWei Hu 	} else {
1623a560f3ebSWei Hu 		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1624a560f3ebSWei Hu 		    "hvsock_probe got a hvsock channel id %u\n",
1625a560f3ebSWei Hu 		    vmbus_chan_id(channel));
1626a560f3ebSWei Hu 
1627a560f3ebSWei Hu 		return BUS_PROBE_DEFAULT;
1628a560f3ebSWei Hu 	}
1629a560f3ebSWei Hu }
1630a560f3ebSWei Hu 
1631a560f3ebSWei Hu static int
hvsock_attach(device_t dev)1632a560f3ebSWei Hu hvsock_attach(device_t dev)
1633a560f3ebSWei Hu {
1634a560f3ebSWei Hu 	struct vmbus_channel *channel = vmbus_get_channel(dev);
1635a560f3ebSWei Hu 	struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev);
1636a560f3ebSWei Hu 
1637a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_attach called.\n");
1638a560f3ebSWei Hu 
1639a560f3ebSWei Hu 	hvsock_open_connection(channel, sc);
1640a560f3ebSWei Hu 
1641a560f3ebSWei Hu 	/*
1642a560f3ebSWei Hu 	 * Always return success. On error the host will rescind the device
1643a560f3ebSWei Hu 	 * in 30 seconds and we can do cleanup at that time in
1644a560f3ebSWei Hu 	 * vmbus_chan_msgproc_chrescind().
1645a560f3ebSWei Hu 	 */
1646a560f3ebSWei Hu 	return (0);
1647a560f3ebSWei Hu }
1648a560f3ebSWei Hu 
1649a560f3ebSWei Hu static int
hvsock_detach(device_t dev)1650a560f3ebSWei Hu hvsock_detach(device_t dev)
1651a560f3ebSWei Hu {
1652a560f3ebSWei Hu 	struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev);
1653a560f3ebSWei Hu 	struct socket *so;
1654f94acf52SMark Johnston 	int retry;
1655a560f3ebSWei Hu 
1656a560f3ebSWei Hu 	if (bootverbose)
1657a560f3ebSWei Hu 		device_printf(dev, "hvsock_detach called.\n");
1658a560f3ebSWei Hu 
1659a560f3ebSWei Hu 	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_detach called.\n");
1660a560f3ebSWei Hu 
1661a560f3ebSWei Hu 	if (sc->pcb != NULL) {
1662a560f3ebSWei Hu 		(void) hvs_trans_lock();
1663a560f3ebSWei Hu 
1664a560f3ebSWei Hu 		so = hsvpcb2so(sc->pcb);
1665a560f3ebSWei Hu 		if (so) {
1666a560f3ebSWei Hu 			/* Close the connection */
1667a560f3ebSWei Hu 			if (so->so_state &
1668a560f3ebSWei Hu 			    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
1669a560f3ebSWei Hu 				soisdisconnected(so);
1670a560f3ebSWei Hu 		}
1671a560f3ebSWei Hu 
1672a560f3ebSWei Hu 		mtx_lock(&hvs_trans_socks_mtx);
1673a560f3ebSWei Hu 		__hvs_remove_pcb_from_list(sc->pcb,
1674a560f3ebSWei Hu 		    HVS_LIST_BOUND | HVS_LIST_CONNECTED);
1675a560f3ebSWei Hu 		mtx_unlock(&hvs_trans_socks_mtx);
1676a560f3ebSWei Hu 
1677a560f3ebSWei Hu 		/*
1678a560f3ebSWei Hu 		 * Close channel while no reader and sender are working
1679a560f3ebSWei Hu 		 * on the buffer rings.
1680a560f3ebSWei Hu 		 */
1681a560f3ebSWei Hu 		if (so) {
1682a560f3ebSWei Hu 			retry = 0;
1683f94acf52SMark Johnston 			while (SOCK_IO_RECV_LOCK(so, 0) == EWOULDBLOCK) {
1684a560f3ebSWei Hu 				/*
1685a560f3ebSWei Hu 				 * Someone is reading, rx br is busy
1686a560f3ebSWei Hu 				 */
1687a560f3ebSWei Hu 				soisdisconnected(so);
1688a560f3ebSWei Hu 				DELAY(500);
1689a560f3ebSWei Hu 				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1690a560f3ebSWei Hu 				    "waiting for rx reader to exit, "
1691a560f3ebSWei Hu 				    "retry = %d\n", retry++);
1692a560f3ebSWei Hu 			}
1693a560f3ebSWei Hu 			retry = 0;
1694f94acf52SMark Johnston 			while (SOCK_IO_SEND_LOCK(so, 0) == EWOULDBLOCK) {
1695a560f3ebSWei Hu 				/*
1696a560f3ebSWei Hu 				 * Someone is sending, tx br is busy
1697a560f3ebSWei Hu 				 */
1698a560f3ebSWei Hu 				soisdisconnected(so);
1699a560f3ebSWei Hu 				DELAY(500);
1700a560f3ebSWei Hu 				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1701a560f3ebSWei Hu 				    "waiting for tx sender to exit, "
1702a560f3ebSWei Hu 				    "retry = %d\n", retry++);
1703a560f3ebSWei Hu 			}
1704a560f3ebSWei Hu 		}
1705a560f3ebSWei Hu 
1706a560f3ebSWei Hu 
1707a560f3ebSWei Hu 		bzero(sc->pcb, sizeof(struct hvs_pcb));
1708a560f3ebSWei Hu 		free(sc->pcb, M_HVSOCK);
1709a560f3ebSWei Hu 		sc->pcb = NULL;
1710a560f3ebSWei Hu 
1711a560f3ebSWei Hu 		if (so) {
1712f94acf52SMark Johnston 			SOCK_IO_RECV_UNLOCK(so);
1713f94acf52SMark Johnston 			SOCK_IO_SEND_UNLOCK(so);
1714a560f3ebSWei Hu 			so->so_pcb = NULL;
1715a560f3ebSWei Hu 		}
1716a560f3ebSWei Hu 
1717a560f3ebSWei Hu 		hvs_trans_unlock();
1718a560f3ebSWei Hu 	}
1719a560f3ebSWei Hu 
1720a560f3ebSWei Hu 	vmbus_chan_close(vmbus_get_channel(dev));
1721a560f3ebSWei Hu 
1722a560f3ebSWei Hu 	return (0);
1723a560f3ebSWei Hu }
1724a560f3ebSWei Hu 
1725a560f3ebSWei Hu static device_method_t hvsock_methods[] = {
1726a560f3ebSWei Hu 	/* Device interface */
1727a560f3ebSWei Hu 	DEVMETHOD(device_probe, hvsock_probe),
1728a560f3ebSWei Hu 	DEVMETHOD(device_attach, hvsock_attach),
1729a560f3ebSWei Hu 	DEVMETHOD(device_detach, hvsock_detach),
1730a560f3ebSWei Hu 	DEVMETHOD_END
1731a560f3ebSWei Hu };
1732a560f3ebSWei Hu 
1733a560f3ebSWei Hu static driver_t hvsock_driver = {
1734a560f3ebSWei Hu 	"hv_sock",
1735a560f3ebSWei Hu 	hvsock_methods,
1736a560f3ebSWei Hu 	sizeof(struct hvsock_sc)
1737a560f3ebSWei Hu };
1738a560f3ebSWei Hu 
1739c1cef544SJohn Baldwin DRIVER_MODULE(hvsock, vmbus, hvsock_driver, NULL, NULL);
1740a560f3ebSWei Hu MODULE_VERSION(hvsock, 1);
1741a560f3ebSWei Hu MODULE_DEPEND(hvsock, vmbus, 1, 1, 1);
1742