1a560f3ebSWei Hu /*-
24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause
3a560f3ebSWei Hu *
4a560f3ebSWei Hu * Copyright (c) 2020 Microsoft Corp.
5a560f3ebSWei Hu * All rights reserved.
6a560f3ebSWei Hu *
7a560f3ebSWei Hu * Redistribution and use in source and binary forms, with or without
8a560f3ebSWei Hu * modification, are permitted provided that the following conditions
9a560f3ebSWei Hu * are met:
10a560f3ebSWei Hu * 1. Redistributions of source code must retain the above copyright
11a560f3ebSWei Hu * notice unmodified, this list of conditions, and the following
12a560f3ebSWei Hu * disclaimer.
13a560f3ebSWei Hu * 2. Redistributions in binary form must reproduce the above copyright
14a560f3ebSWei Hu * notice, this list of conditions and the following disclaimer in the
15a560f3ebSWei Hu * documentation and/or other materials provided with the distribution.
16a560f3ebSWei Hu *
17a560f3ebSWei Hu * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18a560f3ebSWei Hu * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19a560f3ebSWei Hu * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20a560f3ebSWei Hu * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21a560f3ebSWei Hu * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22a560f3ebSWei Hu * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23a560f3ebSWei Hu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24a560f3ebSWei Hu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25a560f3ebSWei Hu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26a560f3ebSWei Hu * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27a560f3ebSWei Hu */
28a560f3ebSWei Hu
29a560f3ebSWei Hu #include <sys/param.h>
30a560f3ebSWei Hu #include <sys/bus.h>
31a560f3ebSWei Hu #include <sys/domain.h>
32a560f3ebSWei Hu #include <sys/lock.h>
33a560f3ebSWei Hu #include <sys/kernel.h>
34a560f3ebSWei Hu #include <sys/types.h>
35a560f3ebSWei Hu #include <sys/malloc.h>
36a560f3ebSWei Hu #include <sys/module.h>
37a560f3ebSWei Hu #include <sys/mutex.h>
38a560f3ebSWei Hu #include <sys/proc.h>
39a560f3ebSWei Hu #include <sys/protosw.h>
40a560f3ebSWei Hu #include <sys/socket.h>
41a560f3ebSWei Hu #include <sys/sysctl.h>
42a560f3ebSWei Hu #include <sys/sysproto.h>
43a560f3ebSWei Hu #include <sys/systm.h>
44a560f3ebSWei Hu #include <sys/sockbuf.h>
45a560f3ebSWei Hu #include <sys/sx.h>
46a560f3ebSWei Hu #include <sys/uio.h>
47a560f3ebSWei Hu
48a560f3ebSWei Hu #include <net/vnet.h>
49a560f3ebSWei Hu
50a560f3ebSWei Hu #include <dev/hyperv/vmbus/vmbus_reg.h>
51a560f3ebSWei Hu
52a560f3ebSWei Hu #include "hv_sock.h"
53a560f3ebSWei Hu
54a560f3ebSWei Hu #define HVSOCK_DBG_NONE 0x0
55a560f3ebSWei Hu #define HVSOCK_DBG_INFO 0x1
56a560f3ebSWei Hu #define HVSOCK_DBG_ERR 0x2
57a560f3ebSWei Hu #define HVSOCK_DBG_VERBOSE 0x3
58a560f3ebSWei Hu
59a560f3ebSWei Hu
60a560f3ebSWei Hu SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket");
61a560f3ebSWei Hu
62a560f3ebSWei Hu static int hvs_dbg_level;
63a560f3ebSWei Hu SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level,
64a560f3ebSWei Hu 0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose");
65a560f3ebSWei Hu
66a560f3ebSWei Hu
67a560f3ebSWei Hu #define HVSOCK_DBG(level, ...) do { \
68a560f3ebSWei Hu if (hvs_dbg_level >= (level)) \
69a560f3ebSWei Hu printf(__VA_ARGS__); \
70a560f3ebSWei Hu } while (0)
71a560f3ebSWei Hu
72a560f3ebSWei Hu MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures");
73a560f3ebSWei Hu
74625932c9SKyle Evans static int hvs_dom_probe(void);
75625932c9SKyle Evans
76a560f3ebSWei Hu /* The MTU is 16KB per host side's design */
77a560f3ebSWei Hu #define HVSOCK_MTU_SIZE (1024 * 16)
78a560f3ebSWei Hu #define HVSOCK_SEND_BUF_SZ (PAGE_SIZE - sizeof(struct vmpipe_proto_header))
79a560f3ebSWei Hu
80a560f3ebSWei Hu #define HVSOCK_HEADER_LEN (sizeof(struct hvs_pkt_header))
81a560f3ebSWei Hu
82a560f3ebSWei Hu #define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \
83a560f3ebSWei Hu roundup2(payload_len, 8) + \
84a560f3ebSWei Hu sizeof(uint64_t))
85a560f3ebSWei Hu
86a560f3ebSWei Hu /*
87a560f3ebSWei Hu * HyperV Transport sockets
88a560f3ebSWei Hu */
89e7d02be1SGleb Smirnoff static struct protosw hv_socket_protosw = {
90a560f3ebSWei Hu .pr_type = SOCK_STREAM,
91a560f3ebSWei Hu .pr_protocol = HYPERV_SOCK_PROTO_TRANS,
92a560f3ebSWei Hu .pr_flags = PR_CONNREQUIRED,
93e7d02be1SGleb Smirnoff .pr_attach = hvs_trans_attach,
94e7d02be1SGleb Smirnoff .pr_bind = hvs_trans_bind,
95e7d02be1SGleb Smirnoff .pr_listen = hvs_trans_listen,
96e7d02be1SGleb Smirnoff .pr_accept = hvs_trans_accept,
97e7d02be1SGleb Smirnoff .pr_connect = hvs_trans_connect,
98e7d02be1SGleb Smirnoff .pr_peeraddr = hvs_trans_peeraddr,
99e7d02be1SGleb Smirnoff .pr_sockaddr = hvs_trans_sockaddr,
100e7d02be1SGleb Smirnoff .pr_soreceive = hvs_trans_soreceive,
101e7d02be1SGleb Smirnoff .pr_sosend = hvs_trans_sosend,
102e7d02be1SGleb Smirnoff .pr_disconnect = hvs_trans_disconnect,
103e7d02be1SGleb Smirnoff .pr_close = hvs_trans_close,
104e7d02be1SGleb Smirnoff .pr_detach = hvs_trans_detach,
105e7d02be1SGleb Smirnoff .pr_shutdown = hvs_trans_shutdown,
106e7d02be1SGleb Smirnoff .pr_abort = hvs_trans_abort,
107a560f3ebSWei Hu };
108a560f3ebSWei Hu
109a560f3ebSWei Hu static struct domain hv_socket_domain = {
110a560f3ebSWei Hu .dom_family = AF_HYPERV,
111a560f3ebSWei Hu .dom_name = "hyperv",
112625932c9SKyle Evans .dom_probe = hvs_dom_probe,
113e7d02be1SGleb Smirnoff .dom_nprotosw = 1,
114e7d02be1SGleb Smirnoff .dom_protosw = { &hv_socket_protosw },
115a560f3ebSWei Hu };
116a560f3ebSWei Hu
117644ca084SGleb Smirnoff DOMAIN_SET(hv_socket_);
118a560f3ebSWei Hu
119a560f3ebSWei Hu #define MAX_PORT ((uint32_t)0xFFFFFFFF)
120a560f3ebSWei Hu #define MIN_PORT ((uint32_t)0x0)
121a560f3ebSWei Hu
122a560f3ebSWei Hu /* 00000000-facb-11e6-bd58-64006a7986d3 */
123a560f3ebSWei Hu static const struct hyperv_guid srv_id_template = {
124a560f3ebSWei Hu .hv_guid = {
125a560f3ebSWei Hu 0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11,
126a560f3ebSWei Hu 0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 }
127a560f3ebSWei Hu };
128a560f3ebSWei Hu
129a560f3ebSWei Hu static int hvsock_br_callback(void *, int, void *);
130a560f3ebSWei Hu static uint32_t hvsock_canread_check(struct hvs_pcb *);
131a560f3ebSWei Hu static uint32_t hvsock_canwrite_check(struct hvs_pcb *);
132a560f3ebSWei Hu static int hvsock_send_data(struct vmbus_channel *chan,
133a560f3ebSWei Hu struct uio *uio, uint32_t to_write, struct sockbuf *sb);
134a560f3ebSWei Hu
135a560f3ebSWei Hu
136a560f3ebSWei Hu
137a560f3ebSWei Hu /* Globals */
138a560f3ebSWei Hu static struct sx hvs_trans_socks_sx;
139a560f3ebSWei Hu static struct mtx hvs_trans_socks_mtx;
140a560f3ebSWei Hu static LIST_HEAD(, hvs_pcb) hvs_trans_bound_socks;
141a560f3ebSWei Hu static LIST_HEAD(, hvs_pcb) hvs_trans_connected_socks;
142a560f3ebSWei Hu static uint32_t previous_auto_bound_port;
143a560f3ebSWei Hu
144a560f3ebSWei Hu static void
hvsock_print_guid(struct hyperv_guid * guid)145a560f3ebSWei Hu hvsock_print_guid(struct hyperv_guid *guid)
146a560f3ebSWei Hu {
147a560f3ebSWei Hu unsigned char *p = (unsigned char *)guid;
148a560f3ebSWei Hu
149a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_INFO,
150a560f3ebSWei Hu "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n",
151a560f3ebSWei Hu *(unsigned int *)p,
152a560f3ebSWei Hu *((unsigned short *) &p[4]),
153a560f3ebSWei Hu *((unsigned short *) &p[6]),
154a560f3ebSWei Hu p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
155a560f3ebSWei Hu }
156a560f3ebSWei Hu
157a560f3ebSWei Hu static bool
is_valid_srv_id(const struct hyperv_guid * id)158a560f3ebSWei Hu is_valid_srv_id(const struct hyperv_guid *id)
159a560f3ebSWei Hu {
160a560f3ebSWei Hu return !memcmp(&id->hv_guid[4],
161a560f3ebSWei Hu &srv_id_template.hv_guid[4], sizeof(struct hyperv_guid) - 4);
162a560f3ebSWei Hu }
163a560f3ebSWei Hu
/* Read the port number stored in the leading unsigned int of a service id. */
static unsigned int
get_port_by_srv_id(const struct hyperv_guid *srv_id)
{
	const unsigned int *port_field;

	port_field = (const unsigned int *)srv_id;
	return (*port_field);
}
169a560f3ebSWei Hu
/* Store `port' in the leading unsigned int of a service id. */
static void
set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port)
{
	unsigned int *port_field;

	port_field = (unsigned int *)srv_id;
	*port_field = port;
}
175a560f3ebSWei Hu
176a560f3ebSWei Hu
177a560f3ebSWei Hu static void
__hvs_remove_pcb_from_list(struct hvs_pcb * pcb,unsigned char list)178a560f3ebSWei Hu __hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list)
179a560f3ebSWei Hu {
180a560f3ebSWei Hu struct hvs_pcb *p = NULL;
181a560f3ebSWei Hu
182a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
183a560f3ebSWei Hu
184a560f3ebSWei Hu if (!pcb)
185a560f3ebSWei Hu return;
186a560f3ebSWei Hu
187a560f3ebSWei Hu if (list & HVS_LIST_BOUND) {
188a560f3ebSWei Hu LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
189a560f3ebSWei Hu if (p == pcb)
190a560f3ebSWei Hu LIST_REMOVE(p, bound_next);
191a560f3ebSWei Hu }
192a560f3ebSWei Hu
193a560f3ebSWei Hu if (list & HVS_LIST_CONNECTED) {
194a560f3ebSWei Hu LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
195a560f3ebSWei Hu if (p == pcb)
196a560f3ebSWei Hu LIST_REMOVE(pcb, connected_next);
197a560f3ebSWei Hu }
198a560f3ebSWei Hu }
199a560f3ebSWei Hu
200a560f3ebSWei Hu static void
__hvs_remove_socket_from_list(struct socket * so,unsigned char list)201a560f3ebSWei Hu __hvs_remove_socket_from_list(struct socket *so, unsigned char list)
202a560f3ebSWei Hu {
203a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
204a560f3ebSWei Hu
205a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
206a560f3ebSWei Hu
207a560f3ebSWei Hu __hvs_remove_pcb_from_list(pcb, list);
208a560f3ebSWei Hu }
209a560f3ebSWei Hu
210a560f3ebSWei Hu static void
__hvs_insert_socket_on_list(struct socket * so,unsigned char list)211a560f3ebSWei Hu __hvs_insert_socket_on_list(struct socket *so, unsigned char list)
212a560f3ebSWei Hu {
213a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
214a560f3ebSWei Hu
215a560f3ebSWei Hu if (list & HVS_LIST_BOUND)
216a560f3ebSWei Hu LIST_INSERT_HEAD(&hvs_trans_bound_socks,
217a560f3ebSWei Hu pcb, bound_next);
218a560f3ebSWei Hu
219a560f3ebSWei Hu if (list & HVS_LIST_CONNECTED)
220a560f3ebSWei Hu LIST_INSERT_HEAD(&hvs_trans_connected_socks,
221a560f3ebSWei Hu pcb, connected_next);
222a560f3ebSWei Hu }
223a560f3ebSWei Hu
224a560f3ebSWei Hu void
hvs_remove_socket_from_list(struct socket * so,unsigned char list)225a560f3ebSWei Hu hvs_remove_socket_from_list(struct socket *so, unsigned char list)
226a560f3ebSWei Hu {
227a560f3ebSWei Hu if (!so || !so->so_pcb) {
228a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
229a560f3ebSWei Hu "%s: socket or so_pcb is null\n", __func__);
230a560f3ebSWei Hu return;
231a560f3ebSWei Hu }
232a560f3ebSWei Hu
233a560f3ebSWei Hu mtx_lock(&hvs_trans_socks_mtx);
234a560f3ebSWei Hu __hvs_remove_socket_from_list(so, list);
235a560f3ebSWei Hu mtx_unlock(&hvs_trans_socks_mtx);
236a560f3ebSWei Hu }
237a560f3ebSWei Hu
238a560f3ebSWei Hu static void
hvs_insert_socket_on_list(struct socket * so,unsigned char list)239a560f3ebSWei Hu hvs_insert_socket_on_list(struct socket *so, unsigned char list)
240a560f3ebSWei Hu {
241a560f3ebSWei Hu if (!so || !so->so_pcb) {
242a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
243a560f3ebSWei Hu "%s: socket or so_pcb is null\n", __func__);
244a560f3ebSWei Hu return;
245a560f3ebSWei Hu }
246a560f3ebSWei Hu
247a560f3ebSWei Hu mtx_lock(&hvs_trans_socks_mtx);
248a560f3ebSWei Hu __hvs_insert_socket_on_list(so, list);
249a560f3ebSWei Hu mtx_unlock(&hvs_trans_socks_mtx);
250a560f3ebSWei Hu }
251a560f3ebSWei Hu
252a560f3ebSWei Hu static struct socket *
__hvs_find_socket_on_list(struct sockaddr_hvs * addr,unsigned char list)253a560f3ebSWei Hu __hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
254a560f3ebSWei Hu {
255a560f3ebSWei Hu struct hvs_pcb *p = NULL;
256a560f3ebSWei Hu
257a560f3ebSWei Hu if (list & HVS_LIST_BOUND)
258a560f3ebSWei Hu LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
259a560f3ebSWei Hu if (p->so != NULL &&
260a560f3ebSWei Hu addr->hvs_port == p->local_addr.hvs_port)
261a560f3ebSWei Hu return p->so;
262a560f3ebSWei Hu
263a560f3ebSWei Hu if (list & HVS_LIST_CONNECTED)
264a560f3ebSWei Hu LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
265a560f3ebSWei Hu if (p->so != NULL &&
266a560f3ebSWei Hu addr->hvs_port == p->local_addr.hvs_port)
267a560f3ebSWei Hu return p->so;
268a560f3ebSWei Hu
269a560f3ebSWei Hu return NULL;
270a560f3ebSWei Hu }
271a560f3ebSWei Hu
272a560f3ebSWei Hu static struct socket *
hvs_find_socket_on_list(struct sockaddr_hvs * addr,unsigned char list)273a560f3ebSWei Hu hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
274a560f3ebSWei Hu {
275a560f3ebSWei Hu struct socket *s = NULL;
276a560f3ebSWei Hu
277a560f3ebSWei Hu mtx_lock(&hvs_trans_socks_mtx);
278a560f3ebSWei Hu s = __hvs_find_socket_on_list(addr, list);
279a560f3ebSWei Hu mtx_unlock(&hvs_trans_socks_mtx);
280a560f3ebSWei Hu
281a560f3ebSWei Hu return s;
282a560f3ebSWei Hu }
283a560f3ebSWei Hu
284a560f3ebSWei Hu static inline void
hvs_addr_set(struct sockaddr_hvs * addr,unsigned int port)285a560f3ebSWei Hu hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port)
286a560f3ebSWei Hu {
287a560f3ebSWei Hu memset(addr, 0, sizeof(*addr));
288a560f3ebSWei Hu addr->sa_family = AF_HYPERV;
289f161d294SMark Johnston addr->sa_len = sizeof(*addr);
290a560f3ebSWei Hu addr->hvs_port = port;
291a560f3ebSWei Hu }
292a560f3ebSWei Hu
/* Build an AF_HYPERV address whose port is the one encoded in `svr_id'. */
void
hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id)
{
	unsigned int port;

	port = get_port_by_srv_id(svr_id);
	hvs_addr_set(addr, port);
}
298a560f3ebSWei Hu
299a560f3ebSWei Hu int
hvs_trans_lock(void)300a560f3ebSWei Hu hvs_trans_lock(void)
301a560f3ebSWei Hu {
302a560f3ebSWei Hu sx_xlock(&hvs_trans_socks_sx);
303a560f3ebSWei Hu return (0);
304a560f3ebSWei Hu }
305a560f3ebSWei Hu
306a560f3ebSWei Hu void
hvs_trans_unlock(void)307a560f3ebSWei Hu hvs_trans_unlock(void)
308a560f3ebSWei Hu {
309a560f3ebSWei Hu sx_xunlock(&hvs_trans_socks_sx);
310a560f3ebSWei Hu }
311a560f3ebSWei Hu
312625932c9SKyle Evans static int
hvs_dom_probe(void)313625932c9SKyle Evans hvs_dom_probe(void)
314625932c9SKyle Evans {
315625932c9SKyle Evans
316625932c9SKyle Evans /* Don't even give us a chance to attach on non-HyperV. */
317625932c9SKyle Evans if (vm_guest != VM_GUEST_HV)
318625932c9SKyle Evans return (ENXIO);
319625932c9SKyle Evans return (0);
320625932c9SKyle Evans }
321625932c9SKyle Evans
32289128ff3SGleb Smirnoff static void
hvs_trans_init(void * arg __unused)32389128ff3SGleb Smirnoff hvs_trans_init(void *arg __unused)
324a560f3ebSWei Hu {
325a560f3ebSWei Hu
326a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
327a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_init called\n", __func__);
328a560f3ebSWei Hu
329a560f3ebSWei Hu /* Initialize Globals */
330a560f3ebSWei Hu previous_auto_bound_port = MAX_PORT;
331a560f3ebSWei Hu sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx");
332a560f3ebSWei Hu mtx_init(&hvs_trans_socks_mtx,
333a560f3ebSWei Hu "hvs_trans_socks_mtx", NULL, MTX_DEF);
334a560f3ebSWei Hu LIST_INIT(&hvs_trans_bound_socks);
335a560f3ebSWei Hu LIST_INIT(&hvs_trans_connected_socks);
336a560f3ebSWei Hu }
33789128ff3SGleb Smirnoff SYSINIT(hvs_trans_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
33889128ff3SGleb Smirnoff hvs_trans_init, NULL);
339a560f3ebSWei Hu
340a560f3ebSWei Hu /*
341a560f3ebSWei Hu * Called in two cases:
342a560f3ebSWei Hu * 1) When user calls socket();
343a560f3ebSWei Hu * 2) When we accept new incoming conneciton and call sonewconn().
344a560f3ebSWei Hu */
345a560f3ebSWei Hu int
hvs_trans_attach(struct socket * so,int proto,struct thread * td)346a560f3ebSWei Hu hvs_trans_attach(struct socket *so, int proto, struct thread *td)
347a560f3ebSWei Hu {
348a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
349a560f3ebSWei Hu
350a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
351a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_attach called\n", __func__);
352a560f3ebSWei Hu
353a560f3ebSWei Hu if (so->so_type != SOCK_STREAM)
354a560f3ebSWei Hu return (ESOCKTNOSUPPORT);
355a560f3ebSWei Hu
356a560f3ebSWei Hu if (proto != 0 && proto != HYPERV_SOCK_PROTO_TRANS)
357a560f3ebSWei Hu return (EPROTONOSUPPORT);
358a560f3ebSWei Hu
359a560f3ebSWei Hu if (pcb != NULL)
360a560f3ebSWei Hu return (EISCONN);
361a560f3ebSWei Hu pcb = malloc(sizeof(struct hvs_pcb), M_HVSOCK, M_NOWAIT | M_ZERO);
362a560f3ebSWei Hu if (pcb == NULL)
363a560f3ebSWei Hu return (ENOMEM);
364a560f3ebSWei Hu
365a560f3ebSWei Hu pcb->so = so;
366a560f3ebSWei Hu so->so_pcb = (void *)pcb;
367a560f3ebSWei Hu
368a560f3ebSWei Hu return (0);
369a560f3ebSWei Hu }
370a560f3ebSWei Hu
371a560f3ebSWei Hu void
hvs_trans_detach(struct socket * so)372a560f3ebSWei Hu hvs_trans_detach(struct socket *so)
373a560f3ebSWei Hu {
374a560f3ebSWei Hu struct hvs_pcb *pcb;
375a560f3ebSWei Hu
376a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
377a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_detach called\n", __func__);
378a560f3ebSWei Hu
379a560f3ebSWei Hu (void) hvs_trans_lock();
380a560f3ebSWei Hu pcb = so2hvspcb(so);
381a560f3ebSWei Hu if (pcb == NULL) {
382a560f3ebSWei Hu hvs_trans_unlock();
383a560f3ebSWei Hu return;
384a560f3ebSWei Hu }
385a560f3ebSWei Hu
386a560f3ebSWei Hu if (SOLISTENING(so)) {
387a560f3ebSWei Hu bzero(pcb, sizeof(*pcb));
388a560f3ebSWei Hu free(pcb, M_HVSOCK);
389a560f3ebSWei Hu }
390a560f3ebSWei Hu
391a560f3ebSWei Hu so->so_pcb = NULL;
392a560f3ebSWei Hu
393a560f3ebSWei Hu hvs_trans_unlock();
394a560f3ebSWei Hu }
395a560f3ebSWei Hu
396a560f3ebSWei Hu int
hvs_trans_bind(struct socket * so,struct sockaddr * addr,struct thread * td)397a560f3ebSWei Hu hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td)
398a560f3ebSWei Hu {
399a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
400a560f3ebSWei Hu struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr;
401a560f3ebSWei Hu int error = 0;
402a560f3ebSWei Hu
403a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
404a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_bind called\n", __func__);
405a560f3ebSWei Hu
406a560f3ebSWei Hu if (sa == NULL) {
407a560f3ebSWei Hu return (EINVAL);
408a560f3ebSWei Hu }
409a560f3ebSWei Hu
410a560f3ebSWei Hu if (pcb == NULL) {
411a560f3ebSWei Hu return (EINVAL);
412a560f3ebSWei Hu }
413a560f3ebSWei Hu
414a560f3ebSWei Hu if (sa->sa_family != AF_HYPERV) {
415a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_ERR,
416a560f3ebSWei Hu "%s: Not supported, sa_family is %u\n",
417a560f3ebSWei Hu __func__, sa->sa_family);
418a560f3ebSWei Hu return (EAFNOSUPPORT);
419a560f3ebSWei Hu }
420f161d294SMark Johnston if (sa->sa_len != sizeof(*sa)) {
421f161d294SMark Johnston HVSOCK_DBG(HVSOCK_DBG_ERR,
422f161d294SMark Johnston "%s: Not supported, sa_len is %u\n",
423f161d294SMark Johnston __func__, sa->sa_len);
424f161d294SMark Johnston return (EINVAL);
425f161d294SMark Johnston }
426a560f3ebSWei Hu
427a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
428a560f3ebSWei Hu "%s: binding port = 0x%x\n", __func__, sa->hvs_port);
429a560f3ebSWei Hu
430a560f3ebSWei Hu mtx_lock(&hvs_trans_socks_mtx);
431a560f3ebSWei Hu if (__hvs_find_socket_on_list(sa,
432a560f3ebSWei Hu HVS_LIST_BOUND | HVS_LIST_CONNECTED)) {
433a560f3ebSWei Hu error = EADDRINUSE;
434a560f3ebSWei Hu } else {
435a560f3ebSWei Hu /*
436a560f3ebSWei Hu * The address is available for us to bind.
437a560f3ebSWei Hu * Add socket to the bound list.
438a560f3ebSWei Hu */
439a560f3ebSWei Hu hvs_addr_set(&pcb->local_addr, sa->hvs_port);
440a560f3ebSWei Hu hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY);
441a560f3ebSWei Hu __hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
442a560f3ebSWei Hu }
443a560f3ebSWei Hu mtx_unlock(&hvs_trans_socks_mtx);
444a560f3ebSWei Hu
445a560f3ebSWei Hu return (error);
446a560f3ebSWei Hu }
447a560f3ebSWei Hu
448a560f3ebSWei Hu int
hvs_trans_listen(struct socket * so,int backlog,struct thread * td)449a560f3ebSWei Hu hvs_trans_listen(struct socket *so, int backlog, struct thread *td)
450a560f3ebSWei Hu {
451a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
452a560f3ebSWei Hu struct socket *bound_so;
453a560f3ebSWei Hu int error;
454a560f3ebSWei Hu
455a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
456a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_listen called\n", __func__);
457a560f3ebSWei Hu
458a560f3ebSWei Hu if (pcb == NULL)
459a560f3ebSWei Hu return (EINVAL);
460a560f3ebSWei Hu
461a560f3ebSWei Hu /* Check if the address is already bound and it was by us. */
462a560f3ebSWei Hu bound_so = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND);
463a560f3ebSWei Hu if (bound_so == NULL || bound_so != so) {
464a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_ERR,
465a560f3ebSWei Hu "%s: Address not bound or not by us.\n", __func__);
466a560f3ebSWei Hu return (EADDRNOTAVAIL);
467a560f3ebSWei Hu }
468a560f3ebSWei Hu
469a560f3ebSWei Hu SOCK_LOCK(so);
470a560f3ebSWei Hu error = solisten_proto_check(so);
471a560f3ebSWei Hu if (error == 0)
472a560f3ebSWei Hu solisten_proto(so, backlog);
473a560f3ebSWei Hu SOCK_UNLOCK(so);
474a560f3ebSWei Hu
475a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
476a560f3ebSWei Hu "%s: HyperV Socket listen error = %d\n", __func__, error);
477a560f3ebSWei Hu return (error);
478a560f3ebSWei Hu }
479a560f3ebSWei Hu
480a560f3ebSWei Hu int
hvs_trans_accept(struct socket * so,struct sockaddr * sa)481cfb1e929SGleb Smirnoff hvs_trans_accept(struct socket *so, struct sockaddr *sa)
482a560f3ebSWei Hu {
483a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
484a560f3ebSWei Hu
485a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
486a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_accept called\n", __func__);
487a560f3ebSWei Hu
488a560f3ebSWei Hu if (pcb == NULL)
489a560f3ebSWei Hu return (EINVAL);
490a560f3ebSWei Hu
491cfb1e929SGleb Smirnoff memcpy(sa, &pcb->remote_addr, pcb->remote_addr.sa_len);
492a560f3ebSWei Hu
493cfb1e929SGleb Smirnoff return (0);
494a560f3ebSWei Hu }
495a560f3ebSWei Hu
496a560f3ebSWei Hu int
hvs_trans_connect(struct socket * so,struct sockaddr * nam,struct thread * td)497a560f3ebSWei Hu hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
498a560f3ebSWei Hu {
499a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
500a560f3ebSWei Hu struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam;
501a560f3ebSWei Hu bool found_auto_bound_port = false;
502a560f3ebSWei Hu int i, error = 0;
503a560f3ebSWei Hu
504a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
505a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n",
506a560f3ebSWei Hu __func__, raddr->hvs_port);
507a560f3ebSWei Hu
508a560f3ebSWei Hu if (pcb == NULL)
509a560f3ebSWei Hu return (EINVAL);
510a560f3ebSWei Hu
511a560f3ebSWei Hu /* Verify the remote address */
512a560f3ebSWei Hu if (raddr == NULL)
513a560f3ebSWei Hu return (EINVAL);
514a560f3ebSWei Hu if (raddr->sa_family != AF_HYPERV)
515a560f3ebSWei Hu return (EAFNOSUPPORT);
516f161d294SMark Johnston if (raddr->sa_len != sizeof(*raddr))
517f161d294SMark Johnston return (EINVAL);
518a560f3ebSWei Hu
519a560f3ebSWei Hu mtx_lock(&hvs_trans_socks_mtx);
520a560f3ebSWei Hu if (so->so_state &
521a560f3ebSWei Hu (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) {
522a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_ERR,
523a560f3ebSWei Hu "%s: socket connect in progress\n",
524a560f3ebSWei Hu __func__);
525a560f3ebSWei Hu error = EINPROGRESS;
526a560f3ebSWei Hu goto out;
527a560f3ebSWei Hu }
528a560f3ebSWei Hu
529a560f3ebSWei Hu /*
530a560f3ebSWei Hu * Find an available port for us to auto bind the local
531a560f3ebSWei Hu * address.
532a560f3ebSWei Hu */
533a560f3ebSWei Hu hvs_addr_set(&pcb->local_addr, 0);
534a560f3ebSWei Hu
535a560f3ebSWei Hu for (i = previous_auto_bound_port - 1;
536a560f3ebSWei Hu i != previous_auto_bound_port; i --) {
537a560f3ebSWei Hu if (i == MIN_PORT)
538a560f3ebSWei Hu i = MAX_PORT;
539a560f3ebSWei Hu
540a560f3ebSWei Hu pcb->local_addr.hvs_port = i;
541a560f3ebSWei Hu
542a560f3ebSWei Hu if (__hvs_find_socket_on_list(&pcb->local_addr,
543a560f3ebSWei Hu HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) {
544a560f3ebSWei Hu found_auto_bound_port = true;
545a560f3ebSWei Hu previous_auto_bound_port = i;
546a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
547a560f3ebSWei Hu "%s: found local bound port is %x\n",
548a560f3ebSWei Hu __func__, pcb->local_addr.hvs_port);
549a560f3ebSWei Hu break;
550a560f3ebSWei Hu }
551a560f3ebSWei Hu }
552a560f3ebSWei Hu
553a560f3ebSWei Hu if (found_auto_bound_port == true) {
554a560f3ebSWei Hu /* Found available port for auto bound, put on list */
555a560f3ebSWei Hu __hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
556a560f3ebSWei Hu /* Set VM service ID */
557a560f3ebSWei Hu pcb->vm_srv_id = srv_id_template;
558a560f3ebSWei Hu set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port);
559a560f3ebSWei Hu /* Set host service ID and remote port */
560a560f3ebSWei Hu pcb->host_srv_id = srv_id_template;
561a560f3ebSWei Hu set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port);
562a560f3ebSWei Hu hvs_addr_set(&pcb->remote_addr, raddr->hvs_port);
563a560f3ebSWei Hu
564a560f3ebSWei Hu /* Change the socket state to SS_ISCONNECTING */
565a560f3ebSWei Hu soisconnecting(so);
566a560f3ebSWei Hu } else {
567a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_ERR,
568a560f3ebSWei Hu "%s: No local port available for auto bound\n",
569a560f3ebSWei Hu __func__);
570a560f3ebSWei Hu error = EADDRINUSE;
571a560f3ebSWei Hu }
572a560f3ebSWei Hu
573a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is ");
574a560f3ebSWei Hu hvsock_print_guid(&pcb->vm_srv_id);
575a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is ");
576a560f3ebSWei Hu hvsock_print_guid(&pcb->host_srv_id);
577a560f3ebSWei Hu
578a560f3ebSWei Hu out:
579a560f3ebSWei Hu mtx_unlock(&hvs_trans_socks_mtx);
580a560f3ebSWei Hu
581a560f3ebSWei Hu if (found_auto_bound_port == true)
582a560f3ebSWei Hu vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id);
583a560f3ebSWei Hu
584a560f3ebSWei Hu return (error);
585a560f3ebSWei Hu }
586a560f3ebSWei Hu
587a560f3ebSWei Hu int
hvs_trans_disconnect(struct socket * so)588a560f3ebSWei Hu hvs_trans_disconnect(struct socket *so)
589a560f3ebSWei Hu {
590a560f3ebSWei Hu struct hvs_pcb *pcb;
591a560f3ebSWei Hu
592a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
593a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_disconnect called\n", __func__);
594a560f3ebSWei Hu
595a560f3ebSWei Hu (void) hvs_trans_lock();
596a560f3ebSWei Hu pcb = so2hvspcb(so);
597a560f3ebSWei Hu if (pcb == NULL) {
598a560f3ebSWei Hu hvs_trans_unlock();
599a560f3ebSWei Hu return (EINVAL);
600a560f3ebSWei Hu }
601a560f3ebSWei Hu
602a560f3ebSWei Hu /* If socket is already disconnected, skip this */
603a560f3ebSWei Hu if ((so->so_state & SS_ISDISCONNECTED) == 0)
604a560f3ebSWei Hu soisdisconnecting(so);
605a560f3ebSWei Hu
606a560f3ebSWei Hu hvs_trans_unlock();
607a560f3ebSWei Hu
608a560f3ebSWei Hu return (0);
609a560f3ebSWei Hu }
610a560f3ebSWei Hu
/*
 * Argument bundle that hvs_trans_soreceive() passes, through
 * vmbus_chan_recv_peek_call(), to hvsock_br_callback().
 */
struct hvs_callback_arg {
	struct uio	*uio;	/* the caller's uio for this transfer */
	struct sockbuf	*sb;	/* the socket buffer locked by the caller */
};
615a560f3ebSWei Hu
616a560f3ebSWei Hu int
hvs_trans_soreceive(struct socket * so,struct sockaddr ** paddr,struct uio * uio,struct mbuf ** mp0,struct mbuf ** controlp,int * flagsp)617a560f3ebSWei Hu hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
618a560f3ebSWei Hu struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
619a560f3ebSWei Hu {
620a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
621a560f3ebSWei Hu struct sockbuf *sb;
622a560f3ebSWei Hu ssize_t orig_resid;
623a560f3ebSWei Hu uint32_t canread, to_read;
624a560f3ebSWei Hu int flags, error = 0;
625a560f3ebSWei Hu struct hvs_callback_arg cbarg;
626a560f3ebSWei Hu
627a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
628a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_soreceive called\n", __func__);
629a560f3ebSWei Hu
630a560f3ebSWei Hu if (so->so_type != SOCK_STREAM)
631a560f3ebSWei Hu return (EINVAL);
632a560f3ebSWei Hu if (pcb == NULL)
633a560f3ebSWei Hu return (EINVAL);
634a560f3ebSWei Hu
635a560f3ebSWei Hu if (flagsp != NULL)
636a560f3ebSWei Hu flags = *flagsp &~ MSG_EOR;
637a560f3ebSWei Hu else
638a560f3ebSWei Hu flags = 0;
639a560f3ebSWei Hu
640a560f3ebSWei Hu if (flags & MSG_PEEK)
641a560f3ebSWei Hu return (EOPNOTSUPP);
642a560f3ebSWei Hu
643a560f3ebSWei Hu /* If no space to copy out anything */
644a560f3ebSWei Hu if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ)
645a560f3ebSWei Hu return (EINVAL);
646a560f3ebSWei Hu
647a560f3ebSWei Hu orig_resid = uio->uio_resid;
648a560f3ebSWei Hu
649a560f3ebSWei Hu /* Prevent other readers from entering the socket. */
650f94acf52SMark Johnston error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
651a560f3ebSWei Hu if (error) {
652a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_ERR,
653f94acf52SMark Johnston "%s: soiolock returned error = %d\n", __func__, error);
654a560f3ebSWei Hu return (error);
655a560f3ebSWei Hu }
656a560f3ebSWei Hu
657f94acf52SMark Johnston sb = &so->so_rcv;
658a560f3ebSWei Hu SOCKBUF_LOCK(sb);
659a560f3ebSWei Hu
660a560f3ebSWei Hu cbarg.uio = uio;
661a560f3ebSWei Hu cbarg.sb = sb;
662a560f3ebSWei Hu /*
663a560f3ebSWei Hu * If the socket is closing, there might still be some data
664a560f3ebSWei Hu * in rx br to read. However we need to make sure
665a560f3ebSWei Hu * the channel is still open.
666a560f3ebSWei Hu */
667a560f3ebSWei Hu if ((sb->sb_state & SBS_CANTRCVMORE) &&
668a560f3ebSWei Hu (so->so_state & SS_ISDISCONNECTED)) {
669a560f3ebSWei Hu /* Other thread already closed the channel */
670a560f3ebSWei Hu error = EPIPE;
671a560f3ebSWei Hu goto out;
672a560f3ebSWei Hu }
673a560f3ebSWei Hu
674a560f3ebSWei Hu while (true) {
675a560f3ebSWei Hu while (uio->uio_resid > 0 &&
676a560f3ebSWei Hu (canread = hvsock_canread_check(pcb)) > 0) {
677a560f3ebSWei Hu to_read = MIN(canread, uio->uio_resid);
678a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
679a560f3ebSWei Hu "%s: to_read = %u, skip = %u\n", __func__, to_read,
680a560f3ebSWei Hu (unsigned int)(sizeof(struct hvs_pkt_header) +
681a560f3ebSWei Hu pcb->recv_data_off));
682a560f3ebSWei Hu
683a560f3ebSWei Hu error = vmbus_chan_recv_peek_call(pcb->chan, to_read,
684a560f3ebSWei Hu sizeof(struct hvs_pkt_header) + pcb->recv_data_off,
685a560f3ebSWei Hu hvsock_br_callback, (void *)&cbarg);
686a560f3ebSWei Hu /*
687a560f3ebSWei Hu * It is possible socket is disconnected becasue
688a560f3ebSWei Hu * we released lock in hvsock_br_callback. So we
689a560f3ebSWei Hu * need to check the state to make sure it is not
690a560f3ebSWei Hu * disconnected.
691a560f3ebSWei Hu */
692a560f3ebSWei Hu if (error || so->so_state & SS_ISDISCONNECTED) {
693a560f3ebSWei Hu break;
694a560f3ebSWei Hu }
695a560f3ebSWei Hu
696a560f3ebSWei Hu pcb->recv_data_len -= to_read;
697a560f3ebSWei Hu pcb->recv_data_off += to_read;
698a560f3ebSWei Hu }
699a560f3ebSWei Hu
700a560f3ebSWei Hu if (error)
701a560f3ebSWei Hu break;
702a560f3ebSWei Hu
703a560f3ebSWei Hu /* Abort if socket has reported problems. */
704a560f3ebSWei Hu if (so->so_error) {
705a560f3ebSWei Hu if (so->so_error == ESHUTDOWN &&
706a560f3ebSWei Hu orig_resid > uio->uio_resid) {
707a560f3ebSWei Hu /*
708a560f3ebSWei Hu * Although we got a FIN, we also received
709a560f3ebSWei Hu * some data in this round. Delivery it
710a560f3ebSWei Hu * to user.
711a560f3ebSWei Hu */
712a560f3ebSWei Hu error = 0;
713a560f3ebSWei Hu } else {
714a560f3ebSWei Hu if (so->so_error != ESHUTDOWN)
715a560f3ebSWei Hu error = so->so_error;
716a560f3ebSWei Hu }
717a560f3ebSWei Hu
718a560f3ebSWei Hu break;
719a560f3ebSWei Hu }
720a560f3ebSWei Hu
721a560f3ebSWei Hu /* Cannot received more. */
722a560f3ebSWei Hu if (sb->sb_state & SBS_CANTRCVMORE)
723a560f3ebSWei Hu break;
724a560f3ebSWei Hu
725a560f3ebSWei Hu /* We are done if buffer has been filled */
726a560f3ebSWei Hu if (uio->uio_resid == 0)
727a560f3ebSWei Hu break;
728a560f3ebSWei Hu
729a560f3ebSWei Hu if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid)
730a560f3ebSWei Hu break;
731a560f3ebSWei Hu
732a560f3ebSWei Hu /* Buffer ring is empty and we shall not block */
733a560f3ebSWei Hu if ((so->so_state & SS_NBIO) ||
734a560f3ebSWei Hu (flags & (MSG_DONTWAIT|MSG_NBIO))) {
735a560f3ebSWei Hu if (orig_resid == uio->uio_resid) {
736a560f3ebSWei Hu /* We have not read anything */
737a560f3ebSWei Hu error = EAGAIN;
738a560f3ebSWei Hu }
739a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
740a560f3ebSWei Hu "%s: non blocked read return, error %d.\n",
741a560f3ebSWei Hu __func__, error);
742a560f3ebSWei Hu break;
743a560f3ebSWei Hu }
744a560f3ebSWei Hu
745a560f3ebSWei Hu /*
746a560f3ebSWei Hu * Wait and block until (more) data comes in.
747a560f3ebSWei Hu * Note: Drops the sockbuf lock during wait.
748a560f3ebSWei Hu */
74943283184SGleb Smirnoff error = sbwait(so, SO_RCV);
750a560f3ebSWei Hu
751a560f3ebSWei Hu if (error)
752a560f3ebSWei Hu break;
753a560f3ebSWei Hu
754a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
755a560f3ebSWei Hu "%s: wake up from sbwait, read available is %u\n",
756a560f3ebSWei Hu __func__, vmbus_chan_read_available(pcb->chan));
757a560f3ebSWei Hu }
758a560f3ebSWei Hu
759a560f3ebSWei Hu out:
760a560f3ebSWei Hu SOCKBUF_UNLOCK(sb);
761f94acf52SMark Johnston SOCK_IO_RECV_UNLOCK(so);
762a560f3ebSWei Hu
7636dc7bf0cSGordon Bergling /* We received a FIN in this call */
764a560f3ebSWei Hu if (so->so_error == ESHUTDOWN) {
765a560f3ebSWei Hu if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
766a560f3ebSWei Hu /* Send has already closed */
767a560f3ebSWei Hu soisdisconnecting(so);
768a560f3ebSWei Hu } else {
769a560f3ebSWei Hu /* Just close the receive side */
770a560f3ebSWei Hu socantrcvmore(so);
771a560f3ebSWei Hu }
772a560f3ebSWei Hu }
773a560f3ebSWei Hu
774a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
775a560f3ebSWei Hu "%s: returning error = %d, so_error = %d\n",
776a560f3ebSWei Hu __func__, error, so->so_error);
777a560f3ebSWei Hu
778a560f3ebSWei Hu return (error);
779a560f3ebSWei Hu }
780a560f3ebSWei Hu
781a560f3ebSWei Hu int
hvs_trans_sosend(struct socket * so,struct sockaddr * addr,struct uio * uio,struct mbuf * top,struct mbuf * controlp,int flags,struct thread * td)782a560f3ebSWei Hu hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
783a560f3ebSWei Hu struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td)
784a560f3ebSWei Hu {
785a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
786a560f3ebSWei Hu struct sockbuf *sb;
787a560f3ebSWei Hu ssize_t orig_resid;
788a560f3ebSWei Hu uint32_t canwrite, to_write;
789a560f3ebSWei Hu int error = 0;
790a560f3ebSWei Hu
791a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
792db7ec3c3SLi-Wen Hsu "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %zd\n",
793a560f3ebSWei Hu __func__, uio->uio_resid);
794a560f3ebSWei Hu
795a560f3ebSWei Hu if (so->so_type != SOCK_STREAM)
796a560f3ebSWei Hu return (EINVAL);
797a560f3ebSWei Hu if (pcb == NULL)
798a560f3ebSWei Hu return (EINVAL);
799a560f3ebSWei Hu
800a560f3ebSWei Hu /* If nothing to send */
801a560f3ebSWei Hu if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE)
802a560f3ebSWei Hu return (EINVAL);
803a560f3ebSWei Hu
804a560f3ebSWei Hu orig_resid = uio->uio_resid;
805a560f3ebSWei Hu
806a560f3ebSWei Hu /* Prevent other writers from entering the socket. */
807f94acf52SMark Johnston error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
808a560f3ebSWei Hu if (error) {
809a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_ERR,
810f94acf52SMark Johnston "%s: soiolocak returned error = %d\n", __func__, error);
811a560f3ebSWei Hu return (error);
812a560f3ebSWei Hu }
813a560f3ebSWei Hu
814f94acf52SMark Johnston sb = &so->so_snd;
815a560f3ebSWei Hu SOCKBUF_LOCK(sb);
816a560f3ebSWei Hu
817a560f3ebSWei Hu if ((sb->sb_state & SBS_CANTSENDMORE) ||
818a560f3ebSWei Hu so->so_error == ESHUTDOWN) {
819a560f3ebSWei Hu error = EPIPE;
820a560f3ebSWei Hu goto out;
821a560f3ebSWei Hu }
822a560f3ebSWei Hu
823a560f3ebSWei Hu while (uio->uio_resid > 0) {
824a560f3ebSWei Hu canwrite = hvsock_canwrite_check(pcb);
825a560f3ebSWei Hu if (canwrite == 0) {
826a560f3ebSWei Hu /* We have sent some data */
827a560f3ebSWei Hu if (orig_resid > uio->uio_resid)
828a560f3ebSWei Hu break;
829a560f3ebSWei Hu /*
830a560f3ebSWei Hu * We have not sent any data and it is
831a560f3ebSWei Hu * non-blocked io
832a560f3ebSWei Hu */
833a560f3ebSWei Hu if (so->so_state & SS_NBIO ||
834a560f3ebSWei Hu (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
835a560f3ebSWei Hu error = EWOULDBLOCK;
836a560f3ebSWei Hu break;
837a560f3ebSWei Hu } else {
838a560f3ebSWei Hu /*
839a560f3ebSWei Hu * We are here because there is no space on
840a560f3ebSWei Hu * send buffer ring. Signal the other side
841a560f3ebSWei Hu * to read and free more space.
842a560f3ebSWei Hu * Sleep wait until space avaiable to send
843a560f3ebSWei Hu * Note: Drops the sockbuf lock during wait.
844a560f3ebSWei Hu */
84543283184SGleb Smirnoff error = sbwait(so, SO_SND);
846a560f3ebSWei Hu
847a560f3ebSWei Hu if (error)
848a560f3ebSWei Hu break;
849a560f3ebSWei Hu
850a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
851a560f3ebSWei Hu "%s: wake up from sbwait, space avail on "
852a560f3ebSWei Hu "tx ring is %u\n",
853a560f3ebSWei Hu __func__,
854a560f3ebSWei Hu vmbus_chan_write_available(pcb->chan));
855a560f3ebSWei Hu
856a560f3ebSWei Hu continue;
857a560f3ebSWei Hu }
858a560f3ebSWei Hu }
859a560f3ebSWei Hu to_write = MIN(canwrite, uio->uio_resid);
860a560f3ebSWei Hu to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ);
861a560f3ebSWei Hu
862a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
863a560f3ebSWei Hu "%s: canwrite is %u, to_write = %u\n", __func__,
864a560f3ebSWei Hu canwrite, to_write);
865a560f3ebSWei Hu error = hvsock_send_data(pcb->chan, uio, to_write, sb);
866a560f3ebSWei Hu
867a560f3ebSWei Hu if (error)
868a560f3ebSWei Hu break;
869a560f3ebSWei Hu }
870a560f3ebSWei Hu
871a560f3ebSWei Hu out:
872a560f3ebSWei Hu SOCKBUF_UNLOCK(sb);
873f94acf52SMark Johnston SOCK_IO_SEND_UNLOCK(so);
874a560f3ebSWei Hu
875a560f3ebSWei Hu return (error);
876a560f3ebSWei Hu }
877a560f3ebSWei Hu
878a560f3ebSWei Hu int
hvs_trans_peeraddr(struct socket * so,struct sockaddr * sa)8790fac350cSGleb Smirnoff hvs_trans_peeraddr(struct socket *so, struct sockaddr *sa)
880a560f3ebSWei Hu {
881a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
882a560f3ebSWei Hu
883a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
884a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__);
885a560f3ebSWei Hu
886a560f3ebSWei Hu if (pcb == NULL)
887a560f3ebSWei Hu return (EINVAL);
888a560f3ebSWei Hu
8890fac350cSGleb Smirnoff memcpy(sa, &pcb->remote_addr, pcb->remote_addr.sa_len);
890a560f3ebSWei Hu
8910fac350cSGleb Smirnoff return (0);
892a560f3ebSWei Hu }
893a560f3ebSWei Hu
894a560f3ebSWei Hu int
hvs_trans_sockaddr(struct socket * so,struct sockaddr * sa)8950fac350cSGleb Smirnoff hvs_trans_sockaddr(struct socket *so, struct sockaddr *sa)
896a560f3ebSWei Hu {
897a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
898a560f3ebSWei Hu
899a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
900a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__);
901a560f3ebSWei Hu
902a560f3ebSWei Hu if (pcb == NULL)
903a560f3ebSWei Hu return (EINVAL);
904a560f3ebSWei Hu
9050fac350cSGleb Smirnoff memcpy(sa, &pcb->local_addr, pcb->local_addr.sa_len);
906a560f3ebSWei Hu
9070fac350cSGleb Smirnoff return (0);
908a560f3ebSWei Hu }
909a560f3ebSWei Hu
910a560f3ebSWei Hu void
hvs_trans_close(struct socket * so)911a560f3ebSWei Hu hvs_trans_close(struct socket *so)
912a560f3ebSWei Hu {
913a560f3ebSWei Hu struct hvs_pcb *pcb;
914a560f3ebSWei Hu
915a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
916a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_close called\n", __func__);
917a560f3ebSWei Hu
918a560f3ebSWei Hu (void) hvs_trans_lock();
919a560f3ebSWei Hu pcb = so2hvspcb(so);
920a560f3ebSWei Hu if (!pcb) {
921a560f3ebSWei Hu hvs_trans_unlock();
922a560f3ebSWei Hu return;
923a560f3ebSWei Hu }
924a560f3ebSWei Hu
925a560f3ebSWei Hu if (so->so_state & SS_ISCONNECTED) {
926a560f3ebSWei Hu /* Send a FIN to peer */
927a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
928a560f3ebSWei Hu "%s: hvs_trans_close sending a FIN to host\n", __func__);
929a560f3ebSWei Hu (void) hvsock_send_data(pcb->chan, NULL, 0, NULL);
930a560f3ebSWei Hu }
931a560f3ebSWei Hu
932a560f3ebSWei Hu if (so->so_state &
933a560f3ebSWei Hu (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
934a560f3ebSWei Hu soisdisconnected(so);
935a560f3ebSWei Hu
936a560f3ebSWei Hu pcb->chan = NULL;
937a560f3ebSWei Hu pcb->so = NULL;
938a560f3ebSWei Hu
939a560f3ebSWei Hu if (SOLISTENING(so)) {
940a560f3ebSWei Hu mtx_lock(&hvs_trans_socks_mtx);
941a560f3ebSWei Hu /* Remove from bound list */
942a560f3ebSWei Hu __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
943a560f3ebSWei Hu mtx_unlock(&hvs_trans_socks_mtx);
944a560f3ebSWei Hu }
945a560f3ebSWei Hu
946a560f3ebSWei Hu hvs_trans_unlock();
947a560f3ebSWei Hu
948a560f3ebSWei Hu return;
949a560f3ebSWei Hu }
950a560f3ebSWei Hu
951a560f3ebSWei Hu void
hvs_trans_abort(struct socket * so)952a560f3ebSWei Hu hvs_trans_abort(struct socket *so)
953a560f3ebSWei Hu {
954a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
955a560f3ebSWei Hu
956a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
957a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_abort called\n", __func__);
958a560f3ebSWei Hu
959a560f3ebSWei Hu (void) hvs_trans_lock();
960a560f3ebSWei Hu if (pcb == NULL) {
961a560f3ebSWei Hu hvs_trans_unlock();
962a560f3ebSWei Hu return;
963a560f3ebSWei Hu }
964a560f3ebSWei Hu
965a560f3ebSWei Hu if (SOLISTENING(so)) {
966a560f3ebSWei Hu mtx_lock(&hvs_trans_socks_mtx);
967a560f3ebSWei Hu /* Remove from bound list */
968a560f3ebSWei Hu __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
969a560f3ebSWei Hu mtx_unlock(&hvs_trans_socks_mtx);
970a560f3ebSWei Hu }
971a560f3ebSWei Hu
972a560f3ebSWei Hu if (so->so_state & SS_ISCONNECTED) {
973a560f3ebSWei Hu (void) sodisconnect(so);
974a560f3ebSWei Hu }
975a560f3ebSWei Hu hvs_trans_unlock();
976a560f3ebSWei Hu
977a560f3ebSWei Hu return;
978a560f3ebSWei Hu }
979a560f3ebSWei Hu
980a560f3ebSWei Hu int
hvs_trans_shutdown(struct socket * so,enum shutdown_how how)9815bba2728SGleb Smirnoff hvs_trans_shutdown(struct socket *so, enum shutdown_how how)
982a560f3ebSWei Hu {
983a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
984a560f3ebSWei Hu
985a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
986a560f3ebSWei Hu "%s: HyperV Socket hvs_trans_shutdown called\n", __func__);
987a560f3ebSWei Hu
9885bba2728SGleb Smirnoff SOCK_LOCK(so);
9895bba2728SGleb Smirnoff if ((so->so_state &
9905bba2728SGleb Smirnoff (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
9915bba2728SGleb Smirnoff SOCK_UNLOCK(so);
9925bba2728SGleb Smirnoff return (ENOTCONN);
9935bba2728SGleb Smirnoff }
9945bba2728SGleb Smirnoff SOCK_UNLOCK(so);
9955bba2728SGleb Smirnoff
996a560f3ebSWei Hu if (pcb == NULL)
997a560f3ebSWei Hu return (EINVAL);
998a560f3ebSWei Hu
9995bba2728SGleb Smirnoff switch (how) {
10005bba2728SGleb Smirnoff case SHUT_RD:
10015bba2728SGleb Smirnoff socantrcvmore(so);
10025bba2728SGleb Smirnoff break;
10035bba2728SGleb Smirnoff case SHUT_RDWR:
10045bba2728SGleb Smirnoff socantrcvmore(so);
1005a560f3ebSWei Hu if (so->so_state & SS_ISCONNECTED) {
1006a560f3ebSWei Hu /* Send a FIN to peer */
10075bba2728SGleb Smirnoff SOCK_SENDBUF_LOCK(so);
10085bba2728SGleb Smirnoff (void) hvsock_send_data(pcb->chan, NULL, 0,
10095bba2728SGleb Smirnoff &so->so_snd);
10105bba2728SGleb Smirnoff SOCK_SENDBUF_UNLOCK(so);
1011a560f3ebSWei Hu soisdisconnecting(so);
1012a560f3ebSWei Hu }
10135bba2728SGleb Smirnoff /* FALLTHROUGH */
10145bba2728SGleb Smirnoff case SHUT_WR:
10155bba2728SGleb Smirnoff socantsendmore(so);
1016a560f3ebSWei Hu }
10175bba2728SGleb Smirnoff wakeup(&so->so_timeo);
1018a560f3ebSWei Hu
1019a560f3ebSWei Hu return (0);
1020a560f3ebSWei Hu }
1021a560f3ebSWei Hu
/* In the VM, we support Hyper-V Sockets with AF_HYPERV, and the endpoint is
 * <port> (see struct sockaddr_hvs).
 *
 * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
 * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
 * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
 * the below sockaddr:
 *
 * struct SOCKADDR_HV
 * {
 *    ADDRESS_FAMILY Family;
 *    USHORT Reserved;
 *    GUID VmId;
 *    GUID ServiceId;
 * };
 * Note: VmID is not used by FreeBSD VM and actually it isn't transmitted via
 * VMBus, because here it's obvious the host and the VM can easily identify
 * each other. Though the VmID is useful on the host, especially in the case
 * of Windows container, FreeBSD VM doesn't need it at all.
 *
 * To be compatible with similar infrastructure in Linux VMs, we have
 * to limit the available GUID space of SOCKADDR_HV so that we can create
 * a mapping between FreeBSD AF_HYPERV port and SOCKADDR_HV Service GUID.
 * The rule of writing Hyper-V Sockets apps on the host and in FreeBSD VM is:
 *
 ****************************************************************************
 * The only valid Service GUIDs, from the perspectives of both the host and *
 * FreeBSD VM, that can be connected by the other end, must conform to this *
 * format: <port>-facb-11e6-bd58-64006a7986d3.                              *
 ****************************************************************************
 *
 * When we write apps on the host to connect(), the GUID ServiceID is used.
 * When we write apps in FreeBSD VM to connect(), we only need to specify the
 * port and the driver will form the GUID and use that to request the host.
 *
 * From the perspective of FreeBSD VM, the remote ephemeral port (i.e. the
 * auto-generated remote port for a connect request initiated by the host's
 * connect()) is set to HVADDR_PORT_UNKNOWN, which is not really used on the
 * FreeBSD guest.
 */
1062a560f3ebSWei Hu
/*
 * HyperV hosts running vmbus version 'VMBUS_VERSION_WIN10' or older
 * restrict the HyperV socket ring buffer size to six 4K pages.  Newer
 * HyperV hosts don't have this limit.
 *
 * The RCV/SND values are the default ring sizes; MAX is the upper bound
 * applied when a larger ring is derived from the socket buffer sizes.
 */
#define HVS_RINGBUF_RCV_SIZE	(6 * PAGE_SIZE)
#define HVS_RINGBUF_SND_SIZE	(6 * PAGE_SIZE)
#define HVS_RINGBUF_MAX_SIZE	(64 * PAGE_SIZE)
1071a560f3ebSWei Hu
1072a560f3ebSWei Hu struct hvsock_sc {
1073a560f3ebSWei Hu device_t dev;
1074a560f3ebSWei Hu struct hvs_pcb *pcb;
1075a560f3ebSWei Hu struct vmbus_channel *channel;
1076a560f3ebSWei Hu };
1077a560f3ebSWei Hu
/*
 * Tell whether the channel's receive ring currently holds at least as many
 * bytes as a packet with an empty payload, HVSOCK_PKT_LEN(0).
 */
static bool
hvsock_chan_readable(struct vmbus_channel *chan)
{

	return (vmbus_chan_read_available(chan) >= HVSOCK_PKT_LEN(0));
}
1085a560f3ebSWei Hu
1086a560f3ebSWei Hu static void
hvsock_chan_cb(struct vmbus_channel * chan,void * context)1087a560f3ebSWei Hu hvsock_chan_cb(struct vmbus_channel *chan, void *context)
1088a560f3ebSWei Hu {
1089a560f3ebSWei Hu struct hvs_pcb *pcb = (struct hvs_pcb *) context;
1090a560f3ebSWei Hu struct socket *so;
1091a560f3ebSWei Hu uint32_t canwrite;
1092a560f3ebSWei Hu
1093a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1094a560f3ebSWei Hu "%s: host send us a wakeup on rb data, pcb = %p\n",
1095a560f3ebSWei Hu __func__, pcb);
1096a560f3ebSWei Hu
1097a560f3ebSWei Hu /*
1098a560f3ebSWei Hu * Check if the socket is still attached and valid.
1099a560f3ebSWei Hu * Here we know channel is still open. Need to make
1100a560f3ebSWei Hu * sure the socket has not been closed or freed.
1101a560f3ebSWei Hu */
1102a560f3ebSWei Hu (void) hvs_trans_lock();
1103a560f3ebSWei Hu so = hsvpcb2so(pcb);
1104a560f3ebSWei Hu
1105a560f3ebSWei Hu if (pcb->chan != NULL && so != NULL) {
1106a560f3ebSWei Hu /*
1107a560f3ebSWei Hu * Wake up reader if there are data to read.
1108a560f3ebSWei Hu */
1109a560f3ebSWei Hu SOCKBUF_LOCK(&(so)->so_rcv);
1110a560f3ebSWei Hu
1111a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1112a560f3ebSWei Hu "%s: read available = %u\n", __func__,
1113a560f3ebSWei Hu vmbus_chan_read_available(pcb->chan));
1114a560f3ebSWei Hu
1115a560f3ebSWei Hu if (hvsock_chan_readable(pcb->chan))
1116a560f3ebSWei Hu sorwakeup_locked(so);
1117a560f3ebSWei Hu else
1118a560f3ebSWei Hu SOCKBUF_UNLOCK(&(so)->so_rcv);
1119a560f3ebSWei Hu
1120a560f3ebSWei Hu /*
1121a560f3ebSWei Hu * Wake up sender if space becomes available to write.
1122a560f3ebSWei Hu */
1123a560f3ebSWei Hu SOCKBUF_LOCK(&(so)->so_snd);
1124a560f3ebSWei Hu canwrite = hvsock_canwrite_check(pcb);
1125a560f3ebSWei Hu
1126a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1127a560f3ebSWei Hu "%s: canwrite = %u\n", __func__, canwrite);
1128a560f3ebSWei Hu
1129a560f3ebSWei Hu if (canwrite > 0) {
1130a560f3ebSWei Hu sowwakeup_locked(so);
1131a560f3ebSWei Hu } else {
1132a560f3ebSWei Hu SOCKBUF_UNLOCK(&(so)->so_snd);
1133a560f3ebSWei Hu }
1134a560f3ebSWei Hu }
1135a560f3ebSWei Hu
1136a560f3ebSWei Hu hvs_trans_unlock();
1137a560f3ebSWei Hu
1138a560f3ebSWei Hu return;
1139a560f3ebSWei Hu }
1140a560f3ebSWei Hu
1141a560f3ebSWei Hu static int
hvsock_br_callback(void * datap,int cplen,void * cbarg)1142a560f3ebSWei Hu hvsock_br_callback(void *datap, int cplen, void *cbarg)
1143a560f3ebSWei Hu {
1144a560f3ebSWei Hu struct hvs_callback_arg *arg = (struct hvs_callback_arg *)cbarg;
1145a560f3ebSWei Hu struct uio *uio = arg->uio;
1146a560f3ebSWei Hu struct sockbuf *sb = arg->sb;
1147a560f3ebSWei Hu int error = 0;
1148a560f3ebSWei Hu
1149a560f3ebSWei Hu if (cbarg == NULL || datap == NULL)
1150a560f3ebSWei Hu return (EINVAL);
1151a560f3ebSWei Hu
1152a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1153db7ec3c3SLi-Wen Hsu "%s: called, uio_rw = %s, uio_resid = %zd, cplen = %u, "
1154a560f3ebSWei Hu "datap = %p\n",
1155a560f3ebSWei Hu __func__, (uio->uio_rw == UIO_READ) ? "read from br":"write to br",
1156a560f3ebSWei Hu uio->uio_resid, cplen, datap);
1157a560f3ebSWei Hu
1158a560f3ebSWei Hu if (sb)
1159a560f3ebSWei Hu SOCKBUF_UNLOCK(sb);
1160a560f3ebSWei Hu
1161a560f3ebSWei Hu error = uiomove(datap, cplen, uio);
1162a560f3ebSWei Hu
1163a560f3ebSWei Hu if (sb)
1164a560f3ebSWei Hu SOCKBUF_LOCK(sb);
1165a560f3ebSWei Hu
1166a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1167db7ec3c3SLi-Wen Hsu "%s: after uiomove, uio_resid = %zd, error = %d\n",
1168a560f3ebSWei Hu __func__, uio->uio_resid, error);
1169a560f3ebSWei Hu
1170a560f3ebSWei Hu return (error);
1171a560f3ebSWei Hu }
1172a560f3ebSWei Hu
1173a560f3ebSWei Hu static int
hvsock_send_data(struct vmbus_channel * chan,struct uio * uio,uint32_t to_write,struct sockbuf * sb)1174a560f3ebSWei Hu hvsock_send_data(struct vmbus_channel *chan, struct uio *uio,
1175a560f3ebSWei Hu uint32_t to_write, struct sockbuf *sb)
1176a560f3ebSWei Hu {
1177a560f3ebSWei Hu struct hvs_pkt_header hvs_pkt;
1178a560f3ebSWei Hu int hvs_pkthlen, hvs_pktlen, pad_pktlen, hlen, error = 0;
1179a560f3ebSWei Hu uint64_t pad = 0;
1180a560f3ebSWei Hu struct iovec iov[3];
1181a560f3ebSWei Hu struct hvs_callback_arg cbarg;
1182a560f3ebSWei Hu
1183a560f3ebSWei Hu if (chan == NULL)
1184a560f3ebSWei Hu return (ENOTCONN);
1185a560f3ebSWei Hu
1186a560f3ebSWei Hu hlen = sizeof(struct vmbus_chanpkt_hdr);
1187a560f3ebSWei Hu hvs_pkthlen = sizeof(struct hvs_pkt_header);
1188a560f3ebSWei Hu hvs_pktlen = hvs_pkthlen + to_write;
1189a560f3ebSWei Hu pad_pktlen = VMBUS_CHANPKT_TOTLEN(hvs_pktlen);
1190a560f3ebSWei Hu
1191a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1192a560f3ebSWei Hu "%s: hlen = %u, hvs_pkthlen = %u, hvs_pktlen = %u, "
1193a560f3ebSWei Hu "pad_pktlen = %u, data_len = %u\n",
1194a560f3ebSWei Hu __func__, hlen, hvs_pkthlen, hvs_pktlen, pad_pktlen, to_write);
1195a560f3ebSWei Hu
1196a560f3ebSWei Hu hvs_pkt.chan_pkt_hdr.cph_type = VMBUS_CHANPKT_TYPE_INBAND;
1197a560f3ebSWei Hu hvs_pkt.chan_pkt_hdr.cph_flags = 0;
1198a560f3ebSWei Hu VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_hlen, hlen);
1199a560f3ebSWei Hu VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_tlen, pad_pktlen);
1200a560f3ebSWei Hu hvs_pkt.chan_pkt_hdr.cph_xactid = 0;
1201a560f3ebSWei Hu
1202a560f3ebSWei Hu hvs_pkt.vmpipe_pkt_hdr.vmpipe_pkt_type = 1;
1203a560f3ebSWei Hu hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size = to_write;
1204a560f3ebSWei Hu
1205a560f3ebSWei Hu cbarg.uio = uio;
1206a560f3ebSWei Hu cbarg.sb = sb;
1207a560f3ebSWei Hu
1208a560f3ebSWei Hu if (uio && to_write > 0) {
1209a560f3ebSWei Hu iov[0].iov_base = &hvs_pkt;
1210a560f3ebSWei Hu iov[0].iov_len = hvs_pkthlen;
1211a560f3ebSWei Hu iov[1].iov_base = NULL;
1212a560f3ebSWei Hu iov[1].iov_len = to_write;
1213a560f3ebSWei Hu iov[2].iov_base = &pad;
1214a560f3ebSWei Hu iov[2].iov_len = pad_pktlen - hvs_pktlen;
1215a560f3ebSWei Hu
1216a560f3ebSWei Hu error = vmbus_chan_iov_send(chan, iov, 3,
1217a560f3ebSWei Hu hvsock_br_callback, &cbarg);
1218a560f3ebSWei Hu } else {
1219a560f3ebSWei Hu if (to_write == 0) {
1220a560f3ebSWei Hu iov[0].iov_base = &hvs_pkt;
1221a560f3ebSWei Hu iov[0].iov_len = hvs_pkthlen;
1222a560f3ebSWei Hu iov[1].iov_base = &pad;
1223a560f3ebSWei Hu iov[1].iov_len = pad_pktlen - hvs_pktlen;
1224a560f3ebSWei Hu error = vmbus_chan_iov_send(chan, iov, 2, NULL, NULL);
1225a560f3ebSWei Hu }
1226a560f3ebSWei Hu }
1227a560f3ebSWei Hu
1228a560f3ebSWei Hu if (error) {
1229a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1230a560f3ebSWei Hu "%s: error = %d\n", __func__, error);
1231a560f3ebSWei Hu }
1232a560f3ebSWei Hu
1233a560f3ebSWei Hu return (error);
1234a560f3ebSWei Hu }
1235a560f3ebSWei Hu
1236a560f3ebSWei Hu /*
1237a560f3ebSWei Hu * Check if we have data on current ring buffer to read
1238a560f3ebSWei Hu * or not. If not, advance the ring buffer read index to
1239a560f3ebSWei Hu * next packet. Update the recev_data_len and recev_data_off
1240a560f3ebSWei Hu * to new value.
1241a560f3ebSWei Hu * Return the number of bytes can read.
1242a560f3ebSWei Hu */
1243a560f3ebSWei Hu static uint32_t
hvsock_canread_check(struct hvs_pcb * pcb)1244a560f3ebSWei Hu hvsock_canread_check(struct hvs_pcb *pcb)
1245a560f3ebSWei Hu {
1246a560f3ebSWei Hu uint32_t advance;
1247a560f3ebSWei Hu uint32_t tlen, hlen, dlen;
1248a560f3ebSWei Hu uint32_t bytes_canread = 0;
1249a560f3ebSWei Hu int error;
1250a560f3ebSWei Hu
1251a560f3ebSWei Hu if (pcb == NULL || pcb->chan == NULL) {
1252a560f3ebSWei Hu pcb->so->so_error = EIO;
1253a560f3ebSWei Hu return (0);
1254a560f3ebSWei Hu }
1255a560f3ebSWei Hu
1256a560f3ebSWei Hu /* Still have data not read yet on current packet */
1257a560f3ebSWei Hu if (pcb->recv_data_len > 0)
1258a560f3ebSWei Hu return (pcb->recv_data_len);
1259a560f3ebSWei Hu
1260a560f3ebSWei Hu if (pcb->rb_init)
1261a560f3ebSWei Hu advance =
1262a560f3ebSWei Hu VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
1263a560f3ebSWei Hu else
1264a560f3ebSWei Hu advance = 0;
1265a560f3ebSWei Hu
1266a560f3ebSWei Hu bytes_canread = vmbus_chan_read_available(pcb->chan);
1267a560f3ebSWei Hu
1268a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1269a560f3ebSWei Hu "%s: bytes_canread on br = %u, advance = %u\n",
1270a560f3ebSWei Hu __func__, bytes_canread, advance);
1271a560f3ebSWei Hu
1272a560f3ebSWei Hu if (pcb->rb_init && bytes_canread == (advance + sizeof(uint64_t))) {
1273a560f3ebSWei Hu /*
1274a560f3ebSWei Hu * Nothing to read. Need to advance the rindex before
1275a560f3ebSWei Hu * calling sbwait, so host knows to wake us up when data
1276a560f3ebSWei Hu * is available to read on rb.
1277a560f3ebSWei Hu */
1278a560f3ebSWei Hu error = vmbus_chan_recv_idxadv(pcb->chan, advance);
1279a560f3ebSWei Hu if (error) {
1280a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_ERR,
1281a560f3ebSWei Hu "%s: after calling vmbus_chan_recv_idxadv, "
1282a560f3ebSWei Hu "got error = %d\n", __func__, error);
1283a560f3ebSWei Hu return (0);
1284a560f3ebSWei Hu } else {
1285a560f3ebSWei Hu pcb->rb_init = false;
1286a560f3ebSWei Hu pcb->recv_data_len = 0;
1287a560f3ebSWei Hu pcb->recv_data_off = 0;
1288a560f3ebSWei Hu bytes_canread = vmbus_chan_read_available(pcb->chan);
1289a560f3ebSWei Hu
1290a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1291a560f3ebSWei Hu "%s: advanced %u bytes, "
1292a560f3ebSWei Hu " bytes_canread on br now = %u\n",
1293a560f3ebSWei Hu __func__, advance, bytes_canread);
1294a560f3ebSWei Hu
1295a560f3ebSWei Hu if (bytes_canread == 0)
1296a560f3ebSWei Hu return (0);
1297a560f3ebSWei Hu else
1298a560f3ebSWei Hu advance = 0;
1299a560f3ebSWei Hu }
1300a560f3ebSWei Hu }
1301a560f3ebSWei Hu
1302a560f3ebSWei Hu if (bytes_canread <
1303a560f3ebSWei Hu advance + (sizeof(struct hvs_pkt_header) + sizeof(uint64_t)))
1304a560f3ebSWei Hu return (0);
1305a560f3ebSWei Hu
1306a560f3ebSWei Hu error = vmbus_chan_recv_peek(pcb->chan, &pcb->hvs_pkt,
1307a560f3ebSWei Hu sizeof(struct hvs_pkt_header), advance);
1308a560f3ebSWei Hu
1309a560f3ebSWei Hu /* Don't have anything to read */
1310a560f3ebSWei Hu if (error) {
1311a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1312a560f3ebSWei Hu "%s: after calling vmbus_chan_recv_peek, got error = %d\n",
1313a560f3ebSWei Hu __func__, error);
1314a560f3ebSWei Hu return (0);
1315a560f3ebSWei Hu }
1316a560f3ebSWei Hu
1317a560f3ebSWei Hu /*
1318a560f3ebSWei Hu * We just read in a new packet header. Do some sanity checks.
1319a560f3ebSWei Hu */
1320a560f3ebSWei Hu tlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
1321a560f3ebSWei Hu hlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_hlen);
1322a560f3ebSWei Hu dlen = pcb->hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size;
1323a560f3ebSWei Hu if (__predict_false(hlen < sizeof(struct vmbus_chanpkt_hdr)) ||
1324a560f3ebSWei Hu __predict_false(hlen > tlen) ||
1325a560f3ebSWei Hu __predict_false(tlen < dlen + sizeof(struct hvs_pkt_header))) {
1326a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1327a560f3ebSWei Hu "invalid tlen(%u), hlen(%u) or dlen(%u)\n",
1328a560f3ebSWei Hu tlen, hlen, dlen);
1329a560f3ebSWei Hu pcb->so->so_error = EIO;
1330a560f3ebSWei Hu return (0);
1331a560f3ebSWei Hu }
1332a560f3ebSWei Hu if (pcb->rb_init == false)
1333a560f3ebSWei Hu pcb->rb_init = true;
1334a560f3ebSWei Hu
1335a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1336a560f3ebSWei Hu "Got new pkt tlen(%u), hlen(%u) or dlen(%u)\n",
1337a560f3ebSWei Hu tlen, hlen, dlen);
1338a560f3ebSWei Hu
1339a560f3ebSWei Hu /* The other side has sent a close FIN */
1340a560f3ebSWei Hu if (dlen == 0) {
1341a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1342a560f3ebSWei Hu "%s: Received FIN from other side\n", __func__);
1343a560f3ebSWei Hu /* inform the caller by seting so_error to ESHUTDOWN */
1344a560f3ebSWei Hu pcb->so->so_error = ESHUTDOWN;
1345a560f3ebSWei Hu }
1346a560f3ebSWei Hu
1347a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1348a560f3ebSWei Hu "%s: canread on receive ring is %u \n", __func__, dlen);
1349a560f3ebSWei Hu
1350a560f3ebSWei Hu pcb->recv_data_len = dlen;
1351a560f3ebSWei Hu pcb->recv_data_off = 0;
1352a560f3ebSWei Hu
1353a560f3ebSWei Hu return (pcb->recv_data_len);
1354a560f3ebSWei Hu }
1355a560f3ebSWei Hu
1356a560f3ebSWei Hu static uint32_t
hvsock_canwrite_check(struct hvs_pcb * pcb)1357a560f3ebSWei Hu hvsock_canwrite_check(struct hvs_pcb *pcb)
1358a560f3ebSWei Hu {
1359a560f3ebSWei Hu uint32_t writeable;
1360a560f3ebSWei Hu uint32_t ret;
1361a560f3ebSWei Hu
1362a560f3ebSWei Hu if (pcb == NULL || pcb->chan == NULL)
1363a560f3ebSWei Hu return (0);
1364a560f3ebSWei Hu
1365a560f3ebSWei Hu writeable = vmbus_chan_write_available(pcb->chan);
1366a560f3ebSWei Hu
1367a560f3ebSWei Hu /*
1368a560f3ebSWei Hu * We must always reserve a 0-length-payload packet for the FIN.
1369a560f3ebSWei Hu */
1370a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1371db7ec3c3SLi-Wen Hsu "%s: writeable is %u, should be greater than %ju\n",
1372db7ec3c3SLi-Wen Hsu __func__, writeable,
1373db7ec3c3SLi-Wen Hsu (uintmax_t)(HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)));
1374a560f3ebSWei Hu
1375a560f3ebSWei Hu if (writeable < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) {
1376a560f3ebSWei Hu /*
1377a560f3ebSWei Hu * The Tx ring seems full.
1378a560f3ebSWei Hu */
1379a560f3ebSWei Hu return (0);
1380a560f3ebSWei Hu }
1381a560f3ebSWei Hu
1382a560f3ebSWei Hu ret = writeable - HVSOCK_PKT_LEN(0) - HVSOCK_PKT_LEN(0);
1383a560f3ebSWei Hu
1384a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1385a560f3ebSWei Hu "%s: available size is %u\n", __func__, rounddown2(ret, 8));
1386a560f3ebSWei Hu
1387a560f3ebSWei Hu return (rounddown2(ret, 8));
1388a560f3ebSWei Hu }
1389a560f3ebSWei Hu
1390a560f3ebSWei Hu static void
hvsock_set_chan_pending_send_size(struct vmbus_channel * chan)1391a560f3ebSWei Hu hvsock_set_chan_pending_send_size(struct vmbus_channel *chan)
1392a560f3ebSWei Hu {
1393a560f3ebSWei Hu vmbus_chan_set_pending_send_size(chan,
1394a560f3ebSWei Hu HVSOCK_PKT_LEN(HVSOCK_SEND_BUF_SZ));
1395a560f3ebSWei Hu }
1396a560f3ebSWei Hu
1397a560f3ebSWei Hu static int
hvsock_open_channel(struct vmbus_channel * chan,struct socket * so)1398a560f3ebSWei Hu hvsock_open_channel(struct vmbus_channel *chan, struct socket *so)
1399a560f3ebSWei Hu {
1400a560f3ebSWei Hu unsigned int rcvbuf, sndbuf;
1401a560f3ebSWei Hu struct hvs_pcb *pcb = so2hvspcb(so);
1402a560f3ebSWei Hu int ret;
1403a560f3ebSWei Hu
1404a560f3ebSWei Hu if (vmbus_current_version < VMBUS_VERSION_WIN10_V5) {
1405a560f3ebSWei Hu sndbuf = HVS_RINGBUF_SND_SIZE;
1406a560f3ebSWei Hu rcvbuf = HVS_RINGBUF_RCV_SIZE;
1407a560f3ebSWei Hu } else {
1408a560f3ebSWei Hu sndbuf = MAX(so->so_snd.sb_hiwat, HVS_RINGBUF_SND_SIZE);
1409a560f3ebSWei Hu sndbuf = MIN(sndbuf, HVS_RINGBUF_MAX_SIZE);
1410a560f3ebSWei Hu sndbuf = rounddown2(sndbuf, PAGE_SIZE);
1411a560f3ebSWei Hu rcvbuf = MAX(so->so_rcv.sb_hiwat, HVS_RINGBUF_RCV_SIZE);
1412a560f3ebSWei Hu rcvbuf = MIN(rcvbuf, HVS_RINGBUF_MAX_SIZE);
1413a560f3ebSWei Hu rcvbuf = rounddown2(rcvbuf, PAGE_SIZE);
1414a560f3ebSWei Hu }
1415a560f3ebSWei Hu
1416a560f3ebSWei Hu /*
1417a560f3ebSWei Hu * Can only read whatever user provided size of data
1418a560f3ebSWei Hu * from ring buffer. Turn off batched reading.
1419a560f3ebSWei Hu */
1420a560f3ebSWei Hu vmbus_chan_set_readbatch(chan, false);
1421a560f3ebSWei Hu
1422a560f3ebSWei Hu ret = vmbus_chan_open(chan, sndbuf, rcvbuf, NULL, 0,
1423a560f3ebSWei Hu hvsock_chan_cb, pcb);
1424a560f3ebSWei Hu
1425a560f3ebSWei Hu if (ret != 0) {
1426a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_ERR,
1427a560f3ebSWei Hu "%s: failed to open hvsock channel, sndbuf = %u, "
1428a560f3ebSWei Hu "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
1429a560f3ebSWei Hu } else {
1430a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_INFO,
1431a560f3ebSWei Hu "%s: hvsock channel opened, sndbuf = %u, i"
1432a560f3ebSWei Hu "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
1433a560f3ebSWei Hu /*
1434a560f3ebSWei Hu * Se the pending send size so to receive wakeup
1435a560f3ebSWei Hu * signals from host when there is enough space on
1436a560f3ebSWei Hu * rx buffer ring to write.
1437a560f3ebSWei Hu */
1438a560f3ebSWei Hu hvsock_set_chan_pending_send_size(chan);
1439a560f3ebSWei Hu }
1440a560f3ebSWei Hu
1441a560f3ebSWei Hu return ret;
1442a560f3ebSWei Hu }
1443a560f3ebSWei Hu
1444a560f3ebSWei Hu /*
1445a560f3ebSWei Hu * Guest is listening passively on the socket. Open channel and
1446a560f3ebSWei Hu * create a new socket for the conneciton.
1447a560f3ebSWei Hu */
1448a560f3ebSWei Hu static void
hvsock_open_conn_passive(struct vmbus_channel * chan,struct socket * so,struct hvsock_sc * sc)1449a560f3ebSWei Hu hvsock_open_conn_passive(struct vmbus_channel *chan, struct socket *so,
1450a560f3ebSWei Hu struct hvsock_sc *sc)
1451a560f3ebSWei Hu {
1452a560f3ebSWei Hu struct socket *new_so;
1453a560f3ebSWei Hu struct hvs_pcb *new_pcb, *pcb;
1454a560f3ebSWei Hu int error;
1455a560f3ebSWei Hu
1456a560f3ebSWei Hu /* Do nothing if socket is not listening */
1457f4bb1869SMark Johnston if (!SOLISTENING(so)) {
1458a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_ERR,
1459a560f3ebSWei Hu "%s: socket is not a listening one\n", __func__);
1460a560f3ebSWei Hu return;
1461a560f3ebSWei Hu }
1462a560f3ebSWei Hu
1463a560f3ebSWei Hu /*
1464*a0da2f73SDag-Erling Smørgrav * Create a new socket. This will call pr_attach() to complete
1465a560f3ebSWei Hu * the socket initialization and put the new socket onto
1466a560f3ebSWei Hu * listening socket's sol_incomp list, waiting to be promoted
1467a560f3ebSWei Hu * to sol_comp list.
1468a560f3ebSWei Hu * The new socket created has ref count 0. There is no other
1469a560f3ebSWei Hu * thread that changes the state of this new one at the
1470a560f3ebSWei Hu * moment, so we don't need to hold its lock while opening
1471a560f3ebSWei Hu * channel and filling out its pcb information.
1472a560f3ebSWei Hu */
1473a560f3ebSWei Hu new_so = sonewconn(so, 0);
1474a560f3ebSWei Hu if (!new_so)
1475a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_ERR,
1476a560f3ebSWei Hu "%s: creating new socket failed\n", __func__);
1477a560f3ebSWei Hu
1478a560f3ebSWei Hu /*
1479a560f3ebSWei Hu * Now open the vmbus channel. If it fails, the socket will be
1480a560f3ebSWei Hu * on the listening socket's sol_incomp queue until it is
1481a560f3ebSWei Hu * replaced and aborted.
1482a560f3ebSWei Hu */
1483a560f3ebSWei Hu error = hvsock_open_channel(chan, new_so);
1484a560f3ebSWei Hu if (error) {
1485a560f3ebSWei Hu new_so->so_error = error;
1486a560f3ebSWei Hu return;
1487a560f3ebSWei Hu }
1488a560f3ebSWei Hu
1489a560f3ebSWei Hu pcb = so->so_pcb;
1490a560f3ebSWei Hu new_pcb = new_so->so_pcb;
1491a560f3ebSWei Hu
1492a560f3ebSWei Hu hvs_addr_set(&(new_pcb->local_addr), pcb->local_addr.hvs_port);
1493a560f3ebSWei Hu /* Remote port is unknown to guest in this type of conneciton */
1494a560f3ebSWei Hu hvs_addr_set(&(new_pcb->remote_addr), HVADDR_PORT_UNKNOWN);
1495a560f3ebSWei Hu new_pcb->chan = chan;
1496a560f3ebSWei Hu new_pcb->recv_data_len = 0;
1497a560f3ebSWei Hu new_pcb->recv_data_off = 0;
1498a560f3ebSWei Hu new_pcb->rb_init = false;
1499a560f3ebSWei Hu
1500a560f3ebSWei Hu new_pcb->vm_srv_id = *vmbus_chan_guid_type(chan);
1501a560f3ebSWei Hu new_pcb->host_srv_id = *vmbus_chan_guid_inst(chan);
1502a560f3ebSWei Hu
1503a560f3ebSWei Hu hvs_insert_socket_on_list(new_so, HVS_LIST_CONNECTED);
1504a560f3ebSWei Hu
1505a560f3ebSWei Hu sc->pcb = new_pcb;
1506a560f3ebSWei Hu
1507a560f3ebSWei Hu /*
1508a560f3ebSWei Hu * Change the socket state to SS_ISCONNECTED. This will promote
1509a560f3ebSWei Hu * the socket to sol_comp queue and wake up the thread which
1510a560f3ebSWei Hu * is accepting connection.
1511a560f3ebSWei Hu */
1512a560f3ebSWei Hu soisconnected(new_so);
1513a560f3ebSWei Hu }
1514a560f3ebSWei Hu
1515a560f3ebSWei Hu
1516a560f3ebSWei Hu /*
1517a560f3ebSWei Hu * Guest is actively connecting to host.
1518a560f3ebSWei Hu */
1519a560f3ebSWei Hu static void
hvsock_open_conn_active(struct vmbus_channel * chan,struct socket * so)1520a560f3ebSWei Hu hvsock_open_conn_active(struct vmbus_channel *chan, struct socket *so)
1521a560f3ebSWei Hu {
1522a560f3ebSWei Hu struct hvs_pcb *pcb;
1523a560f3ebSWei Hu int error;
1524a560f3ebSWei Hu
1525a560f3ebSWei Hu error = hvsock_open_channel(chan, so);
1526a560f3ebSWei Hu if (error) {
1527a560f3ebSWei Hu so->so_error = error;
1528a560f3ebSWei Hu return;
1529a560f3ebSWei Hu }
1530a560f3ebSWei Hu
1531a560f3ebSWei Hu pcb = so->so_pcb;
1532a560f3ebSWei Hu pcb->chan = chan;
1533a560f3ebSWei Hu pcb->recv_data_len = 0;
1534a560f3ebSWei Hu pcb->recv_data_off = 0;
1535a560f3ebSWei Hu pcb->rb_init = false;
1536a560f3ebSWei Hu
1537a560f3ebSWei Hu mtx_lock(&hvs_trans_socks_mtx);
1538a560f3ebSWei Hu __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
1539a560f3ebSWei Hu __hvs_insert_socket_on_list(so, HVS_LIST_CONNECTED);
1540a560f3ebSWei Hu mtx_unlock(&hvs_trans_socks_mtx);
1541a560f3ebSWei Hu
1542a560f3ebSWei Hu /*
1543a560f3ebSWei Hu * Change the socket state to SS_ISCONNECTED. This will wake up
1544a560f3ebSWei Hu * the thread sleeping in connect call.
1545a560f3ebSWei Hu */
1546a560f3ebSWei Hu soisconnected(so);
1547a560f3ebSWei Hu }
1548a560f3ebSWei Hu
1549a560f3ebSWei Hu static void
hvsock_open_connection(struct vmbus_channel * chan,struct hvsock_sc * sc)1550a560f3ebSWei Hu hvsock_open_connection(struct vmbus_channel *chan, struct hvsock_sc *sc)
1551a560f3ebSWei Hu {
1552a560f3ebSWei Hu struct hyperv_guid *inst_guid, *type_guid;
1553a560f3ebSWei Hu bool conn_from_host;
1554a560f3ebSWei Hu struct sockaddr_hvs addr;
1555a560f3ebSWei Hu struct socket *so;
1556a560f3ebSWei Hu struct hvs_pcb *pcb;
1557a560f3ebSWei Hu
1558a560f3ebSWei Hu type_guid = (struct hyperv_guid *) vmbus_chan_guid_type(chan);
1559a560f3ebSWei Hu inst_guid = (struct hyperv_guid *) vmbus_chan_guid_inst(chan);
1560a560f3ebSWei Hu conn_from_host = vmbus_chan_is_hvs_conn_from_host(chan);
1561a560f3ebSWei Hu
1562a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_INFO, "type_guid is ");
1563a560f3ebSWei Hu hvsock_print_guid(type_guid);
1564a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_INFO, "inst_guid is ");
1565a560f3ebSWei Hu hvsock_print_guid(inst_guid);
1566a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_INFO, "connection %s host\n",
1567a560f3ebSWei Hu (conn_from_host == true ) ? "from" : "to");
1568a560f3ebSWei Hu
1569a560f3ebSWei Hu /*
1570a560f3ebSWei Hu * The listening port should be in [0, MAX_LISTEN_PORT]
1571a560f3ebSWei Hu */
1572a560f3ebSWei Hu if (!is_valid_srv_id(type_guid))
1573a560f3ebSWei Hu return;
1574a560f3ebSWei Hu
1575a560f3ebSWei Hu /*
1576a560f3ebSWei Hu * There should be a bound socket already created no matter
1577a560f3ebSWei Hu * it is a passive or active connection.
1578a560f3ebSWei Hu * For host initiated connection (passive on guest side),
1579a560f3ebSWei Hu * the type_guid contains the port which guest is bound and
1580a560f3ebSWei Hu * listening.
1581a560f3ebSWei Hu * For the guest initiated connection (active on guest side),
1582a560f3ebSWei Hu * the inst_guid contains the port that guest has auto bound
1583a560f3ebSWei Hu * to.
1584a560f3ebSWei Hu */
1585a560f3ebSWei Hu hvs_addr_init(&addr, conn_from_host ? type_guid : inst_guid);
1586a560f3ebSWei Hu so = hvs_find_socket_on_list(&addr, HVS_LIST_BOUND);
1587a560f3ebSWei Hu if (!so) {
1588a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_ERR,
1589a560f3ebSWei Hu "%s: no bound socket found for port %u\n",
1590a560f3ebSWei Hu __func__, addr.hvs_port);
1591a560f3ebSWei Hu return;
1592a560f3ebSWei Hu }
1593a560f3ebSWei Hu
1594a560f3ebSWei Hu if (conn_from_host) {
1595a560f3ebSWei Hu hvsock_open_conn_passive(chan, so, sc);
1596a560f3ebSWei Hu } else {
1597a560f3ebSWei Hu (void) hvs_trans_lock();
1598a560f3ebSWei Hu pcb = so->so_pcb;
1599a560f3ebSWei Hu if (pcb && pcb->so) {
1600a560f3ebSWei Hu sc->pcb = so2hvspcb(so);
1601a560f3ebSWei Hu hvsock_open_conn_active(chan, so);
1602a560f3ebSWei Hu } else {
1603a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1604a560f3ebSWei Hu "%s: channel detached before open\n", __func__);
1605a560f3ebSWei Hu }
1606a560f3ebSWei Hu hvs_trans_unlock();
1607a560f3ebSWei Hu }
1608a560f3ebSWei Hu
1609a560f3ebSWei Hu }
1610a560f3ebSWei Hu
1611a560f3ebSWei Hu static int
hvsock_probe(device_t dev)1612a560f3ebSWei Hu hvsock_probe(device_t dev)
1613a560f3ebSWei Hu {
1614a560f3ebSWei Hu struct vmbus_channel *channel = vmbus_get_channel(dev);
1615a560f3ebSWei Hu
1616a560f3ebSWei Hu if (!channel || !vmbus_chan_is_hvs(channel)) {
1617a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1618a560f3ebSWei Hu "hvsock_probe called but not a hvsock channel id %u\n",
1619a560f3ebSWei Hu vmbus_chan_id(channel));
1620a560f3ebSWei Hu
1621a560f3ebSWei Hu return ENXIO;
1622a560f3ebSWei Hu } else {
1623a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1624a560f3ebSWei Hu "hvsock_probe got a hvsock channel id %u\n",
1625a560f3ebSWei Hu vmbus_chan_id(channel));
1626a560f3ebSWei Hu
1627a560f3ebSWei Hu return BUS_PROBE_DEFAULT;
1628a560f3ebSWei Hu }
1629a560f3ebSWei Hu }
1630a560f3ebSWei Hu
1631a560f3ebSWei Hu static int
hvsock_attach(device_t dev)1632a560f3ebSWei Hu hvsock_attach(device_t dev)
1633a560f3ebSWei Hu {
1634a560f3ebSWei Hu struct vmbus_channel *channel = vmbus_get_channel(dev);
1635a560f3ebSWei Hu struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev);
1636a560f3ebSWei Hu
1637a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_attach called.\n");
1638a560f3ebSWei Hu
1639a560f3ebSWei Hu hvsock_open_connection(channel, sc);
1640a560f3ebSWei Hu
1641a560f3ebSWei Hu /*
1642a560f3ebSWei Hu * Always return success. On error the host will rescind the device
1643a560f3ebSWei Hu * in 30 seconds and we can do cleanup at that time in
1644a560f3ebSWei Hu * vmbus_chan_msgproc_chrescind().
1645a560f3ebSWei Hu */
1646a560f3ebSWei Hu return (0);
1647a560f3ebSWei Hu }
1648a560f3ebSWei Hu
1649a560f3ebSWei Hu static int
hvsock_detach(device_t dev)1650a560f3ebSWei Hu hvsock_detach(device_t dev)
1651a560f3ebSWei Hu {
1652a560f3ebSWei Hu struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev);
1653a560f3ebSWei Hu struct socket *so;
1654f94acf52SMark Johnston int retry;
1655a560f3ebSWei Hu
1656a560f3ebSWei Hu if (bootverbose)
1657a560f3ebSWei Hu device_printf(dev, "hvsock_detach called.\n");
1658a560f3ebSWei Hu
1659a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_detach called.\n");
1660a560f3ebSWei Hu
1661a560f3ebSWei Hu if (sc->pcb != NULL) {
1662a560f3ebSWei Hu (void) hvs_trans_lock();
1663a560f3ebSWei Hu
1664a560f3ebSWei Hu so = hsvpcb2so(sc->pcb);
1665a560f3ebSWei Hu if (so) {
1666a560f3ebSWei Hu /* Close the connection */
1667a560f3ebSWei Hu if (so->so_state &
1668a560f3ebSWei Hu (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
1669a560f3ebSWei Hu soisdisconnected(so);
1670a560f3ebSWei Hu }
1671a560f3ebSWei Hu
1672a560f3ebSWei Hu mtx_lock(&hvs_trans_socks_mtx);
1673a560f3ebSWei Hu __hvs_remove_pcb_from_list(sc->pcb,
1674a560f3ebSWei Hu HVS_LIST_BOUND | HVS_LIST_CONNECTED);
1675a560f3ebSWei Hu mtx_unlock(&hvs_trans_socks_mtx);
1676a560f3ebSWei Hu
1677a560f3ebSWei Hu /*
1678a560f3ebSWei Hu * Close channel while no reader and sender are working
1679a560f3ebSWei Hu * on the buffer rings.
1680a560f3ebSWei Hu */
1681a560f3ebSWei Hu if (so) {
1682a560f3ebSWei Hu retry = 0;
1683f94acf52SMark Johnston while (SOCK_IO_RECV_LOCK(so, 0) == EWOULDBLOCK) {
1684a560f3ebSWei Hu /*
1685a560f3ebSWei Hu * Someone is reading, rx br is busy
1686a560f3ebSWei Hu */
1687a560f3ebSWei Hu soisdisconnected(so);
1688a560f3ebSWei Hu DELAY(500);
1689a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1690a560f3ebSWei Hu "waiting for rx reader to exit, "
1691a560f3ebSWei Hu "retry = %d\n", retry++);
1692a560f3ebSWei Hu }
1693a560f3ebSWei Hu retry = 0;
1694f94acf52SMark Johnston while (SOCK_IO_SEND_LOCK(so, 0) == EWOULDBLOCK) {
1695a560f3ebSWei Hu /*
1696a560f3ebSWei Hu * Someone is sending, tx br is busy
1697a560f3ebSWei Hu */
1698a560f3ebSWei Hu soisdisconnected(so);
1699a560f3ebSWei Hu DELAY(500);
1700a560f3ebSWei Hu HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
1701a560f3ebSWei Hu "waiting for tx sender to exit, "
1702a560f3ebSWei Hu "retry = %d\n", retry++);
1703a560f3ebSWei Hu }
1704a560f3ebSWei Hu }
1705a560f3ebSWei Hu
1706a560f3ebSWei Hu
1707a560f3ebSWei Hu bzero(sc->pcb, sizeof(struct hvs_pcb));
1708a560f3ebSWei Hu free(sc->pcb, M_HVSOCK);
1709a560f3ebSWei Hu sc->pcb = NULL;
1710a560f3ebSWei Hu
1711a560f3ebSWei Hu if (so) {
1712f94acf52SMark Johnston SOCK_IO_RECV_UNLOCK(so);
1713f94acf52SMark Johnston SOCK_IO_SEND_UNLOCK(so);
1714a560f3ebSWei Hu so->so_pcb = NULL;
1715a560f3ebSWei Hu }
1716a560f3ebSWei Hu
1717a560f3ebSWei Hu hvs_trans_unlock();
1718a560f3ebSWei Hu }
1719a560f3ebSWei Hu
1720a560f3ebSWei Hu vmbus_chan_close(vmbus_get_channel(dev));
1721a560f3ebSWei Hu
1722a560f3ebSWei Hu return (0);
1723a560f3ebSWei Hu }
1724a560f3ebSWei Hu
1725a560f3ebSWei Hu static device_method_t hvsock_methods[] = {
1726a560f3ebSWei Hu /* Device interface */
1727a560f3ebSWei Hu DEVMETHOD(device_probe, hvsock_probe),
1728a560f3ebSWei Hu DEVMETHOD(device_attach, hvsock_attach),
1729a560f3ebSWei Hu DEVMETHOD(device_detach, hvsock_detach),
1730a560f3ebSWei Hu DEVMETHOD_END
1731a560f3ebSWei Hu };
1732a560f3ebSWei Hu
1733a560f3ebSWei Hu static driver_t hvsock_driver = {
1734a560f3ebSWei Hu "hv_sock",
1735a560f3ebSWei Hu hvsock_methods,
1736a560f3ebSWei Hu sizeof(struct hvsock_sc)
1737a560f3ebSWei Hu };
1738a560f3ebSWei Hu
1739c1cef544SJohn Baldwin DRIVER_MODULE(hvsock, vmbus, hvsock_driver, NULL, NULL);
1740a560f3ebSWei Hu MODULE_VERSION(hvsock, 1);
1741a560f3ebSWei Hu MODULE_DEPEND(hvsock, vmbus, 1, 1, 1);
1742