1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2020 Microsoft Corp. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29 #include <sys/cdefs.h> 30 #include <sys/param.h> 31 #include <sys/bus.h> 32 #include <sys/domain.h> 33 #include <sys/lock.h> 34 #include <sys/kernel.h> 35 #include <sys/types.h> 36 #include <sys/malloc.h> 37 #include <sys/module.h> 38 #include <sys/mutex.h> 39 #include <sys/proc.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/sysctl.h> 43 #include <sys/sysproto.h> 44 #include <sys/systm.h> 45 #include <sys/sockbuf.h> 46 #include <sys/sx.h> 47 #include <sys/uio.h> 48 49 #include <net/vnet.h> 50 51 #include <dev/hyperv/vmbus/vmbus_reg.h> 52 53 #include "hv_sock.h" 54 55 #define HVSOCK_DBG_NONE 0x0 56 #define HVSOCK_DBG_INFO 0x1 57 #define HVSOCK_DBG_ERR 0x2 58 #define HVSOCK_DBG_VERBOSE 0x3 59 60 61 SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket"); 62 63 static int hvs_dbg_level; 64 SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level, 65 0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose"); 66 67 68 #define HVSOCK_DBG(level, ...) 
do { \ 69 if (hvs_dbg_level >= (level)) \ 70 printf(__VA_ARGS__); \ 71 } while (0) 72 73 MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures"); 74 75 static int hvs_dom_probe(void); 76 77 /* The MTU is 16KB per host side's design */ 78 #define HVSOCK_MTU_SIZE (1024 * 16) 79 #define HVSOCK_SEND_BUF_SZ (PAGE_SIZE - sizeof(struct vmpipe_proto_header)) 80 81 #define HVSOCK_HEADER_LEN (sizeof(struct hvs_pkt_header)) 82 83 #define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \ 84 roundup2(payload_len, 8) + \ 85 sizeof(uint64_t)) 86 87 /* 88 * HyperV Transport sockets 89 */ 90 static struct protosw hv_socket_protosw = { 91 .pr_type = SOCK_STREAM, 92 .pr_protocol = HYPERV_SOCK_PROTO_TRANS, 93 .pr_flags = PR_CONNREQUIRED, 94 .pr_attach = hvs_trans_attach, 95 .pr_bind = hvs_trans_bind, 96 .pr_listen = hvs_trans_listen, 97 .pr_accept = hvs_trans_accept, 98 .pr_connect = hvs_trans_connect, 99 .pr_peeraddr = hvs_trans_peeraddr, 100 .pr_sockaddr = hvs_trans_sockaddr, 101 .pr_soreceive = hvs_trans_soreceive, 102 .pr_sosend = hvs_trans_sosend, 103 .pr_disconnect = hvs_trans_disconnect, 104 .pr_close = hvs_trans_close, 105 .pr_detach = hvs_trans_detach, 106 .pr_shutdown = hvs_trans_shutdown, 107 .pr_abort = hvs_trans_abort, 108 }; 109 110 static struct domain hv_socket_domain = { 111 .dom_family = AF_HYPERV, 112 .dom_name = "hyperv", 113 .dom_probe = hvs_dom_probe, 114 .dom_nprotosw = 1, 115 .dom_protosw = { &hv_socket_protosw }, 116 }; 117 118 DOMAIN_SET(hv_socket_); 119 120 #define MAX_PORT ((uint32_t)0xFFFFFFFF) 121 #define MIN_PORT ((uint32_t)0x0) 122 123 /* 00000000-facb-11e6-bd58-64006a7986d3 */ 124 static const struct hyperv_guid srv_id_template = { 125 .hv_guid = { 126 0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11, 127 0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 } 128 }; 129 130 static int hvsock_br_callback(void *, int, void *); 131 static uint32_t hvsock_canread_check(struct hvs_pcb *); 132 static uint32_t hvsock_canwrite_check(struct 
hvs_pcb *); 133 static int hvsock_send_data(struct vmbus_channel *chan, 134 struct uio *uio, uint32_t to_write, struct sockbuf *sb); 135 136 137 138 /* Globals */ 139 static struct sx hvs_trans_socks_sx; 140 static struct mtx hvs_trans_socks_mtx; 141 static LIST_HEAD(, hvs_pcb) hvs_trans_bound_socks; 142 static LIST_HEAD(, hvs_pcb) hvs_trans_connected_socks; 143 static uint32_t previous_auto_bound_port; 144 145 static void 146 hvsock_print_guid(struct hyperv_guid *guid) 147 { 148 unsigned char *p = (unsigned char *)guid; 149 150 HVSOCK_DBG(HVSOCK_DBG_INFO, 151 "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n", 152 *(unsigned int *)p, 153 *((unsigned short *) &p[4]), 154 *((unsigned short *) &p[6]), 155 p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 156 } 157 158 static bool 159 is_valid_srv_id(const struct hyperv_guid *id) 160 { 161 return !memcmp(&id->hv_guid[4], 162 &srv_id_template.hv_guid[4], sizeof(struct hyperv_guid) - 4); 163 } 164 165 static unsigned int 166 get_port_by_srv_id(const struct hyperv_guid *srv_id) 167 { 168 return *((const unsigned int *)srv_id); 169 } 170 171 static void 172 set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port) 173 { 174 *((unsigned int *)srv_id) = port; 175 } 176 177 178 static void 179 __hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list) 180 { 181 struct hvs_pcb *p = NULL; 182 183 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb); 184 185 if (!pcb) 186 return; 187 188 if (list & HVS_LIST_BOUND) { 189 LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) 190 if (p == pcb) 191 LIST_REMOVE(p, bound_next); 192 } 193 194 if (list & HVS_LIST_CONNECTED) { 195 LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) 196 if (p == pcb) 197 LIST_REMOVE(pcb, connected_next); 198 } 199 } 200 201 static void 202 __hvs_remove_socket_from_list(struct socket *so, unsigned char list) 203 { 204 struct hvs_pcb *pcb = so2hvspcb(so); 205 206 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is 
%p\n", __func__, pcb); 207 208 __hvs_remove_pcb_from_list(pcb, list); 209 } 210 211 static void 212 __hvs_insert_socket_on_list(struct socket *so, unsigned char list) 213 { 214 struct hvs_pcb *pcb = so2hvspcb(so); 215 216 if (list & HVS_LIST_BOUND) 217 LIST_INSERT_HEAD(&hvs_trans_bound_socks, 218 pcb, bound_next); 219 220 if (list & HVS_LIST_CONNECTED) 221 LIST_INSERT_HEAD(&hvs_trans_connected_socks, 222 pcb, connected_next); 223 } 224 225 void 226 hvs_remove_socket_from_list(struct socket *so, unsigned char list) 227 { 228 if (!so || !so->so_pcb) { 229 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 230 "%s: socket or so_pcb is null\n", __func__); 231 return; 232 } 233 234 mtx_lock(&hvs_trans_socks_mtx); 235 __hvs_remove_socket_from_list(so, list); 236 mtx_unlock(&hvs_trans_socks_mtx); 237 } 238 239 static void 240 hvs_insert_socket_on_list(struct socket *so, unsigned char list) 241 { 242 if (!so || !so->so_pcb) { 243 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 244 "%s: socket or so_pcb is null\n", __func__); 245 return; 246 } 247 248 mtx_lock(&hvs_trans_socks_mtx); 249 __hvs_insert_socket_on_list(so, list); 250 mtx_unlock(&hvs_trans_socks_mtx); 251 } 252 253 static struct socket * 254 __hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list) 255 { 256 struct hvs_pcb *p = NULL; 257 258 if (list & HVS_LIST_BOUND) 259 LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) 260 if (p->so != NULL && 261 addr->hvs_port == p->local_addr.hvs_port) 262 return p->so; 263 264 if (list & HVS_LIST_CONNECTED) 265 LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) 266 if (p->so != NULL && 267 addr->hvs_port == p->local_addr.hvs_port) 268 return p->so; 269 270 return NULL; 271 } 272 273 static struct socket * 274 hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list) 275 { 276 struct socket *s = NULL; 277 278 mtx_lock(&hvs_trans_socks_mtx); 279 s = __hvs_find_socket_on_list(addr, list); 280 mtx_unlock(&hvs_trans_socks_mtx); 281 282 return s; 283 } 284 285 static inline 
void 286 hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port) 287 { 288 memset(addr, 0, sizeof(*addr)); 289 addr->sa_family = AF_HYPERV; 290 addr->sa_len = sizeof(*addr); 291 addr->hvs_port = port; 292 } 293 294 void 295 hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id) 296 { 297 hvs_addr_set(addr, get_port_by_srv_id(svr_id)); 298 } 299 300 int 301 hvs_trans_lock(void) 302 { 303 sx_xlock(&hvs_trans_socks_sx); 304 return (0); 305 } 306 307 void 308 hvs_trans_unlock(void) 309 { 310 sx_xunlock(&hvs_trans_socks_sx); 311 } 312 313 static int 314 hvs_dom_probe(void) 315 { 316 317 /* Don't even give us a chance to attach on non-HyperV. */ 318 if (vm_guest != VM_GUEST_HV) 319 return (ENXIO); 320 return (0); 321 } 322 323 static void 324 hvs_trans_init(void *arg __unused) 325 { 326 327 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 328 "%s: HyperV Socket hvs_trans_init called\n", __func__); 329 330 /* Initialize Globals */ 331 previous_auto_bound_port = MAX_PORT; 332 sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx"); 333 mtx_init(&hvs_trans_socks_mtx, 334 "hvs_trans_socks_mtx", NULL, MTX_DEF); 335 LIST_INIT(&hvs_trans_bound_socks); 336 LIST_INIT(&hvs_trans_connected_socks); 337 } 338 SYSINIT(hvs_trans_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, 339 hvs_trans_init, NULL); 340 341 /* 342 * Called in two cases: 343 * 1) When user calls socket(); 344 * 2) When we accept new incoming conneciton and call sonewconn(). 
345 */ 346 int 347 hvs_trans_attach(struct socket *so, int proto, struct thread *td) 348 { 349 struct hvs_pcb *pcb = so2hvspcb(so); 350 351 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 352 "%s: HyperV Socket hvs_trans_attach called\n", __func__); 353 354 if (so->so_type != SOCK_STREAM) 355 return (ESOCKTNOSUPPORT); 356 357 if (proto != 0 && proto != HYPERV_SOCK_PROTO_TRANS) 358 return (EPROTONOSUPPORT); 359 360 if (pcb != NULL) 361 return (EISCONN); 362 pcb = malloc(sizeof(struct hvs_pcb), M_HVSOCK, M_NOWAIT | M_ZERO); 363 if (pcb == NULL) 364 return (ENOMEM); 365 366 pcb->so = so; 367 so->so_pcb = (void *)pcb; 368 369 return (0); 370 } 371 372 void 373 hvs_trans_detach(struct socket *so) 374 { 375 struct hvs_pcb *pcb; 376 377 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 378 "%s: HyperV Socket hvs_trans_detach called\n", __func__); 379 380 (void) hvs_trans_lock(); 381 pcb = so2hvspcb(so); 382 if (pcb == NULL) { 383 hvs_trans_unlock(); 384 return; 385 } 386 387 if (SOLISTENING(so)) { 388 bzero(pcb, sizeof(*pcb)); 389 free(pcb, M_HVSOCK); 390 } 391 392 so->so_pcb = NULL; 393 394 hvs_trans_unlock(); 395 } 396 397 int 398 hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td) 399 { 400 struct hvs_pcb *pcb = so2hvspcb(so); 401 struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr; 402 int error = 0; 403 404 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 405 "%s: HyperV Socket hvs_trans_bind called\n", __func__); 406 407 if (sa == NULL) { 408 return (EINVAL); 409 } 410 411 if (pcb == NULL) { 412 return (EINVAL); 413 } 414 415 if (sa->sa_family != AF_HYPERV) { 416 HVSOCK_DBG(HVSOCK_DBG_ERR, 417 "%s: Not supported, sa_family is %u\n", 418 __func__, sa->sa_family); 419 return (EAFNOSUPPORT); 420 } 421 if (sa->sa_len != sizeof(*sa)) { 422 HVSOCK_DBG(HVSOCK_DBG_ERR, 423 "%s: Not supported, sa_len is %u\n", 424 __func__, sa->sa_len); 425 return (EINVAL); 426 } 427 428 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 429 "%s: binding port = 0x%x\n", __func__, sa->hvs_port); 430 431 mtx_lock(&hvs_trans_socks_mtx); 432 
if (__hvs_find_socket_on_list(sa, 433 HVS_LIST_BOUND | HVS_LIST_CONNECTED)) { 434 error = EADDRINUSE; 435 } else { 436 /* 437 * The address is available for us to bind. 438 * Add socket to the bound list. 439 */ 440 hvs_addr_set(&pcb->local_addr, sa->hvs_port); 441 hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY); 442 __hvs_insert_socket_on_list(so, HVS_LIST_BOUND); 443 } 444 mtx_unlock(&hvs_trans_socks_mtx); 445 446 return (error); 447 } 448 449 int 450 hvs_trans_listen(struct socket *so, int backlog, struct thread *td) 451 { 452 struct hvs_pcb *pcb = so2hvspcb(so); 453 struct socket *bound_so; 454 int error; 455 456 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 457 "%s: HyperV Socket hvs_trans_listen called\n", __func__); 458 459 if (pcb == NULL) 460 return (EINVAL); 461 462 /* Check if the address is already bound and it was by us. */ 463 bound_so = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND); 464 if (bound_so == NULL || bound_so != so) { 465 HVSOCK_DBG(HVSOCK_DBG_ERR, 466 "%s: Address not bound or not by us.\n", __func__); 467 return (EADDRNOTAVAIL); 468 } 469 470 SOCK_LOCK(so); 471 error = solisten_proto_check(so); 472 if (error == 0) 473 solisten_proto(so, backlog); 474 SOCK_UNLOCK(so); 475 476 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 477 "%s: HyperV Socket listen error = %d\n", __func__, error); 478 return (error); 479 } 480 481 int 482 hvs_trans_accept(struct socket *so, struct sockaddr **nam) 483 { 484 struct hvs_pcb *pcb = so2hvspcb(so); 485 486 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 487 "%s: HyperV Socket hvs_trans_accept called\n", __func__); 488 489 if (pcb == NULL) 490 return (EINVAL); 491 492 *nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, 493 M_NOWAIT); 494 495 return ((*nam == NULL) ? 
ENOMEM : 0); 496 } 497 498 int 499 hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 500 { 501 struct hvs_pcb *pcb = so2hvspcb(so); 502 struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam; 503 bool found_auto_bound_port = false; 504 int i, error = 0; 505 506 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 507 "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n", 508 __func__, raddr->hvs_port); 509 510 if (pcb == NULL) 511 return (EINVAL); 512 513 /* Verify the remote address */ 514 if (raddr == NULL) 515 return (EINVAL); 516 if (raddr->sa_family != AF_HYPERV) 517 return (EAFNOSUPPORT); 518 if (raddr->sa_len != sizeof(*raddr)) 519 return (EINVAL); 520 521 mtx_lock(&hvs_trans_socks_mtx); 522 if (so->so_state & 523 (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) { 524 HVSOCK_DBG(HVSOCK_DBG_ERR, 525 "%s: socket connect in progress\n", 526 __func__); 527 error = EINPROGRESS; 528 goto out; 529 } 530 531 /* 532 * Find an available port for us to auto bind the local 533 * address. 
534 */ 535 hvs_addr_set(&pcb->local_addr, 0); 536 537 for (i = previous_auto_bound_port - 1; 538 i != previous_auto_bound_port; i --) { 539 if (i == MIN_PORT) 540 i = MAX_PORT; 541 542 pcb->local_addr.hvs_port = i; 543 544 if (__hvs_find_socket_on_list(&pcb->local_addr, 545 HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) { 546 found_auto_bound_port = true; 547 previous_auto_bound_port = i; 548 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 549 "%s: found local bound port is %x\n", 550 __func__, pcb->local_addr.hvs_port); 551 break; 552 } 553 } 554 555 if (found_auto_bound_port == true) { 556 /* Found available port for auto bound, put on list */ 557 __hvs_insert_socket_on_list(so, HVS_LIST_BOUND); 558 /* Set VM service ID */ 559 pcb->vm_srv_id = srv_id_template; 560 set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port); 561 /* Set host service ID and remote port */ 562 pcb->host_srv_id = srv_id_template; 563 set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port); 564 hvs_addr_set(&pcb->remote_addr, raddr->hvs_port); 565 566 /* Change the socket state to SS_ISCONNECTING */ 567 soisconnecting(so); 568 } else { 569 HVSOCK_DBG(HVSOCK_DBG_ERR, 570 "%s: No local port available for auto bound\n", 571 __func__); 572 error = EADDRINUSE; 573 } 574 575 HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is "); 576 hvsock_print_guid(&pcb->vm_srv_id); 577 HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is "); 578 hvsock_print_guid(&pcb->host_srv_id); 579 580 out: 581 mtx_unlock(&hvs_trans_socks_mtx); 582 583 if (found_auto_bound_port == true) 584 vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id); 585 586 return (error); 587 } 588 589 int 590 hvs_trans_disconnect(struct socket *so) 591 { 592 struct hvs_pcb *pcb; 593 594 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 595 "%s: HyperV Socket hvs_trans_disconnect called\n", __func__); 596 597 (void) hvs_trans_lock(); 598 pcb = so2hvspcb(so); 599 if (pcb == NULL) { 600 hvs_trans_unlock(); 601 return (EINVAL); 602 } 603 604 /* If socket is already 
disconnected, skip this */ 605 if ((so->so_state & SS_ISDISCONNECTED) == 0) 606 soisdisconnecting(so); 607 608 hvs_trans_unlock(); 609 610 return (0); 611 } 612 613 struct hvs_callback_arg { 614 struct uio *uio; 615 struct sockbuf *sb; 616 }; 617 618 int 619 hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr, 620 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 621 { 622 struct hvs_pcb *pcb = so2hvspcb(so); 623 struct sockbuf *sb; 624 ssize_t orig_resid; 625 uint32_t canread, to_read; 626 int flags, error = 0; 627 struct hvs_callback_arg cbarg; 628 629 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 630 "%s: HyperV Socket hvs_trans_soreceive called\n", __func__); 631 632 if (so->so_type != SOCK_STREAM) 633 return (EINVAL); 634 if (pcb == NULL) 635 return (EINVAL); 636 637 if (flagsp != NULL) 638 flags = *flagsp &~ MSG_EOR; 639 else 640 flags = 0; 641 642 if (flags & MSG_PEEK) 643 return (EOPNOTSUPP); 644 645 /* If no space to copy out anything */ 646 if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ) 647 return (EINVAL); 648 649 orig_resid = uio->uio_resid; 650 651 /* Prevent other readers from entering the socket. */ 652 error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags)); 653 if (error) { 654 HVSOCK_DBG(HVSOCK_DBG_ERR, 655 "%s: soiolock returned error = %d\n", __func__, error); 656 return (error); 657 } 658 659 sb = &so->so_rcv; 660 SOCKBUF_LOCK(sb); 661 662 cbarg.uio = uio; 663 cbarg.sb = sb; 664 /* 665 * If the socket is closing, there might still be some data 666 * in rx br to read. However we need to make sure 667 * the channel is still open. 
668 */ 669 if ((sb->sb_state & SBS_CANTRCVMORE) && 670 (so->so_state & SS_ISDISCONNECTED)) { 671 /* Other thread already closed the channel */ 672 error = EPIPE; 673 goto out; 674 } 675 676 while (true) { 677 while (uio->uio_resid > 0 && 678 (canread = hvsock_canread_check(pcb)) > 0) { 679 to_read = MIN(canread, uio->uio_resid); 680 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 681 "%s: to_read = %u, skip = %u\n", __func__, to_read, 682 (unsigned int)(sizeof(struct hvs_pkt_header) + 683 pcb->recv_data_off)); 684 685 error = vmbus_chan_recv_peek_call(pcb->chan, to_read, 686 sizeof(struct hvs_pkt_header) + pcb->recv_data_off, 687 hvsock_br_callback, (void *)&cbarg); 688 /* 689 * It is possible socket is disconnected becasue 690 * we released lock in hvsock_br_callback. So we 691 * need to check the state to make sure it is not 692 * disconnected. 693 */ 694 if (error || so->so_state & SS_ISDISCONNECTED) { 695 break; 696 } 697 698 pcb->recv_data_len -= to_read; 699 pcb->recv_data_off += to_read; 700 } 701 702 if (error) 703 break; 704 705 /* Abort if socket has reported problems. */ 706 if (so->so_error) { 707 if (so->so_error == ESHUTDOWN && 708 orig_resid > uio->uio_resid) { 709 /* 710 * Although we got a FIN, we also received 711 * some data in this round. Delivery it 712 * to user. 713 */ 714 error = 0; 715 } else { 716 if (so->so_error != ESHUTDOWN) 717 error = so->so_error; 718 } 719 720 break; 721 } 722 723 /* Cannot received more. 
*/ 724 if (sb->sb_state & SBS_CANTRCVMORE) 725 break; 726 727 /* We are done if buffer has been filled */ 728 if (uio->uio_resid == 0) 729 break; 730 731 if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid) 732 break; 733 734 /* Buffer ring is empty and we shall not block */ 735 if ((so->so_state & SS_NBIO) || 736 (flags & (MSG_DONTWAIT|MSG_NBIO))) { 737 if (orig_resid == uio->uio_resid) { 738 /* We have not read anything */ 739 error = EAGAIN; 740 } 741 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 742 "%s: non blocked read return, error %d.\n", 743 __func__, error); 744 break; 745 } 746 747 /* 748 * Wait and block until (more) data comes in. 749 * Note: Drops the sockbuf lock during wait. 750 */ 751 error = sbwait(so, SO_RCV); 752 753 if (error) 754 break; 755 756 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 757 "%s: wake up from sbwait, read available is %u\n", 758 __func__, vmbus_chan_read_available(pcb->chan)); 759 } 760 761 out: 762 SOCKBUF_UNLOCK(sb); 763 SOCK_IO_RECV_UNLOCK(so); 764 765 /* We recieved a FIN in this call */ 766 if (so->so_error == ESHUTDOWN) { 767 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 768 /* Send has already closed */ 769 soisdisconnecting(so); 770 } else { 771 /* Just close the receive side */ 772 socantrcvmore(so); 773 } 774 } 775 776 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 777 "%s: returning error = %d, so_error = %d\n", 778 __func__, error, so->so_error); 779 780 return (error); 781 } 782 783 int 784 hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, 785 struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td) 786 { 787 struct hvs_pcb *pcb = so2hvspcb(so); 788 struct sockbuf *sb; 789 ssize_t orig_resid; 790 uint32_t canwrite, to_write; 791 int error = 0; 792 793 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 794 "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %zd\n", 795 __func__, uio->uio_resid); 796 797 if (so->so_type != SOCK_STREAM) 798 return (EINVAL); 799 if (pcb == NULL) 800 return (EINVAL); 801 802 /* If nothing to send */ 
803 if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE) 804 return (EINVAL); 805 806 orig_resid = uio->uio_resid; 807 808 /* Prevent other writers from entering the socket. */ 809 error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags)); 810 if (error) { 811 HVSOCK_DBG(HVSOCK_DBG_ERR, 812 "%s: soiolocak returned error = %d\n", __func__, error); 813 return (error); 814 } 815 816 sb = &so->so_snd; 817 SOCKBUF_LOCK(sb); 818 819 if ((sb->sb_state & SBS_CANTSENDMORE) || 820 so->so_error == ESHUTDOWN) { 821 error = EPIPE; 822 goto out; 823 } 824 825 while (uio->uio_resid > 0) { 826 canwrite = hvsock_canwrite_check(pcb); 827 if (canwrite == 0) { 828 /* We have sent some data */ 829 if (orig_resid > uio->uio_resid) 830 break; 831 /* 832 * We have not sent any data and it is 833 * non-blocked io 834 */ 835 if (so->so_state & SS_NBIO || 836 (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) { 837 error = EWOULDBLOCK; 838 break; 839 } else { 840 /* 841 * We are here because there is no space on 842 * send buffer ring. Signal the other side 843 * to read and free more space. 844 * Sleep wait until space avaiable to send 845 * Note: Drops the sockbuf lock during wait. 
846 */ 847 error = sbwait(so, SO_SND); 848 849 if (error) 850 break; 851 852 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 853 "%s: wake up from sbwait, space avail on " 854 "tx ring is %u\n", 855 __func__, 856 vmbus_chan_write_available(pcb->chan)); 857 858 continue; 859 } 860 } 861 to_write = MIN(canwrite, uio->uio_resid); 862 to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ); 863 864 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 865 "%s: canwrite is %u, to_write = %u\n", __func__, 866 canwrite, to_write); 867 error = hvsock_send_data(pcb->chan, uio, to_write, sb); 868 869 if (error) 870 break; 871 } 872 873 out: 874 SOCKBUF_UNLOCK(sb); 875 SOCK_IO_SEND_UNLOCK(so); 876 877 return (error); 878 } 879 880 int 881 hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam) 882 { 883 struct hvs_pcb *pcb = so2hvspcb(so); 884 885 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 886 "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__); 887 888 if (pcb == NULL) 889 return (EINVAL); 890 891 *nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, M_NOWAIT); 892 893 return ((*nam == NULL)? ENOMEM : 0); 894 } 895 896 int 897 hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam) 898 { 899 struct hvs_pcb *pcb = so2hvspcb(so); 900 901 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 902 "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__); 903 904 if (pcb == NULL) 905 return (EINVAL); 906 907 *nam = sodupsockaddr((struct sockaddr *) &pcb->local_addr, M_NOWAIT); 908 909 return ((*nam == NULL)? 
ENOMEM : 0); 910 } 911 912 void 913 hvs_trans_close(struct socket *so) 914 { 915 struct hvs_pcb *pcb; 916 917 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 918 "%s: HyperV Socket hvs_trans_close called\n", __func__); 919 920 (void) hvs_trans_lock(); 921 pcb = so2hvspcb(so); 922 if (!pcb) { 923 hvs_trans_unlock(); 924 return; 925 } 926 927 if (so->so_state & SS_ISCONNECTED) { 928 /* Send a FIN to peer */ 929 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 930 "%s: hvs_trans_close sending a FIN to host\n", __func__); 931 (void) hvsock_send_data(pcb->chan, NULL, 0, NULL); 932 } 933 934 if (so->so_state & 935 (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) 936 soisdisconnected(so); 937 938 pcb->chan = NULL; 939 pcb->so = NULL; 940 941 if (SOLISTENING(so)) { 942 mtx_lock(&hvs_trans_socks_mtx); 943 /* Remove from bound list */ 944 __hvs_remove_socket_from_list(so, HVS_LIST_BOUND); 945 mtx_unlock(&hvs_trans_socks_mtx); 946 } 947 948 hvs_trans_unlock(); 949 950 return; 951 } 952 953 void 954 hvs_trans_abort(struct socket *so) 955 { 956 struct hvs_pcb *pcb = so2hvspcb(so); 957 958 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 959 "%s: HyperV Socket hvs_trans_abort called\n", __func__); 960 961 (void) hvs_trans_lock(); 962 if (pcb == NULL) { 963 hvs_trans_unlock(); 964 return; 965 } 966 967 if (SOLISTENING(so)) { 968 mtx_lock(&hvs_trans_socks_mtx); 969 /* Remove from bound list */ 970 __hvs_remove_socket_from_list(so, HVS_LIST_BOUND); 971 mtx_unlock(&hvs_trans_socks_mtx); 972 } 973 974 if (so->so_state & SS_ISCONNECTED) { 975 (void) sodisconnect(so); 976 } 977 hvs_trans_unlock(); 978 979 return; 980 } 981 982 int 983 hvs_trans_shutdown(struct socket *so) 984 { 985 struct hvs_pcb *pcb = so2hvspcb(so); 986 struct sockbuf *sb; 987 988 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 989 "%s: HyperV Socket hvs_trans_shutdown called\n", __func__); 990 991 if (pcb == NULL) 992 return (EINVAL); 993 994 /* 995 * Only get called with the shutdown method is SHUT_WR or 996 * SHUT_RDWR. 
997 * When the method is SHUT_RD or SHUT_RDWR, the caller 998 * already set the SBS_CANTRCVMORE on receive side socket 999 * buffer. 1000 */ 1001 if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { 1002 /* 1003 * SHUT_WR only case. 1004 * Receive side is still open. Just close 1005 * the send side. 1006 */ 1007 socantsendmore(so); 1008 } else { 1009 /* SHUT_RDWR case */ 1010 if (so->so_state & SS_ISCONNECTED) { 1011 /* Send a FIN to peer */ 1012 sb = &so->so_snd; 1013 SOCKBUF_LOCK(sb); 1014 (void) hvsock_send_data(pcb->chan, NULL, 0, sb); 1015 SOCKBUF_UNLOCK(sb); 1016 1017 soisdisconnecting(so); 1018 } 1019 } 1020 1021 return (0); 1022 } 1023 1024 /* In the VM, we support Hyper-V Sockets with AF_HYPERV, and the endpoint is 1025 * <port> (see struct sockaddr_hvs). 1026 * 1027 * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV: 1028 * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user- 1029 * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with 1030 * the below sockaddr: 1031 * 1032 * struct SOCKADDR_HV 1033 * { 1034 * ADDRESS_FAMILY Family; 1035 * USHORT Reserved; 1036 * GUID VmId; 1037 * GUID ServiceId; 1038 * }; 1039 * Note: VmID is not used by FreeBSD VM and actually it isn't transmitted via 1040 * VMBus, because here it's obvious the host and the VM can easily identify 1041 * each other. Though the VmID is useful on the host, especially in the case 1042 * of Windows container, FreeBSD VM doesn't need it at all. 1043 * 1044 * To be compatible with similar infrastructure in Linux VMs, we have 1045 * to limit the available GUID space of SOCKADDR_HV so that we can create 1046 * a mapping between FreeBSD AF_HYPERV port and SOCKADDR_HV Service GUID. 
 * The rule of writing Hyper-V Sockets apps on the host and in FreeBSD VM is:
 *
 ****************************************************************************
 * The only valid Service GUIDs, from the perspectives of both the host and *
 * FreeBSD VM, that can be connected by the other end, must conform to this *
 * format: <port>-facb-11e6-bd58-64006a7986d3.                              *
 ****************************************************************************
 *
 * When we write apps on the host to connect(), the GUID ServiceID is used.
 * When we write apps in FreeBSD VM to connect(), we only need to specify the
 * port and the driver will form the GUID and use that to request the host.
 *
 * From the perspective of FreeBSD VM, the remote ephemeral port (i.e. the
 * auto-generated remote port for a connect request initiated by the host's
 * connect()) is set to HVADDR_PORT_UNKNOWN, which is not really used on the
 * FreeBSD guest.
 */

/*
 * Older HyperV hosts (vmbus version 'VMBUS_VERSION_WIN10' or before)
 * restrict the HyperV socket ring buffer size to six 4K pages. Newer
 * HyperV hosts don't have this limit.
1069 */ 1070 #define HVS_RINGBUF_RCV_SIZE (PAGE_SIZE * 6) 1071 #define HVS_RINGBUF_SND_SIZE (PAGE_SIZE * 6) 1072 #define HVS_RINGBUF_MAX_SIZE (PAGE_SIZE * 64) 1073 1074 struct hvsock_sc { 1075 device_t dev; 1076 struct hvs_pcb *pcb; 1077 struct vmbus_channel *channel; 1078 }; 1079 1080 static bool 1081 hvsock_chan_readable(struct vmbus_channel *chan) 1082 { 1083 uint32_t readable = vmbus_chan_read_available(chan); 1084 1085 return (readable >= HVSOCK_PKT_LEN(0)); 1086 } 1087 1088 static void 1089 hvsock_chan_cb(struct vmbus_channel *chan, void *context) 1090 { 1091 struct hvs_pcb *pcb = (struct hvs_pcb *) context; 1092 struct socket *so; 1093 uint32_t canwrite; 1094 1095 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1096 "%s: host send us a wakeup on rb data, pcb = %p\n", 1097 __func__, pcb); 1098 1099 /* 1100 * Check if the socket is still attached and valid. 1101 * Here we know channel is still open. Need to make 1102 * sure the socket has not been closed or freed. 1103 */ 1104 (void) hvs_trans_lock(); 1105 so = hsvpcb2so(pcb); 1106 1107 if (pcb->chan != NULL && so != NULL) { 1108 /* 1109 * Wake up reader if there are data to read. 1110 */ 1111 SOCKBUF_LOCK(&(so)->so_rcv); 1112 1113 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1114 "%s: read available = %u\n", __func__, 1115 vmbus_chan_read_available(pcb->chan)); 1116 1117 if (hvsock_chan_readable(pcb->chan)) 1118 sorwakeup_locked(so); 1119 else 1120 SOCKBUF_UNLOCK(&(so)->so_rcv); 1121 1122 /* 1123 * Wake up sender if space becomes available to write. 
1124 */ 1125 SOCKBUF_LOCK(&(so)->so_snd); 1126 canwrite = hvsock_canwrite_check(pcb); 1127 1128 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1129 "%s: canwrite = %u\n", __func__, canwrite); 1130 1131 if (canwrite > 0) { 1132 sowwakeup_locked(so); 1133 } else { 1134 SOCKBUF_UNLOCK(&(so)->so_snd); 1135 } 1136 } 1137 1138 hvs_trans_unlock(); 1139 1140 return; 1141 } 1142 1143 static int 1144 hvsock_br_callback(void *datap, int cplen, void *cbarg) 1145 { 1146 struct hvs_callback_arg *arg = (struct hvs_callback_arg *)cbarg; 1147 struct uio *uio = arg->uio; 1148 struct sockbuf *sb = arg->sb; 1149 int error = 0; 1150 1151 if (cbarg == NULL || datap == NULL) 1152 return (EINVAL); 1153 1154 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1155 "%s: called, uio_rw = %s, uio_resid = %zd, cplen = %u, " 1156 "datap = %p\n", 1157 __func__, (uio->uio_rw == UIO_READ) ? "read from br":"write to br", 1158 uio->uio_resid, cplen, datap); 1159 1160 if (sb) 1161 SOCKBUF_UNLOCK(sb); 1162 1163 error = uiomove(datap, cplen, uio); 1164 1165 if (sb) 1166 SOCKBUF_LOCK(sb); 1167 1168 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1169 "%s: after uiomove, uio_resid = %zd, error = %d\n", 1170 __func__, uio->uio_resid, error); 1171 1172 return (error); 1173 } 1174 1175 static int 1176 hvsock_send_data(struct vmbus_channel *chan, struct uio *uio, 1177 uint32_t to_write, struct sockbuf *sb) 1178 { 1179 struct hvs_pkt_header hvs_pkt; 1180 int hvs_pkthlen, hvs_pktlen, pad_pktlen, hlen, error = 0; 1181 uint64_t pad = 0; 1182 struct iovec iov[3]; 1183 struct hvs_callback_arg cbarg; 1184 1185 if (chan == NULL) 1186 return (ENOTCONN); 1187 1188 hlen = sizeof(struct vmbus_chanpkt_hdr); 1189 hvs_pkthlen = sizeof(struct hvs_pkt_header); 1190 hvs_pktlen = hvs_pkthlen + to_write; 1191 pad_pktlen = VMBUS_CHANPKT_TOTLEN(hvs_pktlen); 1192 1193 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1194 "%s: hlen = %u, hvs_pkthlen = %u, hvs_pktlen = %u, " 1195 "pad_pktlen = %u, data_len = %u\n", 1196 __func__, hlen, hvs_pkthlen, hvs_pktlen, pad_pktlen, to_write); 1197 1198 
hvs_pkt.chan_pkt_hdr.cph_type = VMBUS_CHANPKT_TYPE_INBAND; 1199 hvs_pkt.chan_pkt_hdr.cph_flags = 0; 1200 VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_hlen, hlen); 1201 VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_tlen, pad_pktlen); 1202 hvs_pkt.chan_pkt_hdr.cph_xactid = 0; 1203 1204 hvs_pkt.vmpipe_pkt_hdr.vmpipe_pkt_type = 1; 1205 hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size = to_write; 1206 1207 cbarg.uio = uio; 1208 cbarg.sb = sb; 1209 1210 if (uio && to_write > 0) { 1211 iov[0].iov_base = &hvs_pkt; 1212 iov[0].iov_len = hvs_pkthlen; 1213 iov[1].iov_base = NULL; 1214 iov[1].iov_len = to_write; 1215 iov[2].iov_base = &pad; 1216 iov[2].iov_len = pad_pktlen - hvs_pktlen; 1217 1218 error = vmbus_chan_iov_send(chan, iov, 3, 1219 hvsock_br_callback, &cbarg); 1220 } else { 1221 if (to_write == 0) { 1222 iov[0].iov_base = &hvs_pkt; 1223 iov[0].iov_len = hvs_pkthlen; 1224 iov[1].iov_base = &pad; 1225 iov[1].iov_len = pad_pktlen - hvs_pktlen; 1226 error = vmbus_chan_iov_send(chan, iov, 2, NULL, NULL); 1227 } 1228 } 1229 1230 if (error) { 1231 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1232 "%s: error = %d\n", __func__, error); 1233 } 1234 1235 return (error); 1236 } 1237 1238 /* 1239 * Check if we have data on current ring buffer to read 1240 * or not. If not, advance the ring buffer read index to 1241 * next packet. Update the recev_data_len and recev_data_off 1242 * to new value. 1243 * Return the number of bytes can read. 
1244 */ 1245 static uint32_t 1246 hvsock_canread_check(struct hvs_pcb *pcb) 1247 { 1248 uint32_t advance; 1249 uint32_t tlen, hlen, dlen; 1250 uint32_t bytes_canread = 0; 1251 int error; 1252 1253 if (pcb == NULL || pcb->chan == NULL) { 1254 pcb->so->so_error = EIO; 1255 return (0); 1256 } 1257 1258 /* Still have data not read yet on current packet */ 1259 if (pcb->recv_data_len > 0) 1260 return (pcb->recv_data_len); 1261 1262 if (pcb->rb_init) 1263 advance = 1264 VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen); 1265 else 1266 advance = 0; 1267 1268 bytes_canread = vmbus_chan_read_available(pcb->chan); 1269 1270 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1271 "%s: bytes_canread on br = %u, advance = %u\n", 1272 __func__, bytes_canread, advance); 1273 1274 if (pcb->rb_init && bytes_canread == (advance + sizeof(uint64_t))) { 1275 /* 1276 * Nothing to read. Need to advance the rindex before 1277 * calling sbwait, so host knows to wake us up when data 1278 * is available to read on rb. 1279 */ 1280 error = vmbus_chan_recv_idxadv(pcb->chan, advance); 1281 if (error) { 1282 HVSOCK_DBG(HVSOCK_DBG_ERR, 1283 "%s: after calling vmbus_chan_recv_idxadv, " 1284 "got error = %d\n", __func__, error); 1285 return (0); 1286 } else { 1287 pcb->rb_init = false; 1288 pcb->recv_data_len = 0; 1289 pcb->recv_data_off = 0; 1290 bytes_canread = vmbus_chan_read_available(pcb->chan); 1291 1292 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1293 "%s: advanced %u bytes, " 1294 " bytes_canread on br now = %u\n", 1295 __func__, advance, bytes_canread); 1296 1297 if (bytes_canread == 0) 1298 return (0); 1299 else 1300 advance = 0; 1301 } 1302 } 1303 1304 if (bytes_canread < 1305 advance + (sizeof(struct hvs_pkt_header) + sizeof(uint64_t))) 1306 return (0); 1307 1308 error = vmbus_chan_recv_peek(pcb->chan, &pcb->hvs_pkt, 1309 sizeof(struct hvs_pkt_header), advance); 1310 1311 /* Don't have anything to read */ 1312 if (error) { 1313 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1314 "%s: after calling vmbus_chan_recv_peek, got 
error = %d\n", 1315 __func__, error); 1316 return (0); 1317 } 1318 1319 /* 1320 * We just read in a new packet header. Do some sanity checks. 1321 */ 1322 tlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen); 1323 hlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_hlen); 1324 dlen = pcb->hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size; 1325 if (__predict_false(hlen < sizeof(struct vmbus_chanpkt_hdr)) || 1326 __predict_false(hlen > tlen) || 1327 __predict_false(tlen < dlen + sizeof(struct hvs_pkt_header))) { 1328 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1329 "invalid tlen(%u), hlen(%u) or dlen(%u)\n", 1330 tlen, hlen, dlen); 1331 pcb->so->so_error = EIO; 1332 return (0); 1333 } 1334 if (pcb->rb_init == false) 1335 pcb->rb_init = true; 1336 1337 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1338 "Got new pkt tlen(%u), hlen(%u) or dlen(%u)\n", 1339 tlen, hlen, dlen); 1340 1341 /* The other side has sent a close FIN */ 1342 if (dlen == 0) { 1343 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1344 "%s: Received FIN from other side\n", __func__); 1345 /* inform the caller by seting so_error to ESHUTDOWN */ 1346 pcb->so->so_error = ESHUTDOWN; 1347 } 1348 1349 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1350 "%s: canread on receive ring is %u \n", __func__, dlen); 1351 1352 pcb->recv_data_len = dlen; 1353 pcb->recv_data_off = 0; 1354 1355 return (pcb->recv_data_len); 1356 } 1357 1358 static uint32_t 1359 hvsock_canwrite_check(struct hvs_pcb *pcb) 1360 { 1361 uint32_t writeable; 1362 uint32_t ret; 1363 1364 if (pcb == NULL || pcb->chan == NULL) 1365 return (0); 1366 1367 writeable = vmbus_chan_write_available(pcb->chan); 1368 1369 /* 1370 * We must always reserve a 0-length-payload packet for the FIN. 1371 */ 1372 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1373 "%s: writeable is %u, should be greater than %ju\n", 1374 __func__, writeable, 1375 (uintmax_t)(HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0))); 1376 1377 if (writeable < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) { 1378 /* 1379 * The Tx ring seems full. 
1380 */ 1381 return (0); 1382 } 1383 1384 ret = writeable - HVSOCK_PKT_LEN(0) - HVSOCK_PKT_LEN(0); 1385 1386 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1387 "%s: available size is %u\n", __func__, rounddown2(ret, 8)); 1388 1389 return (rounddown2(ret, 8)); 1390 } 1391 1392 static void 1393 hvsock_set_chan_pending_send_size(struct vmbus_channel *chan) 1394 { 1395 vmbus_chan_set_pending_send_size(chan, 1396 HVSOCK_PKT_LEN(HVSOCK_SEND_BUF_SZ)); 1397 } 1398 1399 static int 1400 hvsock_open_channel(struct vmbus_channel *chan, struct socket *so) 1401 { 1402 unsigned int rcvbuf, sndbuf; 1403 struct hvs_pcb *pcb = so2hvspcb(so); 1404 int ret; 1405 1406 if (vmbus_current_version < VMBUS_VERSION_WIN10_V5) { 1407 sndbuf = HVS_RINGBUF_SND_SIZE; 1408 rcvbuf = HVS_RINGBUF_RCV_SIZE; 1409 } else { 1410 sndbuf = MAX(so->so_snd.sb_hiwat, HVS_RINGBUF_SND_SIZE); 1411 sndbuf = MIN(sndbuf, HVS_RINGBUF_MAX_SIZE); 1412 sndbuf = rounddown2(sndbuf, PAGE_SIZE); 1413 rcvbuf = MAX(so->so_rcv.sb_hiwat, HVS_RINGBUF_RCV_SIZE); 1414 rcvbuf = MIN(rcvbuf, HVS_RINGBUF_MAX_SIZE); 1415 rcvbuf = rounddown2(rcvbuf, PAGE_SIZE); 1416 } 1417 1418 /* 1419 * Can only read whatever user provided size of data 1420 * from ring buffer. Turn off batched reading. 1421 */ 1422 vmbus_chan_set_readbatch(chan, false); 1423 1424 ret = vmbus_chan_open(chan, sndbuf, rcvbuf, NULL, 0, 1425 hvsock_chan_cb, pcb); 1426 1427 if (ret != 0) { 1428 HVSOCK_DBG(HVSOCK_DBG_ERR, 1429 "%s: failed to open hvsock channel, sndbuf = %u, " 1430 "rcvbuf = %u\n", __func__, sndbuf, rcvbuf); 1431 } else { 1432 HVSOCK_DBG(HVSOCK_DBG_INFO, 1433 "%s: hvsock channel opened, sndbuf = %u, i" 1434 "rcvbuf = %u\n", __func__, sndbuf, rcvbuf); 1435 /* 1436 * Se the pending send size so to receive wakeup 1437 * signals from host when there is enough space on 1438 * rx buffer ring to write. 1439 */ 1440 hvsock_set_chan_pending_send_size(chan); 1441 } 1442 1443 return ret; 1444 } 1445 1446 /* 1447 * Guest is listening passively on the socket. 
Open channel and 1448 * create a new socket for the conneciton. 1449 */ 1450 static void 1451 hvsock_open_conn_passive(struct vmbus_channel *chan, struct socket *so, 1452 struct hvsock_sc *sc) 1453 { 1454 struct socket *new_so; 1455 struct hvs_pcb *new_pcb, *pcb; 1456 int error; 1457 1458 /* Do nothing if socket is not listening */ 1459 if (!SOLISTENING(so)) { 1460 HVSOCK_DBG(HVSOCK_DBG_ERR, 1461 "%s: socket is not a listening one\n", __func__); 1462 return; 1463 } 1464 1465 /* 1466 * Create a new socket. This will call pru_attach to complete 1467 * the socket initialization and put the new socket onto 1468 * listening socket's sol_incomp list, waiting to be promoted 1469 * to sol_comp list. 1470 * The new socket created has ref count 0. There is no other 1471 * thread that changes the state of this new one at the 1472 * moment, so we don't need to hold its lock while opening 1473 * channel and filling out its pcb information. 1474 */ 1475 new_so = sonewconn(so, 0); 1476 if (!new_so) 1477 HVSOCK_DBG(HVSOCK_DBG_ERR, 1478 "%s: creating new socket failed\n", __func__); 1479 1480 /* 1481 * Now open the vmbus channel. If it fails, the socket will be 1482 * on the listening socket's sol_incomp queue until it is 1483 * replaced and aborted. 
1484 */ 1485 error = hvsock_open_channel(chan, new_so); 1486 if (error) { 1487 new_so->so_error = error; 1488 return; 1489 } 1490 1491 pcb = so->so_pcb; 1492 new_pcb = new_so->so_pcb; 1493 1494 hvs_addr_set(&(new_pcb->local_addr), pcb->local_addr.hvs_port); 1495 /* Remote port is unknown to guest in this type of conneciton */ 1496 hvs_addr_set(&(new_pcb->remote_addr), HVADDR_PORT_UNKNOWN); 1497 new_pcb->chan = chan; 1498 new_pcb->recv_data_len = 0; 1499 new_pcb->recv_data_off = 0; 1500 new_pcb->rb_init = false; 1501 1502 new_pcb->vm_srv_id = *vmbus_chan_guid_type(chan); 1503 new_pcb->host_srv_id = *vmbus_chan_guid_inst(chan); 1504 1505 hvs_insert_socket_on_list(new_so, HVS_LIST_CONNECTED); 1506 1507 sc->pcb = new_pcb; 1508 1509 /* 1510 * Change the socket state to SS_ISCONNECTED. This will promote 1511 * the socket to sol_comp queue and wake up the thread which 1512 * is accepting connection. 1513 */ 1514 soisconnected(new_so); 1515 } 1516 1517 1518 /* 1519 * Guest is actively connecting to host. 1520 */ 1521 static void 1522 hvsock_open_conn_active(struct vmbus_channel *chan, struct socket *so) 1523 { 1524 struct hvs_pcb *pcb; 1525 int error; 1526 1527 error = hvsock_open_channel(chan, so); 1528 if (error) { 1529 so->so_error = error; 1530 return; 1531 } 1532 1533 pcb = so->so_pcb; 1534 pcb->chan = chan; 1535 pcb->recv_data_len = 0; 1536 pcb->recv_data_off = 0; 1537 pcb->rb_init = false; 1538 1539 mtx_lock(&hvs_trans_socks_mtx); 1540 __hvs_remove_socket_from_list(so, HVS_LIST_BOUND); 1541 __hvs_insert_socket_on_list(so, HVS_LIST_CONNECTED); 1542 mtx_unlock(&hvs_trans_socks_mtx); 1543 1544 /* 1545 * Change the socket state to SS_ISCONNECTED. This will wake up 1546 * the thread sleeping in connect call. 
1547 */ 1548 soisconnected(so); 1549 } 1550 1551 static void 1552 hvsock_open_connection(struct vmbus_channel *chan, struct hvsock_sc *sc) 1553 { 1554 struct hyperv_guid *inst_guid, *type_guid; 1555 bool conn_from_host; 1556 struct sockaddr_hvs addr; 1557 struct socket *so; 1558 struct hvs_pcb *pcb; 1559 1560 type_guid = (struct hyperv_guid *) vmbus_chan_guid_type(chan); 1561 inst_guid = (struct hyperv_guid *) vmbus_chan_guid_inst(chan); 1562 conn_from_host = vmbus_chan_is_hvs_conn_from_host(chan); 1563 1564 HVSOCK_DBG(HVSOCK_DBG_INFO, "type_guid is "); 1565 hvsock_print_guid(type_guid); 1566 HVSOCK_DBG(HVSOCK_DBG_INFO, "inst_guid is "); 1567 hvsock_print_guid(inst_guid); 1568 HVSOCK_DBG(HVSOCK_DBG_INFO, "connection %s host\n", 1569 (conn_from_host == true ) ? "from" : "to"); 1570 1571 /* 1572 * The listening port should be in [0, MAX_LISTEN_PORT] 1573 */ 1574 if (!is_valid_srv_id(type_guid)) 1575 return; 1576 1577 /* 1578 * There should be a bound socket already created no matter 1579 * it is a passive or active connection. 1580 * For host initiated connection (passive on guest side), 1581 * the type_guid contains the port which guest is bound and 1582 * listening. 1583 * For the guest initiated connection (active on guest side), 1584 * the inst_guid contains the port that guest has auto bound 1585 * to. 1586 */ 1587 hvs_addr_init(&addr, conn_from_host ? 
type_guid : inst_guid); 1588 so = hvs_find_socket_on_list(&addr, HVS_LIST_BOUND); 1589 if (!so) { 1590 HVSOCK_DBG(HVSOCK_DBG_ERR, 1591 "%s: no bound socket found for port %u\n", 1592 __func__, addr.hvs_port); 1593 return; 1594 } 1595 1596 if (conn_from_host) { 1597 hvsock_open_conn_passive(chan, so, sc); 1598 } else { 1599 (void) hvs_trans_lock(); 1600 pcb = so->so_pcb; 1601 if (pcb && pcb->so) { 1602 sc->pcb = so2hvspcb(so); 1603 hvsock_open_conn_active(chan, so); 1604 } else { 1605 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1606 "%s: channel detached before open\n", __func__); 1607 } 1608 hvs_trans_unlock(); 1609 } 1610 1611 } 1612 1613 static int 1614 hvsock_probe(device_t dev) 1615 { 1616 struct vmbus_channel *channel = vmbus_get_channel(dev); 1617 1618 if (!channel || !vmbus_chan_is_hvs(channel)) { 1619 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1620 "hvsock_probe called but not a hvsock channel id %u\n", 1621 vmbus_chan_id(channel)); 1622 1623 return ENXIO; 1624 } else { 1625 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1626 "hvsock_probe got a hvsock channel id %u\n", 1627 vmbus_chan_id(channel)); 1628 1629 return BUS_PROBE_DEFAULT; 1630 } 1631 } 1632 1633 static int 1634 hvsock_attach(device_t dev) 1635 { 1636 struct vmbus_channel *channel = vmbus_get_channel(dev); 1637 struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev); 1638 1639 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_attach called.\n"); 1640 1641 hvsock_open_connection(channel, sc); 1642 1643 /* 1644 * Always return success. On error the host will rescind the device 1645 * in 30 seconds and we can do cleanup at that time in 1646 * vmbus_chan_msgproc_chrescind(). 
1647 */ 1648 return (0); 1649 } 1650 1651 static int 1652 hvsock_detach(device_t dev) 1653 { 1654 struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev); 1655 struct socket *so; 1656 int retry; 1657 1658 if (bootverbose) 1659 device_printf(dev, "hvsock_detach called.\n"); 1660 1661 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_detach called.\n"); 1662 1663 if (sc->pcb != NULL) { 1664 (void) hvs_trans_lock(); 1665 1666 so = hsvpcb2so(sc->pcb); 1667 if (so) { 1668 /* Close the connection */ 1669 if (so->so_state & 1670 (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) 1671 soisdisconnected(so); 1672 } 1673 1674 mtx_lock(&hvs_trans_socks_mtx); 1675 __hvs_remove_pcb_from_list(sc->pcb, 1676 HVS_LIST_BOUND | HVS_LIST_CONNECTED); 1677 mtx_unlock(&hvs_trans_socks_mtx); 1678 1679 /* 1680 * Close channel while no reader and sender are working 1681 * on the buffer rings. 1682 */ 1683 if (so) { 1684 retry = 0; 1685 while (SOCK_IO_RECV_LOCK(so, 0) == EWOULDBLOCK) { 1686 /* 1687 * Someone is reading, rx br is busy 1688 */ 1689 soisdisconnected(so); 1690 DELAY(500); 1691 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1692 "waiting for rx reader to exit, " 1693 "retry = %d\n", retry++); 1694 } 1695 retry = 0; 1696 while (SOCK_IO_SEND_LOCK(so, 0) == EWOULDBLOCK) { 1697 /* 1698 * Someone is sending, tx br is busy 1699 */ 1700 soisdisconnected(so); 1701 DELAY(500); 1702 HVSOCK_DBG(HVSOCK_DBG_VERBOSE, 1703 "waiting for tx sender to exit, " 1704 "retry = %d\n", retry++); 1705 } 1706 } 1707 1708 1709 bzero(sc->pcb, sizeof(struct hvs_pcb)); 1710 free(sc->pcb, M_HVSOCK); 1711 sc->pcb = NULL; 1712 1713 if (so) { 1714 SOCK_IO_RECV_UNLOCK(so); 1715 SOCK_IO_SEND_UNLOCK(so); 1716 so->so_pcb = NULL; 1717 } 1718 1719 hvs_trans_unlock(); 1720 } 1721 1722 vmbus_chan_close(vmbus_get_channel(dev)); 1723 1724 return (0); 1725 } 1726 1727 static device_method_t hvsock_methods[] = { 1728 /* Device interface */ 1729 DEVMETHOD(device_probe, hvsock_probe), 1730 DEVMETHOD(device_attach, hvsock_attach), 1731 
DEVMETHOD(device_detach, hvsock_detach), 1732 DEVMETHOD_END 1733 }; 1734 1735 static driver_t hvsock_driver = { 1736 "hv_sock", 1737 hvsock_methods, 1738 sizeof(struct hvsock_sc) 1739 }; 1740 1741 DRIVER_MODULE(hvsock, vmbus, hvsock_driver, NULL, NULL); 1742 MODULE_VERSION(hvsock, 1); 1743 MODULE_DEPEND(hvsock, vmbus, 1, 1, 1); 1744