// SPDX-License-Identifier: GPL-2.0-only
/*
 * common code for virtio vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 */
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/virtio_vsock.h>
#include <uapi/linux/vsockmon.h>

#include <net/sock.h>
#include <net/af_vsock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/vsock_virtio_transport_common.h>

/* How long to wait for graceful shutdown of a connection */
#define VSOCK_CLOSE_TIMEOUT (8 * HZ)

/* Threshold for detecting small packets to copy */
#define GOOD_COPY_LEN  128

static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
{
        const struct vsock_transport *t = vsock_core_get_transport(vsk);

        return container_of(t, struct virtio_transport, transport);
}

static struct virtio_vsock_pkt *
virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
                           size_t len,
                           u32 src_cid,
                           u32 src_port,
                           u32 dst_cid,
                           u32 dst_port)
{
        struct virtio_vsock_pkt *pkt;
        int err;

        pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
        if (!pkt)
                return NULL;

        pkt->hdr.type = cpu_to_le16(info->type);
        pkt->hdr.op = cpu_to_le16(info->op);
        pkt->hdr.src_cid = cpu_to_le64(src_cid);
        pkt->hdr.dst_cid = cpu_to_le64(dst_cid);
        pkt->hdr.src_port = cpu_to_le32(src_port);
        pkt->hdr.dst_port = cpu_to_le32(dst_port);
        pkt->hdr.flags = cpu_to_le32(info->flags);
        pkt->len = len;
        pkt->hdr.len = cpu_to_le32(len);
        pkt->reply = info->reply;
        pkt->vsk = info->vsk;

        if (info->msg && len > 0) {
                pkt->buf = kmalloc(len, GFP_KERNEL);
                if (!pkt->buf)
                        goto out_pkt;

                pkt->buf_len = len;

                err = memcpy_from_msg(pkt->buf, info->msg, len);
                if (err)
                        goto out;
        }

        trace_virtio_transport_alloc_pkt(src_cid, src_port,
                                         dst_cid, dst_port,
                                         len,
                                         info->type,
                                         info->op,
                                         info->flags);

        return pkt;

out:
        kfree(pkt->buf);
out_pkt:
        kfree(pkt);
        return NULL;
}
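/* Illustrative sketch only (not a call site in this file): packets are
 * normally allocated through virtio_transport_send_pkt_info() below,
 * which derives the addresses from the socket. A hypothetical direct
 * use for a 4-byte payload taken from @msg would look like:
 *
 *      struct virtio_vsock_pkt_info info = {
 *              .op = VIRTIO_VSOCK_OP_RW,
 *              .type = VIRTIO_VSOCK_TYPE_STREAM,
 *              .msg = msg,
 *              .pkt_len = 4,
 *      };
 *      pkt = virtio_transport_alloc_pkt(&info, 4, src_cid, src_port,
 *                                       dst_cid, dst_port);
 *
 * On success the header is already little-endian and pkt->buf holds a
 * copy of the payload; on any failure everything is freed and NULL is
 * returned.
 */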
/* Packet capture */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
        struct virtio_vsock_pkt *pkt = opaque;
        struct af_vsockmon_hdr *hdr;
        struct sk_buff *skb;
        size_t payload_len;
        void *payload_buf;

        /* A packet could be split to fit the RX buffer, so we can retrieve
         * the payload length from the header and the buffer pointer taking
         * care of the offset in the original packet.
         */
        payload_len = le32_to_cpu(pkt->hdr.len);
        payload_buf = pkt->buf + pkt->off;

        skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len,
                        GFP_ATOMIC);
        if (!skb)
                return NULL;

        hdr = skb_put(skb, sizeof(*hdr));

        /* pkt->hdr is little-endian so no need to byteswap here */
        hdr->src_cid = pkt->hdr.src_cid;
        hdr->src_port = pkt->hdr.src_port;
        hdr->dst_cid = pkt->hdr.dst_cid;
        hdr->dst_port = pkt->hdr.dst_port;

        hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
        hdr->len = cpu_to_le16(sizeof(pkt->hdr));
        memset(hdr->reserved, 0, sizeof(hdr->reserved));

        switch (le16_to_cpu(pkt->hdr.op)) {
        case VIRTIO_VSOCK_OP_REQUEST:
        case VIRTIO_VSOCK_OP_RESPONSE:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
                break;
        case VIRTIO_VSOCK_OP_RST:
        case VIRTIO_VSOCK_OP_SHUTDOWN:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
                break;
        case VIRTIO_VSOCK_OP_RW:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
                break;
        case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
        case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
                break;
        default:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
                break;
        }

        skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));

        if (payload_len)
                skb_put_data(skb, payload_buf, payload_len);

        return skb;
}

void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
{
        vsock_deliver_tap(virtio_transport_build_skb, pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);

static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
                                          struct virtio_vsock_pkt_info *info)
{
        u32 src_cid, src_port, dst_cid, dst_port;
        struct virtio_vsock_sock *vvs;
        struct virtio_vsock_pkt *pkt;
        u32 pkt_len = info->pkt_len;

        src_cid = virtio_transport_get_ops(vsk)->transport.get_local_cid();
        src_port = vsk->local_addr.svm_port;
        if (!info->remote_cid) {
                dst_cid = vsk->remote_addr.svm_cid;
                dst_port = vsk->remote_addr.svm_port;
        } else {
                dst_cid = info->remote_cid;
                dst_port = info->remote_port;
        }

        vvs = vsk->trans;

        /* we can send less than pkt_len bytes */
        if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
                pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;

        /* virtio_transport_get_credit might return less than pkt_len credit */
        pkt_len = virtio_transport_get_credit(vvs, pkt_len);

        /* Do not send zero length OP_RW pkt */
        if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
                return pkt_len;

        pkt = virtio_transport_alloc_pkt(info, pkt_len,
                                         src_cid, src_port,
                                         dst_cid, dst_port);
        if (!pkt) {
                virtio_transport_put_credit(vvs, pkt_len);
                return -ENOMEM;
        }

        virtio_transport_inc_tx_pkt(vvs, pkt);

        return virtio_transport_get_ops(vsk)->send_pkt(pkt);
}

static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
                                        struct virtio_vsock_pkt *pkt)
{
        if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
                return false;

        vvs->rx_bytes += pkt->len;
        return true;
}

static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
                                        struct virtio_vsock_pkt *pkt)
{
        vvs->rx_bytes -= pkt->len;
        vvs->fwd_cnt += pkt->len;
}
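/* Worked example of the receive-side accounting above (hypothetical
 * numbers): with buf_alloc = 64 KiB and rx_bytes = 60 KiB already
 * queued, an incoming 8 KiB packet would push rx_bytes to 68 KiB, so
 * virtio_transport_inc_rx_pkt() returns false and the packet is
 * dropped (see virtio_transport_recv_enqueue()). Once the reader
 * consumes data, virtio_transport_dec_rx_pkt() moves those bytes from
 * rx_bytes to fwd_cnt, and the next credit update advertises the freed
 * space to the peer.
 */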
void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
{
        spin_lock_bh(&vvs->rx_lock);
        vvs->last_fwd_cnt = vvs->fwd_cnt;
        pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
        pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
        spin_unlock_bh(&vvs->rx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);

u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
        u32 ret;

        spin_lock_bh(&vvs->tx_lock);
        ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
        if (ret > credit)
                ret = credit;
        vvs->tx_cnt += ret;
        spin_unlock_bh(&vvs->tx_lock);

        return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_get_credit);

void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
        spin_lock_bh(&vvs->tx_lock);
        vvs->tx_cnt -= credit;
        spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);

static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
                                               int type,
                                               struct virtio_vsock_hdr *hdr)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
                .type = type,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}
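/* Credit arithmetic with illustrative numbers: suppose the peer
 * advertised peer_buf_alloc = 64 KiB, we have sent tx_cnt = 80 KiB
 * since the connection was set up, and the peer's last header reported
 * peer_fwd_cnt = 48 KiB forwarded to its reader. Then
 *
 *      credit = 64K - (80K - 48K) = 32K
 *
 * so virtio_transport_get_credit(vvs, 40K) returns 32 KiB and a 40 KiB
 * send is trimmed to fit; the caller typically retries the remainder
 * once a CREDIT_UPDATE arrives.
 */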
static ssize_t
virtio_transport_stream_do_peek(struct vsock_sock *vsk,
                                struct msghdr *msg,
                                size_t len)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        struct virtio_vsock_pkt *pkt;
        size_t bytes, total = 0, off;
        int err = -EFAULT;

        spin_lock_bh(&vvs->rx_lock);

        list_for_each_entry(pkt, &vvs->rx_queue, list) {
                off = pkt->off;

                if (total == len)
                        break;

                while (total < len && off < pkt->len) {
                        bytes = len - total;
                        if (bytes > pkt->len - off)
                                bytes = pkt->len - off;

                        /* sk_lock is held by caller so no one else can dequeue.
                         * Unlock rx_lock since memcpy_to_msg() may sleep.
                         */
                        spin_unlock_bh(&vvs->rx_lock);

                        err = memcpy_to_msg(msg, pkt->buf + off, bytes);
                        if (err)
                                goto out;

                        spin_lock_bh(&vvs->rx_lock);

                        total += bytes;
                        off += bytes;
                }
        }

        spin_unlock_bh(&vvs->rx_lock);

        return total;

out:
        if (total)
                err = total;
        return err;
}

static ssize_t
virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
                                   struct msghdr *msg,
                                   size_t len)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        struct virtio_vsock_pkt *pkt;
        size_t bytes, total = 0;
        u32 free_space;
        int err = -EFAULT;

        spin_lock_bh(&vvs->rx_lock);
        while (total < len && !list_empty(&vvs->rx_queue)) {
                pkt = list_first_entry(&vvs->rx_queue,
                                       struct virtio_vsock_pkt, list);

                bytes = len - total;
                if (bytes > pkt->len - pkt->off)
                        bytes = pkt->len - pkt->off;

                /* sk_lock is held by caller so no one else can dequeue.
                 * Unlock rx_lock since memcpy_to_msg() may sleep.
                 */
                spin_unlock_bh(&vvs->rx_lock);

                err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
                if (err)
                        goto out;

                spin_lock_bh(&vvs->rx_lock);

                total += bytes;
                pkt->off += bytes;
                if (pkt->off == pkt->len) {
                        virtio_transport_dec_rx_pkt(vvs, pkt);
                        list_del(&pkt->list);
                        virtio_transport_free_pkt(pkt);
                }
        }

        free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);

        spin_unlock_bh(&vvs->rx_lock);

        /* To reduce the number of credit update messages,
         * don't update credits as long as lots of space is available.
         * Note: the limit chosen here is arbitrary. Setting the limit
         * too high causes extra messages. Too low causes transmitter
         * stalls. As stalls are in theory more expensive than extra
         * messages, we set the limit to a high value. TODO: experiment
         * with different values.
         */
        if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
                virtio_transport_send_credit_update(vsk,
                                                    VIRTIO_VSOCK_TYPE_STREAM,
                                                    NULL);
        }

        return total;

out:
        if (total)
                err = total;
        return err;
}

ssize_t
virtio_transport_stream_dequeue(struct vsock_sock *vsk,
                                struct msghdr *msg,
                                size_t len, int flags)
{
        if (flags & MSG_PEEK)
                return virtio_transport_stream_do_peek(vsk, msg, len);
        else
                return virtio_transport_stream_do_dequeue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
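/* Usage note: MSG_PEEK selects the non-destructive path above, so a
 * sequence like the following (hypothetical userspace, for
 * illustration) reads the same bytes twice:
 *
 *      recv(fd, buf, sizeof(buf), MSG_PEEK);   // copies, keeps rx_queue
 *      recv(fd, buf, sizeof(buf), 0);          // copies and consumes
 *
 * Only the second call frees packets and advances fwd_cnt, so only it
 * can trigger a credit update back to the transmitter.
 */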
int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
                               struct msghdr *msg,
                               size_t len, int flags)
{
        return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);

s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        s64 bytes;

        spin_lock_bh(&vvs->rx_lock);
        bytes = vvs->rx_bytes;
        spin_unlock_bh(&vvs->rx_lock);

        return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);

static s64 virtio_transport_has_space(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        s64 bytes;

        bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
        if (bytes < 0)
                bytes = 0;

        return bytes;
}

s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        s64 bytes;

        spin_lock_bh(&vvs->tx_lock);
        bytes = virtio_transport_has_space(vsk);
        spin_unlock_bh(&vvs->tx_lock);

        return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);

int virtio_transport_do_socket_init(struct vsock_sock *vsk,
                                    struct vsock_sock *psk)
{
        struct virtio_vsock_sock *vvs;

        vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);
        if (!vvs)
                return -ENOMEM;

        vsk->trans = vvs;
        vvs->vsk = vsk;
        if (psk && psk->trans) {
                struct virtio_vsock_sock *ptrans = psk->trans;

                vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
        }

        if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
                vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;

        vvs->buf_alloc = vsk->buffer_size;

        spin_lock_init(&vvs->rx_lock);
        spin_lock_init(&vvs->tx_lock);
        INIT_LIST_HEAD(&vvs->rx_queue);

        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);

/* sk_lock held by the caller */
void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
{
        struct virtio_vsock_sock *vvs = vsk->trans;

        if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
                *val = VIRTIO_VSOCK_MAX_BUF_SIZE;

        vvs->buf_alloc = *val;

        virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
                                            NULL);
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);

int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
                                size_t target,
                                bool *data_ready_now)
{
        if (vsock_stream_has_data(vsk))
                *data_ready_now = true;
        else
                *data_ready_now = false;

        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);

int
virtio_transport_notify_poll_out(struct vsock_sock *vsk,
                                 size_t target,
                                 bool *space_avail_now)
{
        s64 free_space;

        free_space = vsock_stream_has_space(vsk);
        if (free_space > 0)
                *space_avail_now = true;
        else if (free_space == 0)
                *space_avail_now = false;

        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);

int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
        size_t target, struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);

int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
        size_t target, struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);

int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
        size_t target, struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);

int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
        size_t target, ssize_t copied, bool data_read,
        struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);

int virtio_transport_notify_send_init(struct vsock_sock *vsk,
        struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);

int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
        struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);

int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
        struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);

int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
        ssize_t written, struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);

u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
{
        return vsk->buffer_size;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);

bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
{
        return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);

bool virtio_transport_stream_allow(u32 cid, u32 port)
{
        return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);

int virtio_transport_dgram_bind(struct vsock_sock *vsk,
                                struct sockaddr_vm *addr)
{
        return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);

bool virtio_transport_dgram_allow(u32 cid, u32 port)
{
        return false;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);

int virtio_transport_connect(struct vsock_sock *vsk)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_REQUEST,
                .type = VIRTIO_VSOCK_TYPE_STREAM,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_connect);
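/* Connection establishment, as implemented in this file: the initiator
 * sends OP_REQUEST (the af_vsock core has already put the socket in
 * TCP_SYN_SENT); the listener creates a child socket and answers with
 * OP_RESPONSE in virtio_transport_recv_listen(); on receiving it the
 * initiator moves to TCP_ESTABLISHED in
 * virtio_transport_recv_connecting(). Either side answers an
 * unexpected packet with OP_RST.
 */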
int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_SHUTDOWN,
                .type = VIRTIO_VSOCK_TYPE_STREAM,
                .flags = (mode & RCV_SHUTDOWN ?
                          VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
                         (mode & SEND_SHUTDOWN ?
                          VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_shutdown);
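/* Mapping sketch: a userspace shutdown(fd, SHUT_WR) reaches this
 * function with SEND_SHUTDOWN set and yields an OP_SHUTDOWN packet
 * whose flags carry VIRTIO_VSOCK_SHUTDOWN_SEND; SHUT_RD maps to
 * RCV_SHUTDOWN/VIRTIO_VSOCK_SHUTDOWN_RCV, and SHUT_RDWR sets both. The
 * peer folds these flags into vsk->peer_shutdown in
 * virtio_transport_recv_connected().
 */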
int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
                               struct sockaddr_vm *remote_addr,
                               struct msghdr *msg,
                               size_t dgram_len)
{
        return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);

ssize_t
virtio_transport_stream_enqueue(struct vsock_sock *vsk,
                                struct msghdr *msg,
                                size_t len)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RW,
                .type = VIRTIO_VSOCK_TYPE_STREAM,
                .msg = msg,
                .pkt_len = len,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);

void virtio_transport_destruct(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;

        kfree(vvs);
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);

static int virtio_transport_reset(struct vsock_sock *vsk,
                                  struct virtio_vsock_pkt *pkt)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RST,
                .type = VIRTIO_VSOCK_TYPE_STREAM,
                .reply = !!pkt,
                .vsk = vsk,
        };

        /* Send RST only if the original pkt is not a RST pkt */
        if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
                return 0;

        return virtio_transport_send_pkt_info(vsk, &info);
}

/* Normally packets are associated with a socket. There may be no socket if an
 * attempt was made to connect to a socket that does not exist.
 */
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
                                          struct virtio_vsock_pkt *pkt)
{
        struct virtio_vsock_pkt *reply;
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RST,
                .type = le16_to_cpu(pkt->hdr.type),
                .reply = true,
        };

        /* Send RST only if the original pkt is not a RST pkt */
        if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
                return 0;

        reply = virtio_transport_alloc_pkt(&info, 0,
                                           le64_to_cpu(pkt->hdr.dst_cid),
                                           le32_to_cpu(pkt->hdr.dst_port),
                                           le64_to_cpu(pkt->hdr.src_cid),
                                           le32_to_cpu(pkt->hdr.src_port));
        if (!reply)
                return -ENOMEM;

        if (!t) {
                virtio_transport_free_pkt(reply);
                return -ENOTCONN;
        }

        return t->send_pkt(reply);
}

static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
        if (timeout) {
                DEFINE_WAIT_FUNC(wait, woken_wake_function);

                add_wait_queue(sk_sleep(sk), &wait);

                do {
                        if (sk_wait_event(sk, &timeout,
                                          sock_flag(sk, SOCK_DONE), &wait))
                                break;
                } while (!signal_pending(current) && timeout);

                remove_wait_queue(sk_sleep(sk), &wait);
        }
}

static void virtio_transport_do_close(struct vsock_sock *vsk,
                                      bool cancel_timeout)
{
        struct sock *sk = sk_vsock(vsk);

        sock_set_flag(sk, SOCK_DONE);
        vsk->peer_shutdown = SHUTDOWN_MASK;
        if (vsock_stream_has_data(vsk) <= 0)
                sk->sk_state = TCP_CLOSING;
        sk->sk_state_change(sk);

        if (vsk->close_work_scheduled &&
            (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
                vsk->close_work_scheduled = false;

                vsock_remove_sock(vsk);

                /* Release refcnt obtained when we scheduled the timeout */
                sock_put(sk);
        }
}

static void virtio_transport_close_timeout(struct work_struct *work)
{
        struct vsock_sock *vsk =
                container_of(work, struct vsock_sock, close_work.work);
        struct sock *sk = sk_vsock(vsk);

        sock_hold(sk);
        lock_sock(sk);

        if (!sock_flag(sk, SOCK_DONE)) {
                (void)virtio_transport_reset(vsk, NULL);

                virtio_transport_do_close(vsk, false);
        }

        vsk->close_work_scheduled = false;

        release_sock(sk);
        sock_put(sk);
}

/* User context, vsk->sk is locked */
static bool virtio_transport_close(struct vsock_sock *vsk)
{
        struct sock *sk = &vsk->sk;

        if (!(sk->sk_state == TCP_ESTABLISHED ||
              sk->sk_state == TCP_CLOSING))
                return true;

        /* Already received SHUTDOWN from peer, reply with RST */
        if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
                (void)virtio_transport_reset(vsk, NULL);
                return true;
        }

        if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
                (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);

        if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
                virtio_transport_wait_close(sk, sk->sk_lingertime);

        if (sock_flag(sk, SOCK_DONE))
                return true;

        sock_hold(sk);
        INIT_DELAYED_WORK(&vsk->close_work,
                          virtio_transport_close_timeout);
        vsk->close_work_scheduled = true;
        schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
        return false;
}

void virtio_transport_release(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        struct virtio_vsock_pkt *pkt, *tmp;
        struct sock *sk = &vsk->sk;
        bool remove_sock = true;

        lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
        if (sk->sk_type == SOCK_STREAM)
                remove_sock = virtio_transport_close(vsk);

        list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
                list_del(&pkt->list);
                virtio_transport_free_pkt(pkt);
        }
        release_sock(sk);

        if (remove_sock)
                vsock_remove_sock(vsk);
}
EXPORT_SYMBOL_GPL(virtio_transport_release);

static int
virtio_transport_recv_connecting(struct sock *sk,
                                 struct virtio_vsock_pkt *pkt)
{
        struct vsock_sock *vsk = vsock_sk(sk);
        int err;
        int skerr;

        switch (le16_to_cpu(pkt->hdr.op)) {
        case VIRTIO_VSOCK_OP_RESPONSE:
                sk->sk_state = TCP_ESTABLISHED;
                sk->sk_socket->state = SS_CONNECTED;
                vsock_insert_connected(vsk);
                sk->sk_state_change(sk);
                break;
        case VIRTIO_VSOCK_OP_INVALID:
                break;
        case VIRTIO_VSOCK_OP_RST:
                skerr = ECONNRESET;
                err = 0;
                goto destroy;
        default:
                skerr = EPROTO;
                err = -EINVAL;
                goto destroy;
        }
        return 0;

destroy:
        virtio_transport_reset(vsk, pkt);
        sk->sk_state = TCP_CLOSE;
        sk->sk_err = skerr;
        sk->sk_error_report(sk);
        return err;
}

static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
                              struct virtio_vsock_pkt *pkt)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        bool can_enqueue, free_pkt = false;

        pkt->len = le32_to_cpu(pkt->hdr.len);
        pkt->off = 0;

        spin_lock_bh(&vvs->rx_lock);

        can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
        if (!can_enqueue) {
                free_pkt = true;
                goto out;
        }

        /* Try to copy small packets into the buffer of last packet queued,
         * to avoid wasting memory queueing the entire buffer with a small
         * payload.
         */
        if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
                struct virtio_vsock_pkt *last_pkt;

                last_pkt = list_last_entry(&vvs->rx_queue,
                                           struct virtio_vsock_pkt, list);

                /* If there is space in the last packet queued, we copy the
                 * new packet in its buffer.
                 */
                if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
                        memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
                               pkt->len);
                        last_pkt->len += pkt->len;
                        free_pkt = true;
                        goto out;
                }
        }

        list_add_tail(&pkt->list, &vvs->rx_queue);

out:
        spin_unlock_bh(&vvs->rx_lock);
        if (free_pkt)
                virtio_transport_free_pkt(pkt);
}
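/* Coalescing example with illustrative sizes: given GOOD_COPY_LEN =
 * 128, a 64-byte packet that arrives while the last queued packet has,
 * say, a 4 KiB buffer filled only to 1 KiB is memcpy'd into that
 * buffer and freed right away, instead of pinning a whole RX buffer
 * for a 64-byte payload.
 */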
static int
virtio_transport_recv_connected(struct sock *sk,
                                struct virtio_vsock_pkt *pkt)
{
        struct vsock_sock *vsk = vsock_sk(sk);
        int err = 0;

        switch (le16_to_cpu(pkt->hdr.op)) {
        case VIRTIO_VSOCK_OP_RW:
                virtio_transport_recv_enqueue(vsk, pkt);
                sk->sk_data_ready(sk);
                return err;
        case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
                sk->sk_write_space(sk);
                break;
        case VIRTIO_VSOCK_OP_SHUTDOWN:
                if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
                        vsk->peer_shutdown |= RCV_SHUTDOWN;
                if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
                        vsk->peer_shutdown |= SEND_SHUTDOWN;
                if (vsk->peer_shutdown == SHUTDOWN_MASK &&
                    vsock_stream_has_data(vsk) <= 0 &&
                    !sock_flag(sk, SOCK_DONE)) {
                        (void)virtio_transport_reset(vsk, NULL);

                        virtio_transport_do_close(vsk, true);
                }
                if (le32_to_cpu(pkt->hdr.flags))
                        sk->sk_state_change(sk);
                break;
        case VIRTIO_VSOCK_OP_RST:
                virtio_transport_do_close(vsk, true);
                break;
        default:
                err = -EINVAL;
                break;
        }

        virtio_transport_free_pkt(pkt);
        return err;
}

static void
virtio_transport_recv_disconnecting(struct sock *sk,
                                    struct virtio_vsock_pkt *pkt)
{
        struct vsock_sock *vsk = vsock_sk(sk);

        if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
                virtio_transport_do_close(vsk, true);
}

static int
virtio_transport_send_response(struct vsock_sock *vsk,
                               struct virtio_vsock_pkt *pkt)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RESPONSE,
                .type = VIRTIO_VSOCK_TYPE_STREAM,
                .remote_cid = le64_to_cpu(pkt->hdr.src_cid),
                .remote_port = le32_to_cpu(pkt->hdr.src_port),
                .reply = true,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}

static bool virtio_transport_space_update(struct sock *sk,
                                          struct virtio_vsock_pkt *pkt)
{
        struct vsock_sock *vsk = vsock_sk(sk);
        struct virtio_vsock_sock *vvs = vsk->trans;
        bool space_available;

        /* Listener sockets are not associated with any transport, so we are
         * not able to take the state to see if there is space available in the
         * remote peer, but since they are only used to receive requests, we
         * can assume that there is always space available in the other peer.
         */
        if (!vvs)
                return true;

        /* buf_alloc and fwd_cnt are always included in the hdr */
        spin_lock_bh(&vvs->tx_lock);
        vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
        vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt);
        space_available = virtio_transport_has_space(vsk);
        spin_unlock_bh(&vvs->tx_lock);
        return space_available;
}
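/* Every received header refreshes our snapshot of the peer's receive
 * window. With illustrative numbers: a packet carrying
 * buf_alloc = 64 KiB and fwd_cnt = 48 KiB while our tx_cnt is 80 KiB
 * leaves 32 KiB of space, virtio_transport_has_space() returns a
 * positive value, and virtio_transport_recv_pkt() uses the result to
 * wake writers via sk_write_space().
 */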
/* Handle server socket */
static int
virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
                             struct virtio_transport *t)
{
        struct vsock_sock *vsk = vsock_sk(sk);
        struct vsock_sock *vchild;
        struct sock *child;
        int ret;

        if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
                virtio_transport_reset(vsk, pkt);
                return -EINVAL;
        }

        if (sk_acceptq_is_full(sk)) {
                virtio_transport_reset(vsk, pkt);
                return -ENOMEM;
        }

        child = vsock_create_connected(sk);
        if (!child) {
                virtio_transport_reset(vsk, pkt);
                return -ENOMEM;
        }

        sk_acceptq_added(sk);

        lock_sock_nested(child, SINGLE_DEPTH_NESTING);

        child->sk_state = TCP_ESTABLISHED;

        vchild = vsock_sk(child);
        vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
                        le32_to_cpu(pkt->hdr.dst_port));
        vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
                        le32_to_cpu(pkt->hdr.src_port));

        ret = vsock_assign_transport(vchild, vsk);
        /* The transport assigned (based on remote_addr) must be the same
         * one on which we received the request.
         */
        if (ret || vchild->transport != &t->transport) {
                release_sock(child);
                virtio_transport_reset(vsk, pkt);
                sock_put(child);
                return ret;
        }

        if (virtio_transport_space_update(child, pkt))
                child->sk_write_space(child);

        vsock_insert_connected(vchild);
        vsock_enqueue_accept(sk, child);
        virtio_transport_send_response(vchild, pkt);

        release_sock(child);

        sk->sk_data_ready(sk);
        return 0;
}
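/* Summary of the listen path above: an OP_REQUEST arriving on a
 * TCP_LISTEN socket creates a connected child via
 * vsock_create_connected(), initializes its addresses from the packet
 * header, checks that the assigned transport is the one the request
 * arrived on, queues the child on the listener's accept queue and
 * answers with OP_RESPONSE. Anything other than OP_REQUEST, or a full
 * accept queue, gets a reset.
 */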
/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
 * lock.
 */
void virtio_transport_recv_pkt(struct virtio_transport *t,
                               struct virtio_vsock_pkt *pkt)
{
        struct sockaddr_vm src, dst;
        struct vsock_sock *vsk;
        struct sock *sk;
        bool space_available;

        vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
                        le32_to_cpu(pkt->hdr.src_port));
        vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
                        le32_to_cpu(pkt->hdr.dst_port));

        trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
                                        dst.svm_cid, dst.svm_port,
                                        le32_to_cpu(pkt->hdr.len),
                                        le16_to_cpu(pkt->hdr.type),
                                        le16_to_cpu(pkt->hdr.op),
                                        le32_to_cpu(pkt->hdr.flags),
                                        le32_to_cpu(pkt->hdr.buf_alloc),
                                        le32_to_cpu(pkt->hdr.fwd_cnt));

        if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) {
                (void)virtio_transport_reset_no_sock(t, pkt);
                goto free_pkt;
        }

        /* The socket must be in connected or bound table
         * otherwise send reset back
         */
        sk = vsock_find_connected_socket(&src, &dst);
        if (!sk) {
                sk = vsock_find_bound_socket(&dst);
                if (!sk) {
                        (void)virtio_transport_reset_no_sock(t, pkt);
                        goto free_pkt;
                }
        }

        vsk = vsock_sk(sk);

        space_available = virtio_transport_space_update(sk, pkt);

        lock_sock(sk);

        /* Update CID in case it has changed after a transport reset event */
        vsk->local_addr.svm_cid = dst.svm_cid;

        if (space_available)
                sk->sk_write_space(sk);

        switch (sk->sk_state) {
        case TCP_LISTEN:
                virtio_transport_recv_listen(sk, pkt, t);
                virtio_transport_free_pkt(pkt);
                break;
        case TCP_SYN_SENT:
                virtio_transport_recv_connecting(sk, pkt);
                virtio_transport_free_pkt(pkt);
                break;
        case TCP_ESTABLISHED:
                virtio_transport_recv_connected(sk, pkt);
                break;
        case TCP_CLOSING:
                virtio_transport_recv_disconnecting(sk, pkt);
                virtio_transport_free_pkt(pkt);
                break;
        default:
                virtio_transport_free_pkt(pkt);
                break;
        }

        release_sock(sk);

        /* Release refcnt obtained when we fetched this socket out of the
         * bound or connected list.
         */
        sock_put(sk);
        return;

free_pkt:
        virtio_transport_free_pkt(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);

void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
{
        kfree(pkt->buf);
        kfree(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");