// SPDX-License-Identifier: GPL-2.0-only
/*
 * common code for virtio vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 */
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/virtio.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/virtio_vsock.h>
#include <uapi/linux/vsockmon.h>

#include <net/sock.h>
#include <net/af_vsock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/vsock_virtio_transport_common.h>

/* How long to wait for graceful shutdown of a connection */
#define VSOCK_CLOSE_TIMEOUT (8 * HZ)

/* Threshold for detecting small packets to copy */
#define GOOD_COPY_LEN 128

static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
{
	const struct vsock_transport *t = vsock_core_get_transport(vsk);

	return container_of(t, struct virtio_transport, transport);
}

static struct virtio_vsock_pkt *
virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
			   size_t len,
			   u32 src_cid,
			   u32 src_port,
			   u32 dst_cid,
			   u32 dst_port)
{
	struct virtio_vsock_pkt *pkt;
	int err;

	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
	if (!pkt)
		return NULL;

	pkt->hdr.type		= cpu_to_le16(info->type);
	pkt->hdr.op		= cpu_to_le16(info->op);
	pkt->hdr.src_cid	= cpu_to_le64(src_cid);
	pkt->hdr.dst_cid	= cpu_to_le64(dst_cid);
	pkt->hdr.src_port	= cpu_to_le32(src_port);
	pkt->hdr.dst_port	= cpu_to_le32(dst_port);
	pkt->hdr.flags		= cpu_to_le32(info->flags);
	pkt->len		= len;
	pkt->hdr.len		= cpu_to_le32(len);
	pkt->reply		= info->reply;
	pkt->vsk		= info->vsk;

	if (info->msg && len > 0) {
		pkt->buf = kmalloc(len, GFP_KERNEL);
		if (!pkt->buf)
			goto out_pkt;

		pkt->buf_len = len;

		err = memcpy_from_msg(pkt->buf, info->msg, len);
		if (err)
			goto out;
	}

	trace_virtio_transport_alloc_pkt(src_cid, src_port,
					 dst_cid, dst_port,
					 len,
					 info->type,
					 info->op,
					 info->flags);

	return pkt;

out:
	kfree(pkt->buf);
out_pkt:
	kfree(pkt);
	return NULL;
}
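
/* Illustrative usage sketch (not part of this file's call graph): the
 * returned packet is owned by the caller and must be released with
 * virtio_transport_free_pkt() unless it is handed off to a send_pkt()
 * callback, which takes ownership. The variable names below mirror
 * virtio_transport_send_pkt_info(); the 4096-byte length is hypothetical.
 *
 *	struct virtio_vsock_pkt *pkt;
 *
 *	pkt = virtio_transport_alloc_pkt(&info, 4096,
 *					 src_cid, src_port,
 *					 dst_cid, dst_port);
 *	if (!pkt)
 *		return -ENOMEM;
 *	return t->send_pkt(pkt);
 */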

/* Packet capture */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
	struct virtio_vsock_pkt *pkt = opaque;
	struct af_vsockmon_hdr *hdr;
	struct sk_buff *skb;
	size_t payload_len;
	void *payload_buf;

	/* A packet could be split to fit the RX buffer, so we can retrieve
	 * the payload length from the header and the buffer pointer taking
	 * care of the offset in the original packet.
	 */
	payload_len = le32_to_cpu(pkt->hdr.len);
	payload_buf = pkt->buf + pkt->off;

	skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len,
			GFP_ATOMIC);
	if (!skb)
		return NULL;

	hdr = skb_put(skb, sizeof(*hdr));

	/* pkt->hdr is little-endian so no need to byteswap here */
	hdr->src_cid = pkt->hdr.src_cid;
	hdr->src_port = pkt->hdr.src_port;
	hdr->dst_cid = pkt->hdr.dst_cid;
	hdr->dst_port = pkt->hdr.dst_port;

	hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
	hdr->len = cpu_to_le16(sizeof(pkt->hdr));
	memset(hdr->reserved, 0, sizeof(hdr->reserved));

	switch (le16_to_cpu(pkt->hdr.op)) {
	case VIRTIO_VSOCK_OP_REQUEST:
	case VIRTIO_VSOCK_OP_RESPONSE:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
		break;
	case VIRTIO_VSOCK_OP_RST:
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
		break;
	case VIRTIO_VSOCK_OP_RW:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
		break;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
		break;
	default:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
		break;
	}

	skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));

	if (payload_len)
		skb_put_data(skb, payload_buf, payload_len);

	return skb;
}

void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
{
	vsock_deliver_tap(virtio_transport_build_skb, pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);

static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
					  struct virtio_vsock_pkt_info *info)
{
	u32 src_cid, src_port, dst_cid, dst_port;
	struct virtio_vsock_sock *vvs;
	struct virtio_vsock_pkt *pkt;
	u32 pkt_len = info->pkt_len;

	src_cid = virtio_transport_get_ops(vsk)->transport.get_local_cid();
	src_port = vsk->local_addr.svm_port;
	if (!info->remote_cid) {
		dst_cid = vsk->remote_addr.svm_cid;
		dst_port = vsk->remote_addr.svm_port;
	} else {
		dst_cid = info->remote_cid;
		dst_port = info->remote_port;
	}

	vvs = vsk->trans;

	/* we can send less than pkt_len bytes */
	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;

	/* virtio_transport_get_credit might return less than pkt_len credit */
	pkt_len = virtio_transport_get_credit(vvs, pkt_len);

	/* Do not send zero length OP_RW pkt */
	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
		return pkt_len;

	pkt = virtio_transport_alloc_pkt(info, pkt_len,
					 src_cid, src_port,
					 dst_cid, dst_port);
	if (!pkt) {
		virtio_transport_put_credit(vvs, pkt_len);
		return -ENOMEM;
	}

	virtio_transport_inc_tx_pkt(vvs, pkt);

	return virtio_transport_get_ops(vsk)->send_pkt(pkt);
}

static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
					struct virtio_vsock_pkt *pkt)
{
	if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
		return false;

	vvs->rx_bytes += pkt->len;
	return true;
}

static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
					struct virtio_vsock_pkt *pkt)
{
	vvs->rx_bytes -= pkt->len;
	vvs->fwd_cnt += pkt->len;
}
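
/* Worked example of the rx accounting above (numbers are illustrative):
 * with buf_alloc = 256 KiB and rx_bytes = 252 KiB, an incoming 8 KiB
 * packet is refused by virtio_transport_inc_rx_pkt() because
 * 252 KiB + 8 KiB > 256 KiB, and the caller drops it; in practice this
 * only happens if the peer ignores the advertised credit. When the
 * reader consumes data, virtio_transport_dec_rx_pkt() moves the bytes
 * from rx_bytes into fwd_cnt, which is advertised back to the peer in
 * every packet header by virtio_transport_inc_tx_pkt() below.
 */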

void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs,
				 struct virtio_vsock_pkt *pkt)
{
	spin_lock_bh(&vvs->rx_lock);
	vvs->last_fwd_cnt = vvs->fwd_cnt;
	pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
	pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
	spin_unlock_bh(&vvs->rx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);

u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
	u32 ret;

	spin_lock_bh(&vvs->tx_lock);
	ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
	if (ret > credit)
		ret = credit;
	vvs->tx_cnt += ret;
	spin_unlock_bh(&vvs->tx_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_get_credit);

void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
	spin_lock_bh(&vvs->tx_lock);
	vvs->tx_cnt -= credit;
	spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);

static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
					       int type,
					       struct virtio_vsock_hdr *hdr)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
		.type = type,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}

static ssize_t
virtio_transport_stream_do_peek(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct virtio_vsock_pkt *pkt;
	size_t bytes, total = 0, off;
	int err = -EFAULT;

	spin_lock_bh(&vvs->rx_lock);

	list_for_each_entry(pkt, &vvs->rx_queue, list) {
		off = pkt->off;

		if (total == len)
			break;

		while (total < len && off < pkt->len) {
			bytes = len - total;
			if (bytes > pkt->len - off)
				bytes = pkt->len - off;

			/* sk_lock is held by caller so no one else can dequeue.
			 * Unlock rx_lock since memcpy_to_msg() may sleep.
			 */
			spin_unlock_bh(&vvs->rx_lock);

			err = memcpy_to_msg(msg, pkt->buf + off, bytes);
			if (err)
				goto out;

			spin_lock_bh(&vvs->rx_lock);

			total += bytes;
			off += bytes;
		}
	}

	spin_unlock_bh(&vvs->rx_lock);

	return total;

out:
	if (total)
		err = total;
	return err;
}
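
/* Worked example of the tx credit math above (illustrative numbers):
 * suppose the peer advertised peer_buf_alloc = 65536, we have sent
 * tx_cnt = 70000 bytes in total, and the peer has consumed
 * peer_fwd_cnt = 60000 of them. The bytes still in flight are
 * 70000 - 60000 = 10000, so virtio_transport_get_credit() grants at
 * most 65536 - 10000 = 55536 bytes, clamped to the requested credit.
 * virtio_transport_put_credit() returns unused credit, e.g. when
 * packet allocation fails in virtio_transport_send_pkt_info().
 */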

static ssize_t
virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct virtio_vsock_pkt *pkt;
	size_t bytes, total = 0;
	u32 free_space;
	int err = -EFAULT;

	spin_lock_bh(&vvs->rx_lock);
	while (total < len && !list_empty(&vvs->rx_queue)) {
		pkt = list_first_entry(&vvs->rx_queue,
				       struct virtio_vsock_pkt, list);

		bytes = len - total;
		if (bytes > pkt->len - pkt->off)
			bytes = pkt->len - pkt->off;

		/* sk_lock is held by caller so no one else can dequeue.
		 * Unlock rx_lock since memcpy_to_msg() may sleep.
		 */
		spin_unlock_bh(&vvs->rx_lock);

		err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
		if (err)
			goto out;

		spin_lock_bh(&vvs->rx_lock);

		total += bytes;
		pkt->off += bytes;
		if (pkt->off == pkt->len) {
			virtio_transport_dec_rx_pkt(vvs, pkt);
			list_del(&pkt->list);
			virtio_transport_free_pkt(pkt);
		}
	}

	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);

	spin_unlock_bh(&vvs->rx_lock);

	/* To reduce the number of credit update messages,
	 * don't update credits as long as lots of space is available.
	 * Note: the limit chosen here is arbitrary. Setting the limit
	 * too high causes extra messages. Too low causes transmitter
	 * stalls. As stalls are in theory more expensive than extra
	 * messages, we set the limit to a high value. TODO: experiment
	 * with different values.
	 */
	if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
		virtio_transport_send_credit_update(vsk,
						    VIRTIO_VSOCK_TYPE_STREAM,
						    NULL);
	}

	return total;

out:
	if (total)
		err = total;
	return err;
}

ssize_t
virtio_transport_stream_dequeue(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len, int flags)
{
	if (flags & MSG_PEEK)
		return virtio_transport_stream_do_peek(vsk, msg, len);
	else
		return virtio_transport_stream_do_dequeue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);

int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
			       struct msghdr *msg,
			       size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);

s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	spin_lock_bh(&vvs->rx_lock);
	bytes = vvs->rx_bytes;
	spin_unlock_bh(&vvs->rx_lock);

	return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);

static s64 virtio_transport_has_space(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
	if (bytes < 0)
		bytes = 0;

	return bytes;
}

s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	spin_lock_bh(&vvs->tx_lock);
	bytes = virtio_transport_has_space(vsk);
	spin_unlock_bh(&vvs->tx_lock);

	return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);

int virtio_transport_do_socket_init(struct vsock_sock *vsk,
				    struct vsock_sock *psk)
{
	struct virtio_vsock_sock *vvs;

	vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);
	if (!vvs)
		return -ENOMEM;

	vsk->trans = vvs;
	vvs->vsk = vsk;
	if (psk && psk->trans) {
		struct virtio_vsock_sock *ptrans = psk->trans;

		vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
	}

	if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
		vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;

	vvs->buf_alloc = vsk->buffer_size;

	spin_lock_init(&vvs->rx_lock);
	spin_lock_init(&vvs->tx_lock);
	INIT_LIST_HEAD(&vvs->rx_queue);

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);

/* sk_lock held by the caller */
void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
		*val = VIRTIO_VSOCK_MAX_BUF_SIZE;

	vvs->buf_alloc = *val;

	virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
					    NULL);
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);

int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
				size_t target,
				bool *data_ready_now)
{
	if (vsock_stream_has_data(vsk))
		*data_ready_now = true;
	else
		*data_ready_now = false;

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);
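
/* Note on the notify_* callbacks in this transport: they are invoked by
 * the af_vsock core from its poll/recv/send paths, and most of them are
 * no-ops here. Flow control needs no per-operation bookkeeping because
 * credit state (buf_alloc/fwd_cnt) already piggybacks on every packet
 * header via virtio_transport_inc_tx_pkt().
 */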

int
virtio_transport_notify_poll_out(struct vsock_sock *vsk,
				 size_t target,
				 bool *space_avail_now)
{
	s64 free_space;

	free_space = vsock_stream_has_space(vsk);
	if (free_space > 0)
		*space_avail_now = true;
	else if (free_space == 0)
		*space_avail_now = false;

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);

int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);

int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);

int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);

int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
	size_t target, ssize_t copied, bool data_read,
	struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);

int virtio_transport_notify_send_init(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);

int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);

int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);

int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
	ssize_t written, struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);

u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
{
	return vsk->buffer_size;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);

bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);

bool virtio_transport_stream_allow(u32 cid, u32 port)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);

int virtio_transport_dgram_bind(struct vsock_sock *vsk,
				struct sockaddr_vm *addr)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);

bool virtio_transport_dgram_allow(u32 cid, u32 port)
{
	return false;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);

int virtio_transport_connect(struct vsock_sock *vsk)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_REQUEST,
		.type = VIRTIO_VSOCK_TYPE_STREAM,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_connect);
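
/* Connection establishment sketch: the initiator sends OP_REQUEST via
 * virtio_transport_connect() above, with the socket already in
 * TCP_SYN_SENT (set in the af_vsock core, not here). The acceptor
 * replies with OP_RESPONSE from virtio_transport_recv_listen(), and
 * virtio_transport_recv_connecting() then moves the initiator to
 * TCP_ESTABLISHED. Any other reply results in a RST.
 */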

int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_SHUTDOWN,
		.type = VIRTIO_VSOCK_TYPE_STREAM,
		.flags = (mode & RCV_SHUTDOWN ?
			  VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
			 (mode & SEND_SHUTDOWN ?
			  VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_shutdown);

int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
			       struct sockaddr_vm *remote_addr,
			       struct msghdr *msg,
			       size_t dgram_len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);

ssize_t
virtio_transport_stream_enqueue(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RW,
		.type = VIRTIO_VSOCK_TYPE_STREAM,
		.msg = msg,
		.pkt_len = len,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);

void virtio_transport_destruct(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	kfree(vvs);
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);

static int virtio_transport_reset(struct vsock_sock *vsk,
				  struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.type = VIRTIO_VSOCK_TYPE_STREAM,
		.reply = !!pkt,
		.vsk = vsk,
	};

	/* Send RST only if the original pkt is not a RST pkt */
	if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	return virtio_transport_send_pkt_info(vsk, &info);
}

/* Normally packets are associated with a socket. There may be no socket if an
 * attempt was made to connect to a socket that does not exist.
 */
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
					  struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_pkt *reply;
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.type = le16_to_cpu(pkt->hdr.type),
		.reply = true,
	};

	/* Send RST only if the original pkt is not a RST pkt */
	if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	reply = virtio_transport_alloc_pkt(&info, 0,
					   le64_to_cpu(pkt->hdr.dst_cid),
					   le32_to_cpu(pkt->hdr.dst_port),
					   le64_to_cpu(pkt->hdr.src_cid),
					   le32_to_cpu(pkt->hdr.src_port));
	if (!reply)
		return -ENOMEM;

	if (!t) {
		virtio_transport_free_pkt(reply);
		return -ENOTCONN;
	}

	return t->send_pkt(reply);
}

static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
	if (timeout) {
		DEFINE_WAIT_FUNC(wait, woken_wake_function);

		add_wait_queue(sk_sleep(sk), &wait);

		do {
			if (sk_wait_event(sk, &timeout,
					  sock_flag(sk, SOCK_DONE), &wait))
				break;
		} while (!signal_pending(current) && timeout);

		remove_wait_queue(sk_sleep(sk), &wait);
	}
}

static void virtio_transport_do_close(struct vsock_sock *vsk,
				      bool cancel_timeout)
{
	struct sock *sk = sk_vsock(vsk);

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	if (vsock_stream_has_data(vsk) <= 0)
		sk->sk_state = TCP_CLOSING;
	sk->sk_state_change(sk);

	if (vsk->close_work_scheduled &&
	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
		vsk->close_work_scheduled = false;

		vsock_remove_sock(vsk);

		/* Release refcnt obtained when we scheduled the timeout */
		sock_put(sk);
	}
}
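
/* Orderly close sketch, tying the helpers above and below together:
 * close() sends OP_SHUTDOWN with both flags, optionally lingers in
 * virtio_transport_wait_close(), then arms close_work so that a peer
 * that never answers is reset after VSOCK_CLOSE_TIMEOUT (8 seconds with
 * the definition at the top of this file). Either the peer's RST or the
 * timeout finally runs virtio_transport_do_close().
 */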

static void virtio_transport_close_timeout(struct work_struct *work)
{
	struct vsock_sock *vsk =
		container_of(work, struct vsock_sock, close_work.work);
	struct sock *sk = sk_vsock(vsk);

	sock_hold(sk);
	lock_sock(sk);

	if (!sock_flag(sk, SOCK_DONE)) {
		(void)virtio_transport_reset(vsk, NULL);

		virtio_transport_do_close(vsk, false);
	}

	vsk->close_work_scheduled = false;

	release_sock(sk);
	sock_put(sk);
}

/* User context, vsk->sk is locked */
static bool virtio_transport_close(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;

	if (!(sk->sk_state == TCP_ESTABLISHED ||
	      sk->sk_state == TCP_CLOSING))
		return true;

	/* Already received SHUTDOWN from peer, reply with RST */
	if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
		(void)virtio_transport_reset(vsk, NULL);
		return true;
	}

	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
		(void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);

	if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
		virtio_transport_wait_close(sk, sk->sk_lingertime);

	if (sock_flag(sk, SOCK_DONE))
		return true;

	sock_hold(sk);
	INIT_DELAYED_WORK(&vsk->close_work,
			  virtio_transport_close_timeout);
	vsk->close_work_scheduled = true;
	schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
	return false;
}

void virtio_transport_release(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct virtio_vsock_pkt *pkt, *tmp;
	struct sock *sk = &vsk->sk;
	bool remove_sock = true;

	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	if (sk->sk_type == SOCK_STREAM)
		remove_sock = virtio_transport_close(vsk);

	list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
		list_del(&pkt->list);
		virtio_transport_free_pkt(pkt);
	}
	release_sock(sk);

	if (remove_sock)
		vsock_remove_sock(vsk);
}
EXPORT_SYMBOL_GPL(virtio_transport_release);

static int
virtio_transport_recv_connecting(struct sock *sk,
				 struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	int err;
	int skerr;

	switch (le16_to_cpu(pkt->hdr.op)) {
	case VIRTIO_VSOCK_OP_RESPONSE:
		sk->sk_state = TCP_ESTABLISHED;
		sk->sk_socket->state = SS_CONNECTED;
		vsock_insert_connected(vsk);
		sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_INVALID:
		break;
	case VIRTIO_VSOCK_OP_RST:
		skerr = ECONNRESET;
		err = 0;
		goto destroy;
	default:
		skerr = EPROTO;
		err = -EINVAL;
		goto destroy;
	}
	return 0;

destroy:
	virtio_transport_reset(vsk, pkt);
	sk->sk_state = TCP_CLOSE;
	sk->sk_err = skerr;
	sk->sk_error_report(sk);
	return err;
}
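
/* Worked example for the small-packet coalescing done in
 * virtio_transport_recv_enqueue() below (illustrative sizes): with
 * GOOD_COPY_LEN = 128, a 64-byte packet arriving while the last queued
 * packet has buf_len = 4096 and len = 1000 is memcpy()ed into that
 * buffer (64 <= 4096 - 1000) and its own allocation is freed, instead
 * of queueing an entire new packet for a tiny payload.
 */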

static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
			      struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool can_enqueue, free_pkt = false;

	pkt->len = le32_to_cpu(pkt->hdr.len);
	pkt->off = 0;

	spin_lock_bh(&vvs->rx_lock);

	can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
	if (!can_enqueue) {
		free_pkt = true;
		goto out;
	}

	/* Try to copy small packets into the buffer of last packet queued,
	 * to avoid wasting memory queueing the entire buffer with a small
	 * payload.
	 */
	if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
		struct virtio_vsock_pkt *last_pkt;

		last_pkt = list_last_entry(&vvs->rx_queue,
					   struct virtio_vsock_pkt, list);

		/* If there is space in the last packet queued, we copy the
		 * new packet in its buffer.
		 */
		if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
			memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
			       pkt->len);
			last_pkt->len += pkt->len;
			free_pkt = true;
			goto out;
		}
	}

	list_add_tail(&pkt->list, &vvs->rx_queue);

out:
	spin_unlock_bh(&vvs->rx_lock);
	if (free_pkt)
		virtio_transport_free_pkt(pkt);
}

static int
virtio_transport_recv_connected(struct sock *sk,
				struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	int err = 0;

	switch (le16_to_cpu(pkt->hdr.op)) {
	case VIRTIO_VSOCK_OP_RW:
		virtio_transport_recv_enqueue(vsk, pkt);
		sk->sk_data_ready(sk);
		return err;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
		sk->sk_write_space(sk);
		break;
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
			vsk->peer_shutdown |= RCV_SHUTDOWN;
		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
			vsk->peer_shutdown |= SEND_SHUTDOWN;
		if (vsk->peer_shutdown == SHUTDOWN_MASK &&
		    vsock_stream_has_data(vsk) <= 0 &&
		    !sock_flag(sk, SOCK_DONE)) {
			(void)virtio_transport_reset(vsk, NULL);

			virtio_transport_do_close(vsk, true);
		}
		if (le32_to_cpu(pkt->hdr.flags))
			sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_RST:
		virtio_transport_do_close(vsk, true);
		break;
	default:
		err = -EINVAL;
		break;
	}

	virtio_transport_free_pkt(pkt);
	return err;
}

static void
virtio_transport_recv_disconnecting(struct sock *sk,
				    struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
		virtio_transport_do_close(vsk, true);
}

static int
virtio_transport_send_response(struct vsock_sock *vsk,
			       struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RESPONSE,
		.type = VIRTIO_VSOCK_TYPE_STREAM,
		.remote_cid = le64_to_cpu(pkt->hdr.src_cid),
		.remote_port = le32_to_cpu(pkt->hdr.src_port),
		.reply = true,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
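
/* Receive-path state machine, as dispatched by
 * virtio_transport_recv_pkt() further below (summary of this file's
 * handlers):
 *
 *	TCP_LISTEN      -> virtio_transport_recv_listen()
 *	TCP_SYN_SENT    -> virtio_transport_recv_connecting()
 *	TCP_ESTABLISHED -> virtio_transport_recv_connected()
 *	TCP_CLOSING     -> virtio_transport_recv_disconnecting()
 */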

static bool virtio_transport_space_update(struct sock *sk,
					  struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool space_available;

	/* Listener sockets are not associated with any transport, so we
	 * cannot look at their state to see whether there is space available
	 * in the remote peer, but since they are only used to receive
	 * requests, we can assume that there is always space available in
	 * the other peer.
	 */
	if (!vvs)
		return true;

	/* buf_alloc and fwd_cnt are always included in the hdr */
	spin_lock_bh(&vvs->tx_lock);
	vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
	vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt);
	space_available = virtio_transport_has_space(vsk);
	spin_unlock_bh(&vvs->tx_lock);
	return space_available;
}

/* Handle server socket */
static int
virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
			     struct virtio_transport *t)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	struct vsock_sock *vchild;
	struct sock *child;
	int ret;

	if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
		virtio_transport_reset(vsk, pkt);
		return -EINVAL;
	}

	if (sk_acceptq_is_full(sk)) {
		virtio_transport_reset(vsk, pkt);
		return -ENOMEM;
	}

	child = vsock_create_connected(sk);
	if (!child) {
		virtio_transport_reset(vsk, pkt);
		return -ENOMEM;
	}

	sk_acceptq_added(sk);

	lock_sock_nested(child, SINGLE_DEPTH_NESTING);

	child->sk_state = TCP_ESTABLISHED;

	vchild = vsock_sk(child);
	vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
			le32_to_cpu(pkt->hdr.dst_port));
	vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
			le32_to_cpu(pkt->hdr.src_port));

	ret = vsock_assign_transport(vchild, vsk);
	/* The transport assigned (based on remote_addr) must be the same
	 * one on which we received the request.
	 */
	if (ret || vchild->transport != &t->transport) {
		release_sock(child);
		virtio_transport_reset(vsk, pkt);
		sock_put(child);
		return ret;
	}

	if (virtio_transport_space_update(child, pkt))
		child->sk_write_space(child);

	vsock_insert_connected(vchild);
	vsock_enqueue_accept(sk, child);
	virtio_transport_send_response(vchild, pkt);

	release_sock(child);

	sk->sk_data_ready(sk);
	return 0;
}
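
/* Lookup order used by virtio_transport_recv_pkt() below: an incoming
 * packet is first matched against the connected table (the full pair of
 * source and destination CID/port) and only then against the bound
 * table (listeners). Anything that matches neither is answered with a
 * RST via virtio_transport_reset_no_sock().
 */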

/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
 * lock.
 */
void virtio_transport_recv_pkt(struct virtio_transport *t,
			       struct virtio_vsock_pkt *pkt)
{
	struct sockaddr_vm src, dst;
	struct vsock_sock *vsk;
	struct sock *sk;
	bool space_available;

	vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
			le32_to_cpu(pkt->hdr.src_port));
	vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
			le32_to_cpu(pkt->hdr.dst_port));

	trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
					dst.svm_cid, dst.svm_port,
					le32_to_cpu(pkt->hdr.len),
					le16_to_cpu(pkt->hdr.type),
					le16_to_cpu(pkt->hdr.op),
					le32_to_cpu(pkt->hdr.flags),
					le32_to_cpu(pkt->hdr.buf_alloc),
					le32_to_cpu(pkt->hdr.fwd_cnt));

	if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) {
		(void)virtio_transport_reset_no_sock(t, pkt);
		goto free_pkt;
	}

	/* The socket must be in the connected or bound table, otherwise
	 * send a reset back.
	 */
	sk = vsock_find_connected_socket(&src, &dst);
	if (!sk) {
		sk = vsock_find_bound_socket(&dst);
		if (!sk) {
			(void)virtio_transport_reset_no_sock(t, pkt);
			goto free_pkt;
		}
	}

	vsk = vsock_sk(sk);

	space_available = virtio_transport_space_update(sk, pkt);

	lock_sock(sk);

	/* Update CID in case it has changed after a transport reset event */
	vsk->local_addr.svm_cid = dst.svm_cid;

	if (space_available)
		sk->sk_write_space(sk);

	switch (sk->sk_state) {
	case TCP_LISTEN:
		virtio_transport_recv_listen(sk, pkt, t);
		virtio_transport_free_pkt(pkt);
		break;
	case TCP_SYN_SENT:
		virtio_transport_recv_connecting(sk, pkt);
		virtio_transport_free_pkt(pkt);
		break;
	case TCP_ESTABLISHED:
		virtio_transport_recv_connected(sk, pkt);
		break;
	case TCP_CLOSING:
		virtio_transport_recv_disconnecting(sk, pkt);
		virtio_transport_free_pkt(pkt);
		break;
	default:
		virtio_transport_free_pkt(pkt);
		break;
	}

	release_sock(sk);

	/* Release refcnt obtained when we fetched this socket out of the
	 * bound or connected list.
	 */
	sock_put(sk);
	return;

free_pkt:
	virtio_transport_free_pkt(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);

void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
{
	kfree(pkt->buf);
	kfree(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");