// SPDX-License-Identifier: GPL-2.0-only
/*
 * common code for virtio vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 */
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/virtio_vsock.h>
#include <uapi/linux/vsockmon.h>

#include <net/sock.h>
#include <net/af_vsock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/vsock_virtio_transport_common.h>

/* How long to wait for graceful shutdown of a connection */
#define VSOCK_CLOSE_TIMEOUT (8 * HZ)

/* Threshold for detecting small packets to copy */
#define GOOD_COPY_LEN  128

static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
{
	const struct vsock_transport *t = vsock_core_get_transport(vsk);

	if (WARN_ON(!t))
		return NULL;

	return container_of(t, struct virtio_transport, transport);
}

static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
				       struct virtio_vsock_pkt_info *info,
				       size_t pkt_len)
{
	struct iov_iter *iov_iter;

	if (!info->msg)
		return false;

	iov_iter = &info->msg->msg_iter;

	if (iov_iter->iov_offset)
		return false;

	/* We can't send whole iov. */
	if (iov_iter->count > pkt_len)
		return false;

	/* Check that transport can send data in zerocopy mode. */
	t_ops = virtio_transport_get_ops(info->vsk);

	if (t_ops->can_msgzerocopy) {
		int pages_in_iov = iov_iter_npages(iov_iter, MAX_SKB_FRAGS);
		int pages_to_send = min(pages_in_iov, MAX_SKB_FRAGS);

		/* +1 is for packet header. */
		return t_ops->can_msgzerocopy(pages_to_send + 1);
	}

	return true;
}

static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk,
					   struct sk_buff *skb,
					   struct msghdr *msg,
					   bool zerocopy)
{
	struct ubuf_info *uarg;

	if (msg->msg_ubuf) {
		uarg = msg->msg_ubuf;
		net_zcopy_get(uarg);
	} else {
		struct iov_iter *iter = &msg->msg_iter;
		struct ubuf_info_msgzc *uarg_zc;

		uarg = msg_zerocopy_realloc(sk_vsock(vsk),
					    iter->count,
					    NULL);
		if (!uarg)
			return -1;

		uarg_zc = uarg_to_msgzc(uarg);
		uarg_zc->zerocopy = zerocopy ? 1 : 0;
	}

	skb_zcopy_init(skb, uarg);

	return 0;
}
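
/* A rough sketch of how userspace drives the zerocopy path set up above
 * (illustrative only; error handling omitted):
 *
 *	int one = 1;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
 *	send(fd, buf, len, MSG_ZEROCOPY);
 *	// later, the completion arrives on the socket error queue:
 *	recvmsg(fd, &msg, MSG_ERRQUEUE);
 *
 * The ubuf_info taken or allocated in virtio_transport_init_zcopy_skb()
 * is what raises that completion once every skb referencing the user
 * pages has been released.
 */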

static int virtio_transport_fill_skb(struct sk_buff *skb,
				     struct virtio_vsock_pkt_info *info,
				     size_t len,
				     bool zcopy)
{
	if (zcopy)
		return __zerocopy_sg_from_iter(info->msg, NULL, skb,
					       &info->msg->msg_iter,
					       len);

	return memcpy_from_msg(skb_put(skb, len), info->msg, len);
}

static void virtio_transport_init_hdr(struct sk_buff *skb,
				      struct virtio_vsock_pkt_info *info,
				      size_t payload_len,
				      u32 src_cid,
				      u32 src_port,
				      u32 dst_cid,
				      u32 dst_port)
{
	struct virtio_vsock_hdr *hdr;

	hdr = virtio_vsock_hdr(skb);
	hdr->type	= cpu_to_le16(info->type);
	hdr->op		= cpu_to_le16(info->op);
	hdr->src_cid	= cpu_to_le64(src_cid);
	hdr->dst_cid	= cpu_to_le64(dst_cid);
	hdr->src_port	= cpu_to_le32(src_port);
	hdr->dst_port	= cpu_to_le32(dst_port);
	hdr->flags	= cpu_to_le32(info->flags);
	hdr->len	= cpu_to_le32(payload_len);
	hdr->buf_alloc	= cpu_to_le32(0);
	hdr->fwd_cnt	= cpu_to_le32(0);
}
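
/* For reference, the wire header filled in above is the little-endian
 * layout from <uapi/linux/virtio_vsock.h>:
 *
 *	struct virtio_vsock_hdr {
 *		__le64 src_cid;
 *		__le64 dst_cid;
 *		__le32 src_port;
 *		__le32 dst_port;
 *		__le32 len;
 *		__le16 type;
 *		__le16 op;
 *		__le32 flags;
 *		__le32 buf_alloc;
 *		__le32 fwd_cnt;
 *	} __attribute__((packed));
 *
 * buf_alloc and fwd_cnt are zeroed here and stamped with the current
 * credit state in virtio_transport_inc_tx_pkt() just before transmit.
 */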

static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb,
						void *dst,
						size_t len)
{
	struct iov_iter iov_iter = { 0 };
	struct kvec kvec;
	size_t to_copy;

	kvec.iov_base = dst;
	kvec.iov_len = len;

	iov_iter.iter_type = ITER_KVEC;
	iov_iter.kvec = &kvec;
	iov_iter.nr_segs = 1;

	to_copy = min_t(size_t, len, skb->len);

	skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
			       &iov_iter, to_copy);
}

/* Packet capture */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
	struct virtio_vsock_hdr *pkt_hdr;
	struct sk_buff *pkt = opaque;
	struct af_vsockmon_hdr *hdr;
	struct sk_buff *skb;
	size_t payload_len;

	/* A packet could be split to fit the RX buffer, so we can retrieve
	 * the payload length from the header and the buffer pointer taking
	 * care of the offset in the original packet.
	 */
	pkt_hdr = virtio_vsock_hdr(pkt);
	payload_len = pkt->len;

	skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
			GFP_ATOMIC);
	if (!skb)
		return NULL;

	hdr = skb_put(skb, sizeof(*hdr));

	/* pkt->hdr is little-endian so no need to byteswap here */
	hdr->src_cid = pkt_hdr->src_cid;
	hdr->src_port = pkt_hdr->src_port;
	hdr->dst_cid = pkt_hdr->dst_cid;
	hdr->dst_port = pkt_hdr->dst_port;

	hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
	hdr->len = cpu_to_le16(sizeof(*pkt_hdr));
	memset(hdr->reserved, 0, sizeof(hdr->reserved));

	switch (le16_to_cpu(pkt_hdr->op)) {
	case VIRTIO_VSOCK_OP_REQUEST:
	case VIRTIO_VSOCK_OP_RESPONSE:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
		break;
	case VIRTIO_VSOCK_OP_RST:
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
		break;
	case VIRTIO_VSOCK_OP_RW:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
		break;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
		break;
	default:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
		break;
	}

	skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));

	if (payload_len) {
		if (skb_is_nonlinear(pkt)) {
			void *data = skb_put(skb, payload_len);

			virtio_transport_copy_nonlinear_skb(pkt, data, payload_len);
		} else {
			skb_put_data(skb, pkt->data, payload_len);
		}
	}

	return skb;
}

void virtio_transport_deliver_tap_pkt(struct sk_buff *skb)
{
	if (virtio_vsock_skb_tap_delivered(skb))
		return;

	vsock_deliver_tap(virtio_transport_build_skb, skb);
	virtio_vsock_skb_set_tap_delivered(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
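
/* The tap path above feeds packets to the vsockmon device, so vsock
 * traffic can be captured with standard tools, e.g. (assuming the
 * vsockmon module is available):
 *
 *	modprobe vsockmon
 *	ip link add type vsockmon
 *	ip link set vsockmon0 up
 *	tcpdump -i vsockmon0
 */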

static u16 virtio_transport_get_type(struct sock *sk)
{
	if (sk->sk_type == SOCK_STREAM)
		return VIRTIO_VSOCK_TYPE_STREAM;
	else
		return VIRTIO_VSOCK_TYPE_SEQPACKET;
}

/* Returns new sk_buff on success, otherwise returns NULL. */
static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
						  size_t payload_len,
						  bool zcopy,
						  u32 src_cid,
						  u32 src_port,
						  u32 dst_cid,
						  u32 dst_port)
{
	struct vsock_sock *vsk;
	struct sk_buff *skb;
	size_t skb_len;

	skb_len = VIRTIO_VSOCK_SKB_HEADROOM;

	if (!zcopy)
		skb_len += payload_len;

	skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
	if (!skb)
		return NULL;

	virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port,
				  dst_cid, dst_port);

	vsk = info->vsk;

	/* If 'vsk' != NULL then payload is always present, so we
	 * will never call '__zerocopy_sg_from_iter()' below without
	 * setting skb owner in 'skb_set_owner_w()'. The only case
	 * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message
	 * without payload.
	 */
	WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy);

	/* Set owner here, because '__zerocopy_sg_from_iter()' uses
	 * owner of skb without check to update 'sk_wmem_alloc'.
	 */
	if (vsk)
		skb_set_owner_w(skb, sk_vsock(vsk));

	if (info->msg && payload_len > 0) {
		int err;

		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
		if (err)
			goto out;

		if (msg_data_left(info->msg) == 0 &&
		    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
			struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);

			hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);

			if (info->msg->msg_flags & MSG_EOR)
				hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
		}
	}

	if (info->reply)
		virtio_vsock_skb_set_reply(skb);

	trace_virtio_transport_alloc_pkt(src_cid, src_port,
					 dst_cid, dst_port,
					 payload_len,
					 info->type,
					 info->op,
					 info->flags,
					 zcopy);

	return skb;

out:
	kfree_skb(skb);
	return NULL;
}
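
/* A worked example of the fragmentation performed below (sizes are
 * illustrative, assuming the 64 KiB VIRTIO_VSOCK_MAX_PKT_BUF_SIZE): a
 * 150 KiB send with full credit goes out as three packets of 64 KiB,
 * 64 KiB and 22 KiB. If the peer only grants 80 KiB of credit, pkt_len
 * is clamped first and only 64 KiB + 16 KiB are sent; the short count
 * is returned to the caller.
 */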

/* This function can only be used on connecting/connected sockets,
 * since a socket assigned to a transport is required.
 *
 * Do not use on listener sockets!
 */
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
					  struct virtio_vsock_pkt_info *info)
{
	u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
	u32 src_cid, src_port, dst_cid, dst_port;
	const struct virtio_transport *t_ops;
	struct virtio_vsock_sock *vvs;
	u32 pkt_len = info->pkt_len;
	bool can_zcopy = false;
	u32 rest_len;
	int ret;

	info->type = virtio_transport_get_type(sk_vsock(vsk));

	t_ops = virtio_transport_get_ops(vsk);
	if (unlikely(!t_ops))
		return -EFAULT;

	src_cid = t_ops->transport.get_local_cid();
	src_port = vsk->local_addr.svm_port;
	if (!info->remote_cid) {
		dst_cid	= vsk->remote_addr.svm_cid;
		dst_port = vsk->remote_addr.svm_port;
	} else {
		dst_cid = info->remote_cid;
		dst_port = info->remote_port;
	}

	vvs = vsk->trans;

	/* virtio_transport_get_credit might return less than pkt_len credit */
	pkt_len = virtio_transport_get_credit(vvs, pkt_len);

	/* Do not send zero length OP_RW pkt */
	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
		return pkt_len;

	if (info->msg) {
		/* If zerocopy is not enabled by 'setsockopt()', we behave as
		 * if there were no MSG_ZEROCOPY flag set.
		 */
		if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
			info->msg->msg_flags &= ~MSG_ZEROCOPY;

		if (info->msg->msg_flags & MSG_ZEROCOPY)
			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);

		if (can_zcopy)
			max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
					    (MAX_SKB_FRAGS * PAGE_SIZE));
	}

	rest_len = pkt_len;

	do {
		struct sk_buff *skb;
		size_t skb_len;

		skb_len = min(max_skb_len, rest_len);

		skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
						 src_cid, src_port,
						 dst_cid, dst_port);
		if (!skb) {
			ret = -ENOMEM;
			break;
		}

		/* We process buffer part by part, allocating skb on
		 * each iteration. If this is the last skb for this buffer
		 * and MSG_ZEROCOPY mode is in use - we must allocate
		 * completion for the current syscall.
		 */
		if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY &&
		    skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) {
			if (virtio_transport_init_zcopy_skb(vsk, skb,
							    info->msg,
							    can_zcopy)) {
				ret = -ENOMEM;
				break;
			}
		}

		virtio_transport_inc_tx_pkt(vvs, skb);

		ret = t_ops->send_pkt(skb);
		if (ret < 0)
			break;

		/* Both virtio and vhost 'send_pkt()' return 'skb_len',
		 * but for reliability use 'ret' instead of 'skb_len'.
		 * Also if a partial send happens (e.g. 'ret' != 'skb_len')
		 * somehow, we break this loop, but account such returned
		 * value in 'virtio_transport_put_credit()'.
		 */
		rest_len -= ret;

		if (WARN_ONCE(ret != skb_len,
			      "'send_pkt()' returns %i, but %zu expected\n",
			      ret, skb_len))
			break;
	} while (rest_len);

	virtio_transport_put_credit(vvs, rest_len);

	/* Return the number of bytes, if any data has been sent. */
	if (rest_len != pkt_len)
		ret = pkt_len - rest_len;

	return ret;
}

static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
					u32 len)
{
	if (vvs->rx_bytes + len > vvs->buf_alloc)
		return false;

	vvs->rx_bytes += len;
	return true;
}

static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
					u32 len)
{
	vvs->rx_bytes -= len;
	vvs->fwd_cnt += len;
}

void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);

	spin_lock_bh(&vvs->rx_lock);
	vvs->last_fwd_cnt = vvs->fwd_cnt;
	hdr->fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
	hdr->buf_alloc = cpu_to_le32(vvs->buf_alloc);
	spin_unlock_bh(&vvs->rx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);

u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
	u32 ret;

	if (!credit)
		return 0;

	spin_lock_bh(&vvs->tx_lock);
	ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
	if (ret > credit)
		ret = credit;
	vvs->tx_cnt += ret;
	spin_unlock_bh(&vvs->tx_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_get_credit);

void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
	if (!credit)
		return;

	spin_lock_bh(&vvs->tx_lock);
	vvs->tx_cnt -= credit;
	spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
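
/* Credit arithmetic example for virtio_transport_get_credit() (numbers
 * are arbitrary): if the peer advertised peer_buf_alloc = 64 KiB, we
 * have sent tx_cnt = 48 KiB in total and the peer reported
 * peer_fwd_cnt = 16 KiB already consumed, the peer can still absorb
 * 64K - (48K - 16K) = 32 KiB, so a 40 KiB request is clamped to 32 KiB.
 */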

static int virtio_transport_send_credit_update(struct vsock_sock *vsk)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}

static ssize_t
virtio_transport_stream_do_peek(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct sk_buff *skb;
	size_t total = 0;
	int err;

	spin_lock_bh(&vvs->rx_lock);

	skb_queue_walk(&vvs->rx_queue, skb) {
		size_t bytes;

		bytes = len - total;
		if (bytes > skb->len)
			bytes = skb->len;

		spin_unlock_bh(&vvs->rx_lock);

		/* sk_lock is held by caller so no one else can dequeue.
		 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
		 */
		err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
					     &msg->msg_iter, bytes);
		if (err)
			goto out;

		total += bytes;

		spin_lock_bh(&vvs->rx_lock);

		if (total == len)
			break;
	}

	spin_unlock_bh(&vvs->rx_lock);

	return total;

out:
	if (total)
		err = total;
	return err;
}

static ssize_t
virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	size_t bytes, total = 0;
	struct sk_buff *skb;
	int err = -EFAULT;
	u32 free_space;

	spin_lock_bh(&vvs->rx_lock);

	if (WARN_ONCE(skb_queue_empty(&vvs->rx_queue) && vvs->rx_bytes,
		      "rx_queue is empty, but rx_bytes is non-zero\n")) {
		spin_unlock_bh(&vvs->rx_lock);
		return err;
	}

	while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
		skb = skb_peek(&vvs->rx_queue);

		bytes = min_t(size_t, len - total,
			      skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset);

		/* sk_lock is held by caller so no one else can dequeue.
		 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
		 */
		spin_unlock_bh(&vvs->rx_lock);

		err = skb_copy_datagram_iter(skb,
					     VIRTIO_VSOCK_SKB_CB(skb)->offset,
					     &msg->msg_iter, bytes);
		if (err)
			goto out;

		spin_lock_bh(&vvs->rx_lock);

		total += bytes;

		VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;

		if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
			u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);

			virtio_transport_dec_rx_pkt(vvs, pkt_len);
			__skb_unlink(skb, &vvs->rx_queue);
			consume_skb(skb);
		}
	}

	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);

	spin_unlock_bh(&vvs->rx_lock);

	/* To reduce the number of credit update messages,
	 * don't update credits as long as lots of space is available.
	 * Note: the limit chosen here is arbitrary. Setting the limit
	 * too high causes extra messages. Too low causes transmitter
	 * stalls. As stalls are in theory more expensive than extra
	 * messages, we set the limit to a high value. TODO: experiment
	 * with different values.
	 */
	if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
		virtio_transport_send_credit_update(vsk);

	return total;

out:
	if (total)
		err = total;
	return err;
}
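
/* Threshold example for the credit-update check above (illustrative):
 * with buf_alloc = 256 KiB, free_space only drops below the 64 KiB
 * VIRTIO_VSOCK_MAX_PKT_BUF_SIZE once more than 192 KiB have been
 * consumed since the last advertised fwd_cnt, so roughly one
 * CREDIT_UPDATE is sent per 192 KiB of data instead of one per read.
 */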

static ssize_t
virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk,
				   struct msghdr *msg)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct sk_buff *skb;
	size_t total, len;

	spin_lock_bh(&vvs->rx_lock);

	if (!vvs->msg_count) {
		spin_unlock_bh(&vvs->rx_lock);
		return 0;
	}

	total = 0;
	len = msg_data_left(msg);

	skb_queue_walk(&vvs->rx_queue, skb) {
		struct virtio_vsock_hdr *hdr;

		if (total < len) {
			size_t bytes;
			int err;

			bytes = len - total;
			if (bytes > skb->len)
				bytes = skb->len;

			spin_unlock_bh(&vvs->rx_lock);

			/* sk_lock is held by caller so no one else can dequeue.
			 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
			 */
			err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
						     &msg->msg_iter, bytes);
			if (err)
				return err;

			spin_lock_bh(&vvs->rx_lock);
		}

		total += skb->len;
		hdr = virtio_vsock_hdr(skb);

		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
			if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
				msg->msg_flags |= MSG_EOR;

			break;
		}
	}

	spin_unlock_bh(&vvs->rx_lock);

	return total;
}

static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
						 struct msghdr *msg,
						 int flags)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	int dequeued_len = 0;
	size_t user_buf_len = msg_data_left(msg);
	bool msg_ready = false;
	struct sk_buff *skb;

	spin_lock_bh(&vvs->rx_lock);

	if (vvs->msg_count == 0) {
		spin_unlock_bh(&vvs->rx_lock);
		return 0;
	}

	while (!msg_ready) {
		struct virtio_vsock_hdr *hdr;
		size_t pkt_len;

		skb = __skb_dequeue(&vvs->rx_queue);
		if (!skb)
			break;
		hdr = virtio_vsock_hdr(skb);
		pkt_len = (size_t)le32_to_cpu(hdr->len);

		if (dequeued_len >= 0) {
			size_t bytes_to_copy;

			bytes_to_copy = min(user_buf_len, pkt_len);

			if (bytes_to_copy) {
				int err;

				/* sk_lock is held by caller so no one else can dequeue.
				 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
				 */
				spin_unlock_bh(&vvs->rx_lock);

				err = skb_copy_datagram_iter(skb, 0,
							     &msg->msg_iter,
							     bytes_to_copy);
				if (err) {
					/* Copy of message failed. Rest of
					 * fragments will be freed without copy.
					 */
					dequeued_len = err;
				} else {
					user_buf_len -= bytes_to_copy;
				}

				spin_lock_bh(&vvs->rx_lock);
			}

			if (dequeued_len >= 0)
				dequeued_len += pkt_len;
		}

		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
			msg_ready = true;
			vvs->msg_count--;

			if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
				msg->msg_flags |= MSG_EOR;
		}

		virtio_transport_dec_rx_pkt(vvs, pkt_len);
		kfree_skb(skb);
	}

	spin_unlock_bh(&vvs->rx_lock);

	virtio_transport_send_credit_update(vsk);

	return dequeued_len;
}

ssize_t
virtio_transport_stream_dequeue(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len, int flags)
{
	if (flags & MSG_PEEK)
		return virtio_transport_stream_do_peek(vsk, msg, len);
	else
		return virtio_transport_stream_do_dequeue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);

ssize_t
virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   int flags)
{
	if (flags & MSG_PEEK)
		return virtio_transport_seqpacket_do_peek(vsk, msg);
	else
		return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
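
/* SEQPACKET boundaries in the helpers above: VIRTIO_VSOCK_SEQ_EOM marks
 * the last fragment of a message and VIRTIO_VSOCK_SEQ_EOR additionally
 * reflects the sender's MSG_EOR. For example, a message fragmented as
 * [8 KiB][8 KiB][4 KiB, EOM] is dequeued by a single recvmsg() as
 * 20 KiB; a shorter user buffer still consumes the whole message, and
 * the full length is returned so the caller can flag MSG_TRUNC.
 */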

int
virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	spin_lock_bh(&vvs->tx_lock);

	if (len > vvs->peer_buf_alloc) {
		spin_unlock_bh(&vvs->tx_lock);
		return -EMSGSIZE;
	}

	spin_unlock_bh(&vvs->tx_lock);

	return virtio_transport_stream_enqueue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue);

int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
			       struct msghdr *msg,
			       size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);

s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	spin_lock_bh(&vvs->rx_lock);
	bytes = vvs->rx_bytes;
	spin_unlock_bh(&vvs->rx_lock);

	return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);

u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	u32 msg_count;

	spin_lock_bh(&vvs->rx_lock);
	msg_count = vvs->msg_count;
	spin_unlock_bh(&vvs->rx_lock);

	return msg_count;
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);

static s64 virtio_transport_has_space(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
	if (bytes < 0)
		bytes = 0;

	return bytes;
}

s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	spin_lock_bh(&vvs->tx_lock);
	bytes = virtio_transport_has_space(vsk);
	spin_unlock_bh(&vvs->tx_lock);

	return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);

int virtio_transport_do_socket_init(struct vsock_sock *vsk,
				    struct vsock_sock *psk)
{
	struct virtio_vsock_sock *vvs;

	vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);
	if (!vvs)
		return -ENOMEM;

	vsk->trans = vvs;
	vvs->vsk = vsk;
	if (psk && psk->trans) {
		struct virtio_vsock_sock *ptrans = psk->trans;

		vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
	}

	if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
		vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;

	vvs->buf_alloc = vsk->buffer_size;

	spin_lock_init(&vvs->rx_lock);
	spin_lock_init(&vvs->tx_lock);
	skb_queue_head_init(&vvs->rx_queue);

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);
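
/* The receive buffer advertised to the peer as buf_alloc comes from
 * vsk->buffer_size, clamped to VIRTIO_VSOCK_MAX_BUF_SIZE. Userspace can
 * tune it before connecting, e.g. (illustrative):
 *
 *	u64 size = 1024 * 1024;
 *
 *	setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
 *		   &size, sizeof(size));
 */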

/* sk_lock held by the caller */
void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
		*val = VIRTIO_VSOCK_MAX_BUF_SIZE;

	vvs->buf_alloc = *val;

	virtio_transport_send_credit_update(vsk);
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);

int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
				size_t target,
				bool *data_ready_now)
{
	*data_ready_now = vsock_stream_has_data(vsk) >= target;

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);

int
virtio_transport_notify_poll_out(struct vsock_sock *vsk,
				 size_t target,
				 bool *space_avail_now)
{
	s64 free_space;

	free_space = vsock_stream_has_space(vsk);
	if (free_space > 0)
		*space_avail_now = true;
	else if (free_space == 0)
		*space_avail_now = false;

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);

int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);

int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);

int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);

int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
	size_t target, ssize_t copied, bool data_read,
	struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);

int virtio_transport_notify_send_init(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);

int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);

int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);

int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
	ssize_t written, struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);

u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
{
	return vsk->buffer_size;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);

bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);

bool virtio_transport_stream_allow(u32 cid, u32 port)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);

int virtio_transport_dgram_bind(struct vsock_sock *vsk,
				struct sockaddr_vm *addr)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);

bool virtio_transport_dgram_allow(u32 cid, u32 port)
{
	return false;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
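
/* Connection establishment mirrors TCP's model without the third ACK:
 * virtio_transport_connect() below sends OP_REQUEST and the socket
 * waits in TCP_SYN_SENT; the peer's OP_RESPONSE is handled in
 * virtio_transport_recv_connecting(), which moves the socket to
 * TCP_ESTABLISHED.
 */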

int virtio_transport_connect(struct vsock_sock *vsk)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_REQUEST,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_connect);

int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_SHUTDOWN,
		.flags = (mode & RCV_SHUTDOWN ?
			  VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
			 (mode & SEND_SHUTDOWN ?
			  VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_shutdown);

int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
			       struct sockaddr_vm *remote_addr,
			       struct msghdr *msg,
			       size_t dgram_len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);

ssize_t
virtio_transport_stream_enqueue(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RW,
		.msg = msg,
		.pkt_len = len,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);

void virtio_transport_destruct(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	kfree(vvs);
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);

static int virtio_transport_reset(struct vsock_sock *vsk,
				  struct sk_buff *skb)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.reply = !!skb,
		.vsk = vsk,
	};

	/* Send RST only if the original pkt is not a RST pkt */
	if (skb && le16_to_cpu(virtio_vsock_hdr(skb)->op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	return virtio_transport_send_pkt_info(vsk, &info);
}

/* Normally packets are associated with a socket. There may be no socket if an
 * attempt was made to connect to a socket that does not exist.
 */
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
					  struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.type = le16_to_cpu(hdr->type),
		.reply = true,
	};
	struct sk_buff *reply;

	/* Send RST only if the original pkt is not a RST pkt */
	if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	if (!t)
		return -ENOTCONN;

	reply = virtio_transport_alloc_skb(&info, 0, false,
					   le64_to_cpu(hdr->dst_cid),
					   le32_to_cpu(hdr->dst_port),
					   le64_to_cpu(hdr->src_cid),
					   le32_to_cpu(hdr->src_port));
	if (!reply)
		return -ENOMEM;

	return t->send_pkt(reply);
}
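
/* Note that virtio_transport_reset_no_sock() builds the RST with source
 * and destination swapped relative to the offending packet, since the
 * reply travels in the opposite direction.
 */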

/* This function should be called with sk_lock held and SOCK_DONE set */
static void virtio_transport_remove_sock(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	/* We don't need to take rx_lock, as the socket is closing and we are
	 * removing it.
	 */
	__skb_queue_purge(&vvs->rx_queue);
	vsock_remove_sock(vsk);
}

static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
	if (timeout) {
		DEFINE_WAIT_FUNC(wait, woken_wake_function);

		add_wait_queue(sk_sleep(sk), &wait);

		do {
			if (sk_wait_event(sk, &timeout,
					  sock_flag(sk, SOCK_DONE), &wait))
				break;
		} while (!signal_pending(current) && timeout);

		remove_wait_queue(sk_sleep(sk), &wait);
	}
}

static void virtio_transport_do_close(struct vsock_sock *vsk,
				      bool cancel_timeout)
{
	struct sock *sk = sk_vsock(vsk);

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	if (vsock_stream_has_data(vsk) <= 0)
		sk->sk_state = TCP_CLOSING;
	sk->sk_state_change(sk);

	if (vsk->close_work_scheduled &&
	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
		vsk->close_work_scheduled = false;

		virtio_transport_remove_sock(vsk);

		/* Release refcnt obtained when we scheduled the timeout */
		sock_put(sk);
	}
}

static void virtio_transport_close_timeout(struct work_struct *work)
{
	struct vsock_sock *vsk =
		container_of(work, struct vsock_sock, close_work.work);
	struct sock *sk = sk_vsock(vsk);

	sock_hold(sk);
	lock_sock(sk);

	if (!sock_flag(sk, SOCK_DONE)) {
		(void)virtio_transport_reset(vsk, NULL);

		virtio_transport_do_close(vsk, false);
	}

	vsk->close_work_scheduled = false;

	release_sock(sk);
	sock_put(sk);
}

/* User context, vsk->sk is locked */
static bool virtio_transport_close(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;

	if (!(sk->sk_state == TCP_ESTABLISHED ||
	      sk->sk_state == TCP_CLOSING))
		return true;

	/* Already received SHUTDOWN from peer, reply with RST */
	if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
		(void)virtio_transport_reset(vsk, NULL);
		return true;
	}

	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
		(void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);

	if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
		virtio_transport_wait_close(sk, sk->sk_lingertime);

	if (sock_flag(sk, SOCK_DONE)) {
		return true;
	}

	sock_hold(sk);
	INIT_DELAYED_WORK(&vsk->close_work,
			  virtio_transport_close_timeout);
	vsk->close_work_scheduled = true;
	schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
	return false;
}

void virtio_transport_release(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;
	bool remove_sock = true;

	if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
		remove_sock = virtio_transport_close(vsk);

	if (remove_sock) {
		sock_set_flag(sk, SOCK_DONE);
		virtio_transport_remove_sock(vsk);
	}
}
EXPORT_SYMBOL_GPL(virtio_transport_release);
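
/* Close sequencing: if the peer has not acknowledged the shutdown by the
 * time virtio_transport_close() returns false, the socket is kept alive
 * and close_work fires after VSOCK_CLOSE_TIMEOUT (8 seconds) to send an
 * RST and reap it.
 */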

static int
virtio_transport_recv_connecting(struct sock *sk,
				 struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	int skerr;
	int err;

	switch (le16_to_cpu(hdr->op)) {
	case VIRTIO_VSOCK_OP_RESPONSE:
		sk->sk_state = TCP_ESTABLISHED;
		sk->sk_socket->state = SS_CONNECTED;
		vsock_insert_connected(vsk);
		sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_INVALID:
		break;
	case VIRTIO_VSOCK_OP_RST:
		skerr = ECONNRESET;
		err = 0;
		goto destroy;
	default:
		skerr = EPROTO;
		err = -EINVAL;
		goto destroy;
	}
	return 0;

destroy:
	virtio_transport_reset(vsk, skb);
	sk->sk_state = TCP_CLOSE;
	sk->sk_err = skerr;
	sk_error_report(sk);
	return err;
}

static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
			      struct sk_buff *skb)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool can_enqueue, free_pkt = false;
	struct virtio_vsock_hdr *hdr;
	u32 len;

	hdr = virtio_vsock_hdr(skb);
	len = le32_to_cpu(hdr->len);

	spin_lock_bh(&vvs->rx_lock);

	can_enqueue = virtio_transport_inc_rx_pkt(vvs, len);
	if (!can_enqueue) {
		free_pkt = true;
		goto out;
	}

	if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
		vvs->msg_count++;

	/* Try to copy small packets into the buffer of last packet queued,
	 * to avoid wasting memory queueing the entire buffer with a small
	 * payload.
	 */
	if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue)) {
		struct virtio_vsock_hdr *last_hdr;
		struct sk_buff *last_skb;

		last_skb = skb_peek_tail(&vvs->rx_queue);
		last_hdr = virtio_vsock_hdr(last_skb);

		/* If there is space in the last packet queued, we copy the
		 * new packet in its buffer. We avoid this if the last packet
		 * queued has VIRTIO_VSOCK_SEQ_EOM set, because it is the
		 * delimiter of a SEQPACKET message, so 'skb' is the first
		 * packet of a new message.
		 */
		if (skb->len < skb_tailroom(last_skb) &&
		    !(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) {
			memcpy(skb_put(last_skb, skb->len), skb->data, skb->len);
			free_pkt = true;
			last_hdr->flags |= hdr->flags;
			le32_add_cpu(&last_hdr->len, len);
			goto out;
		}
	}

	__skb_queue_tail(&vvs->rx_queue, skb);

out:
	spin_unlock_bh(&vvs->rx_lock);
	if (free_pkt)
		kfree_skb(skb);
}
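
/* Coalescing example for the GOOD_COPY_LEN path above (illustrative):
 * a burst of 64-byte payloads is appended to the tailroom of the last
 * queued skb instead of queueing one skb per packet, so the length of
 * rx_queue is bounded by payload volume rather than by packet count.
 */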

static int
virtio_transport_recv_connected(struct sock *sk,
				struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	int err = 0;

	switch (le16_to_cpu(hdr->op)) {
	case VIRTIO_VSOCK_OP_RW:
		virtio_transport_recv_enqueue(vsk, skb);
		vsock_data_ready(sk);
		return err;
	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
		virtio_transport_send_credit_update(vsk);
		break;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
		sk->sk_write_space(sk);
		break;
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
			vsk->peer_shutdown |= RCV_SHUTDOWN;
		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
			vsk->peer_shutdown |= SEND_SHUTDOWN;
		if (vsk->peer_shutdown == SHUTDOWN_MASK) {
			if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) {
				(void)virtio_transport_reset(vsk, NULL);
				virtio_transport_do_close(vsk, true);
			}
			/* Remove this socket anyway because the remote peer sent
			 * the shutdown. This way a new connection will succeed
			 * if the remote peer uses the same source port,
			 * even if the old socket is still unreleased, but now
			 * disconnected.
			 */
			vsock_remove_sock(vsk);
		}
		if (le32_to_cpu(virtio_vsock_hdr(skb)->flags))
			sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_RST:
		virtio_transport_do_close(vsk, true);
		break;
	default:
		err = -EINVAL;
		break;
	}

	kfree_skb(skb);
	return err;
}

static void
virtio_transport_recv_disconnecting(struct sock *sk,
				    struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);

	if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
		virtio_transport_do_close(vsk, true);
}

static int
virtio_transport_send_response(struct vsock_sock *vsk,
			       struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RESPONSE,
		.remote_cid = le64_to_cpu(hdr->src_cid),
		.remote_port = le32_to_cpu(hdr->src_port),
		.reply = true,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}

static bool virtio_transport_space_update(struct sock *sk,
					  struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool space_available;

	/* Listener sockets are not associated with any transport, so we are
	 * not able to take the state to see if there is space available in the
	 * remote peer, but since they are only used to receive requests, we
	 * can assume that there is always space available in the other peer.
	 */
	if (!vvs)
		return true;

	/* buf_alloc and fwd_cnt are always included in the hdr */
	spin_lock_bh(&vvs->tx_lock);
	vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
	vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
	space_available = virtio_transport_has_space(vsk);
	spin_unlock_bh(&vvs->tx_lock);
	return space_available;
}

/* Handle server socket: an incoming OP_REQUEST on a TCP_LISTEN socket
 * creates a child socket, answers with OP_RESPONSE and queues the child
 * for accept().
 */
static int
virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
			     struct virtio_transport *t)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	struct vsock_sock *vchild;
	struct sock *child;
	int ret;

	if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
		virtio_transport_reset_no_sock(t, skb);
		return -EINVAL;
	}

	if (sk_acceptq_is_full(sk)) {
		virtio_transport_reset_no_sock(t, skb);
		return -ENOMEM;
	}

	child = vsock_create_connected(sk);
	if (!child) {
		virtio_transport_reset_no_sock(t, skb);
		return -ENOMEM;
	}

	sk_acceptq_added(sk);

	lock_sock_nested(child, SINGLE_DEPTH_NESTING);

	child->sk_state = TCP_ESTABLISHED;

	vchild = vsock_sk(child);
	vsock_addr_init(&vchild->local_addr, le64_to_cpu(hdr->dst_cid),
			le32_to_cpu(hdr->dst_port));
	vsock_addr_init(&vchild->remote_addr, le64_to_cpu(hdr->src_cid),
			le32_to_cpu(hdr->src_port));

	ret = vsock_assign_transport(vchild, vsk);
	/* Transport assigned (looking at remote_addr) must be the same
	 * where we received the request.
	 */
	if (ret || vchild->transport != &t->transport) {
		release_sock(child);
		virtio_transport_reset_no_sock(t, skb);
		sock_put(child);
		return ret;
	}

	if (virtio_transport_space_update(child, skb))
		child->sk_write_space(child);

	vsock_insert_connected(vchild);
	vsock_enqueue_accept(sk, child);
	virtio_transport_send_response(vchild, skb);

	release_sock(child);

	sk->sk_data_ready(sk);
	return 0;
}

static bool virtio_transport_valid_type(u16 type)
{
	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
}

/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
 * lock.
 */
void virtio_transport_recv_pkt(struct virtio_transport *t,
			       struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct sockaddr_vm src, dst;
	struct vsock_sock *vsk;
	struct sock *sk;
	bool space_available;

	vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
			le32_to_cpu(hdr->src_port));
	vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
			le32_to_cpu(hdr->dst_port));

	trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
					dst.svm_cid, dst.svm_port,
					le32_to_cpu(hdr->len),
					le16_to_cpu(hdr->type),
					le16_to_cpu(hdr->op),
					le32_to_cpu(hdr->flags),
					le32_to_cpu(hdr->buf_alloc),
					le32_to_cpu(hdr->fwd_cnt));

	if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
		(void)virtio_transport_reset_no_sock(t, skb);
		goto free_pkt;
	}

	/* The socket must be in the connected or bound table, otherwise send
	 * a reset back.
	 */
	sk = vsock_find_connected_socket(&src, &dst);
	if (!sk) {
		sk = vsock_find_bound_socket(&dst);
		if (!sk) {
			(void)virtio_transport_reset_no_sock(t, skb);
			goto free_pkt;
		}
	}

	if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
		(void)virtio_transport_reset_no_sock(t, skb);
		sock_put(sk);
		goto free_pkt;
	}

	if (!skb_set_owner_sk_safe(skb, sk)) {
		WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n");
		goto free_pkt;
	}

	vsk = vsock_sk(sk);

	lock_sock(sk);

	/* Check if sk has been closed before lock_sock */
	if (sock_flag(sk, SOCK_DONE)) {
		(void)virtio_transport_reset_no_sock(t, skb);
		release_sock(sk);
		sock_put(sk);
		goto free_pkt;
	}

	space_available = virtio_transport_space_update(sk, skb);

	/* Update CID in case it has changed after a transport reset event */
	if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
		vsk->local_addr.svm_cid = dst.svm_cid;

	if (space_available)
		sk->sk_write_space(sk);

	switch (sk->sk_state) {
	case TCP_LISTEN:
		virtio_transport_recv_listen(sk, skb, t);
		kfree_skb(skb);
		break;
	case TCP_SYN_SENT:
		virtio_transport_recv_connecting(sk, skb);
		kfree_skb(skb);
		break;
	case TCP_ESTABLISHED:
		virtio_transport_recv_connected(sk, skb);
		break;
	case TCP_CLOSING:
		virtio_transport_recv_disconnecting(sk, skb);
		kfree_skb(skb);
		break;
	default:
		(void)virtio_transport_reset_no_sock(t, skb);
		kfree_skb(skb);
		break;
	}

	release_sock(sk);

	/* Release refcnt obtained when we fetched this socket out of the
	 * bound or connected list.
	 */
	sock_put(sk);
	return;

free_pkt:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
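
/* virtio_transport_purge_skbs() below is used by the virtio and vhost
 * transports to drop a closing socket's not-yet-transmitted skbs from
 * their send queues; the returned count of reply packets lets callers
 * adjust any queued-replies accounting they keep for rx throttling.
 */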

/* Remove skbs found in a queue that have a vsk that matches.
 *
 * Each skb is freed.
 *
 * Returns the count of skbs that were reply packets.
 */
int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue)
{
	struct sk_buff_head freeme;
	struct sk_buff *skb, *tmp;
	int cnt = 0;

	skb_queue_head_init(&freeme);

	spin_lock_bh(&queue->lock);
	skb_queue_walk_safe(queue, skb, tmp) {
		if (vsock_sk(skb->sk) != vsk)
			continue;

		__skb_unlink(skb, queue);
		__skb_queue_tail(&freeme, skb);

		if (virtio_vsock_skb_reply(skb))
			cnt++;
	}
	spin_unlock_bh(&queue->lock);

	__skb_queue_purge(&freeme);

	return cnt;
}
EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs);

int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct sock *sk = sk_vsock(vsk);
	struct sk_buff *skb;
	int off = 0;
	int err;

	spin_lock_bh(&vvs->rx_lock);
	/* Use __skb_recv_datagram() for race-free handling of the receive. It
	 * works for types other than dgrams.
	 */
	skb = __skb_recv_datagram(sk, &vvs->rx_queue, MSG_DONTWAIT, &off, &err);
	spin_unlock_bh(&vvs->rx_lock);

	if (!skb)
		return err;

	return recv_actor(sk, skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");