1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * common code for virtio vsock 4 * 5 * Copyright (C) 2013-2015 Red Hat, Inc. 6 * Author: Asias He <asias@redhat.com> 7 * Stefan Hajnoczi <stefanha@redhat.com> 8 */ 9 #include <linux/spinlock.h> 10 #include <linux/module.h> 11 #include <linux/sched/signal.h> 12 #include <linux/ctype.h> 13 #include <linux/list.h> 14 #include <linux/virtio_vsock.h> 15 #include <uapi/linux/vsockmon.h> 16 17 #include <net/sock.h> 18 #include <net/af_vsock.h> 19 20 #define CREATE_TRACE_POINTS 21 #include <trace/events/vsock_virtio_transport_common.h> 22 23 /* How long to wait for graceful shutdown of a connection */ 24 #define VSOCK_CLOSE_TIMEOUT (8 * HZ) 25 26 /* Threshold for detecting small packets to copy */ 27 #define GOOD_COPY_LEN 128 28 29 static void virtio_transport_cancel_close_work(struct vsock_sock *vsk, 30 bool cancel_timeout); 31 static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs); 32 33 static const struct virtio_transport * 34 virtio_transport_get_ops(struct vsock_sock *vsk) 35 { 36 const struct vsock_transport *t = vsock_core_get_transport(vsk); 37 38 if (WARN_ON(!t)) 39 return NULL; 40 41 return container_of(t, struct virtio_transport, transport); 42 } 43 44 static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, 45 struct virtio_vsock_pkt_info *info, 46 size_t pkt_len) 47 { 48 struct iov_iter *iov_iter; 49 50 if (!info->msg) 51 return false; 52 53 iov_iter = &info->msg->msg_iter; 54 55 if (iov_iter->iov_offset) 56 return false; 57 58 /* We can't send whole iov. */ 59 if (iov_iter->count > pkt_len) 60 return false; 61 62 /* Check that transport can send data in zerocopy mode. */ 63 if (t_ops->can_msgzerocopy) { 64 int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); 65 66 /* +1 is for packet header. 
*/ 67 return t_ops->can_msgzerocopy(pages_to_send + 1); 68 } 69 70 return true; 71 } 72 73 static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, 74 struct sk_buff *skb, 75 struct msghdr *msg, 76 size_t pkt_len, 77 bool zerocopy) 78 { 79 struct ubuf_info *uarg; 80 81 if (msg->msg_ubuf) { 82 uarg = msg->msg_ubuf; 83 net_zcopy_get(uarg); 84 } else { 85 struct ubuf_info_msgzc *uarg_zc; 86 87 uarg = msg_zerocopy_realloc(sk_vsock(vsk), 88 pkt_len, NULL, false); 89 if (!uarg) 90 return -1; 91 92 uarg_zc = uarg_to_msgzc(uarg); 93 uarg_zc->zerocopy = zerocopy ? 1 : 0; 94 } 95 96 skb_zcopy_init(skb, uarg); 97 98 return 0; 99 } 100 101 static int virtio_transport_fill_skb(struct sk_buff *skb, 102 struct virtio_vsock_pkt_info *info, 103 size_t len, 104 bool zcopy) 105 { 106 struct msghdr *msg = info->msg; 107 108 if (zcopy) 109 return __zerocopy_sg_from_iter(msg, NULL, skb, 110 &msg->msg_iter, len, NULL); 111 112 virtio_vsock_skb_put(skb, len); 113 return skb_copy_datagram_from_iter_full(skb, 0, &msg->msg_iter, len); 114 } 115 116 static void virtio_transport_init_hdr(struct sk_buff *skb, 117 struct virtio_vsock_pkt_info *info, 118 size_t payload_len, 119 u32 src_cid, 120 u32 src_port, 121 u32 dst_cid, 122 u32 dst_port) 123 { 124 struct virtio_vsock_hdr *hdr; 125 126 hdr = virtio_vsock_hdr(skb); 127 hdr->type = cpu_to_le16(info->type); 128 hdr->op = cpu_to_le16(info->op); 129 hdr->src_cid = cpu_to_le64(src_cid); 130 hdr->dst_cid = cpu_to_le64(dst_cid); 131 hdr->src_port = cpu_to_le32(src_port); 132 hdr->dst_port = cpu_to_le32(dst_port); 133 hdr->flags = cpu_to_le32(info->flags); 134 hdr->len = cpu_to_le32(payload_len); 135 hdr->buf_alloc = cpu_to_le32(0); 136 hdr->fwd_cnt = cpu_to_le32(0); 137 } 138 139 /* Packet capture */ 140 static struct sk_buff *virtio_transport_build_skb(void *opaque) 141 { 142 struct virtio_vsock_hdr *pkt_hdr; 143 struct sk_buff *pkt = opaque; 144 struct af_vsockmon_hdr *hdr; 145 struct sk_buff *skb; 146 size_t payload_len; 147 148 /* A 
packet could be split to fit the RX buffer, so we use 149 * the payload length from the header, which has been updated 150 * by the sender to reflect the fragment size. 151 */ 152 pkt_hdr = virtio_vsock_hdr(pkt); 153 payload_len = le32_to_cpu(pkt_hdr->len); 154 155 skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len, 156 GFP_ATOMIC); 157 if (!skb) 158 return NULL; 159 160 hdr = skb_put(skb, sizeof(*hdr)); 161 162 /* pkt->hdr is little-endian so no need to byteswap here */ 163 hdr->src_cid = pkt_hdr->src_cid; 164 hdr->src_port = pkt_hdr->src_port; 165 hdr->dst_cid = pkt_hdr->dst_cid; 166 hdr->dst_port = pkt_hdr->dst_port; 167 168 hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO); 169 hdr->len = cpu_to_le16(sizeof(*pkt_hdr)); 170 memset(hdr->reserved, 0, sizeof(hdr->reserved)); 171 172 switch (le16_to_cpu(pkt_hdr->op)) { 173 case VIRTIO_VSOCK_OP_REQUEST: 174 case VIRTIO_VSOCK_OP_RESPONSE: 175 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT); 176 break; 177 case VIRTIO_VSOCK_OP_RST: 178 case VIRTIO_VSOCK_OP_SHUTDOWN: 179 hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT); 180 break; 181 case VIRTIO_VSOCK_OP_RW: 182 hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD); 183 break; 184 case VIRTIO_VSOCK_OP_CREDIT_UPDATE: 185 case VIRTIO_VSOCK_OP_CREDIT_REQUEST: 186 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL); 187 break; 188 default: 189 hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN); 190 break; 191 } 192 193 skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr)); 194 195 if (payload_len) { 196 struct iov_iter iov_iter; 197 struct kvec kvec; 198 void *data = skb_put(skb, payload_len); 199 200 kvec.iov_base = data; 201 kvec.iov_len = payload_len; 202 iov_iter_kvec(&iov_iter, ITER_DEST, &kvec, 1, payload_len); 203 204 if (skb_copy_datagram_iter(pkt, VIRTIO_VSOCK_SKB_CB(pkt)->offset, 205 &iov_iter, payload_len)) { 206 kfree_skb(skb); 207 return NULL; 208 } 209 } 210 211 return skb; 212 } 213 214 void virtio_transport_deliver_tap_pkt(struct sk_buff *skb) 215 { 216 if 
(virtio_vsock_skb_tap_delivered(skb)) 217 return; 218 219 vsock_deliver_tap(virtio_transport_build_skb, skb); 220 virtio_vsock_skb_set_tap_delivered(skb); 221 } 222 EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); 223 224 static u16 virtio_transport_get_type(struct sock *sk) 225 { 226 if (sk->sk_type == SOCK_STREAM) 227 return VIRTIO_VSOCK_TYPE_STREAM; 228 else 229 return VIRTIO_VSOCK_TYPE_SEQPACKET; 230 } 231 232 /* Returns new sk_buff on success, otherwise returns NULL. */ 233 static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, 234 size_t payload_len, 235 bool zcopy, 236 u32 src_cid, 237 u32 src_port, 238 u32 dst_cid, 239 u32 dst_port) 240 { 241 struct vsock_sock *vsk; 242 struct sk_buff *skb; 243 size_t skb_len; 244 245 skb_len = VIRTIO_VSOCK_SKB_HEADROOM; 246 247 if (!zcopy) 248 skb_len += payload_len; 249 250 skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); 251 if (!skb) 252 return NULL; 253 254 virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port, 255 dst_cid, dst_port); 256 257 vsk = info->vsk; 258 259 /* If 'vsk' != NULL then payload is always present, so we 260 * will never call '__zerocopy_sg_from_iter()' below without 261 * setting skb owner in 'skb_set_owner_w()'. The only case 262 * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message 263 * without payload. 264 */ 265 WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy); 266 267 /* Set owner here, because '__zerocopy_sg_from_iter()' uses 268 * owner of skb without check to update 'sk_wmem_alloc'. 
269 */ 270 if (vsk) 271 skb_set_owner_w(skb, sk_vsock(vsk)); 272 273 if (info->msg && payload_len > 0) { 274 int err; 275 276 err = virtio_transport_fill_skb(skb, info, payload_len, zcopy); 277 if (err) 278 goto out; 279 280 if (msg_data_left(info->msg) == 0 && 281 info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { 282 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); 283 284 hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); 285 286 if (info->msg->msg_flags & MSG_EOR) 287 hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); 288 } 289 } 290 291 if (info->reply) 292 virtio_vsock_skb_set_reply(skb); 293 294 trace_virtio_transport_alloc_pkt(src_cid, src_port, 295 dst_cid, dst_port, 296 payload_len, 297 info->type, 298 info->op, 299 info->flags, 300 zcopy); 301 302 return skb; 303 out: 304 kfree_skb(skb); 305 return NULL; 306 } 307 308 /* This function can only be used on connecting/connected sockets, 309 * since a socket assigned to a transport is required. 310 * 311 * Do not use on listener sockets! 
312 */ 313 static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, 314 struct virtio_vsock_pkt_info *info) 315 { 316 u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; 317 u32 src_cid, src_port, dst_cid, dst_port; 318 const struct virtio_transport *t_ops; 319 struct virtio_vsock_sock *vvs; 320 u32 pkt_len = info->pkt_len; 321 bool can_zcopy = false; 322 u32 rest_len; 323 int ret; 324 325 info->type = virtio_transport_get_type(sk_vsock(vsk)); 326 327 t_ops = virtio_transport_get_ops(vsk); 328 if (unlikely(!t_ops)) 329 return -EFAULT; 330 331 src_cid = t_ops->transport.get_local_cid(); 332 src_port = vsk->local_addr.svm_port; 333 if (!info->remote_cid) { 334 dst_cid = vsk->remote_addr.svm_cid; 335 dst_port = vsk->remote_addr.svm_port; 336 } else { 337 dst_cid = info->remote_cid; 338 dst_port = info->remote_port; 339 } 340 341 vvs = vsk->trans; 342 343 /* virtio_transport_get_credit might return less than pkt_len credit */ 344 pkt_len = virtio_transport_get_credit(vvs, pkt_len); 345 346 /* Do not send zero length OP_RW pkt */ 347 if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) 348 return pkt_len; 349 350 if (info->msg) { 351 /* If zerocopy is not enabled by 'setsockopt()', we behave as 352 * there is no MSG_ZEROCOPY flag set. 353 */ 354 if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) 355 info->msg->msg_flags &= ~MSG_ZEROCOPY; 356 357 if (info->msg->msg_flags & MSG_ZEROCOPY) 358 can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len); 359 360 if (can_zcopy) 361 max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, 362 (MAX_SKB_FRAGS * PAGE_SIZE)); 363 } 364 365 rest_len = pkt_len; 366 367 do { 368 struct sk_buff *skb; 369 size_t skb_len; 370 371 skb_len = min(max_skb_len, rest_len); 372 373 skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy, 374 src_cid, src_port, 375 dst_cid, dst_port); 376 if (!skb) { 377 ret = -ENOMEM; 378 break; 379 } 380 381 /* We process buffer part by part, allocating skb on 382 * each iteration. 
If this is last skb for this buffer 383 * and MSG_ZEROCOPY mode is in use - we must allocate 384 * completion for the current syscall. 385 * 386 * Pass pkt_len because msg iter is already consumed 387 * by virtio_transport_fill_skb(), so iter->count 388 * can not be used for RLIMIT_MEMLOCK pinned-pages 389 * accounting done by msg_zerocopy_realloc(). 390 */ 391 if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && 392 skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { 393 if (virtio_transport_init_zcopy_skb(vsk, skb, 394 info->msg, 395 pkt_len, 396 can_zcopy)) { 397 kfree_skb(skb); 398 ret = -ENOMEM; 399 break; 400 } 401 } 402 403 virtio_transport_inc_tx_pkt(vvs, skb); 404 405 ret = t_ops->send_pkt(skb, info->net); 406 if (ret < 0) 407 break; 408 409 /* Both virtio and vhost 'send_pkt()' returns 'skb_len', 410 * but for reliability use 'ret' instead of 'skb_len'. 411 * Also if partial send happens (e.g. 'ret' != 'skb_len') 412 * somehow, we break this loop, but account such returned 413 * value in 'virtio_transport_put_credit()'. 414 */ 415 rest_len -= ret; 416 417 if (WARN_ONCE(ret != skb_len, 418 "'send_pkt()' returns %i, but %zu expected\n", 419 ret, skb_len)) 420 break; 421 } while (rest_len); 422 423 virtio_transport_put_credit(vvs, rest_len); 424 425 /* Return number of bytes, if any data has been sent. 
*/ 426 if (rest_len != pkt_len) 427 ret = pkt_len - rest_len; 428 429 return ret; 430 } 431 432 static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, 433 u32 len) 434 { 435 u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); 436 437 if (skb_overhead + vvs->buf_used + len > vvs->buf_alloc) 438 return false; 439 440 vvs->rx_bytes += len; 441 vvs->buf_used += len; 442 return true; 443 } 444 445 static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, 446 u32 bytes_read, u32 bytes_dequeued) 447 { 448 vvs->rx_bytes -= bytes_read; 449 vvs->buf_used -= bytes_dequeued; 450 vvs->fwd_cnt += bytes_dequeued; 451 } 452 453 void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb) 454 { 455 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); 456 457 spin_lock_bh(&vvs->rx_lock); 458 vvs->last_fwd_cnt = vvs->fwd_cnt; 459 hdr->fwd_cnt = cpu_to_le32(vvs->fwd_cnt); 460 hdr->buf_alloc = cpu_to_le32(vvs->buf_alloc); 461 spin_unlock_bh(&vvs->rx_lock); 462 } 463 EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); 464 465 void virtio_transport_consume_skb_sent(struct sk_buff *skb, bool consume) 466 { 467 struct sock *s = skb->sk; 468 469 if (s && skb->len) { 470 struct vsock_sock *vs = vsock_sk(s); 471 struct virtio_vsock_sock *vvs; 472 473 vvs = vs->trans; 474 475 spin_lock_bh(&vvs->tx_lock); 476 vvs->bytes_unsent -= skb->len; 477 spin_unlock_bh(&vvs->tx_lock); 478 } 479 480 if (consume) 481 consume_skb(skb); 482 } 483 EXPORT_SYMBOL_GPL(virtio_transport_consume_skb_sent); 484 485 u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) 486 { 487 u32 ret; 488 489 if (!credit) 490 return 0; 491 492 spin_lock_bh(&vvs->tx_lock); 493 ret = min_t(u32, credit, virtio_transport_has_space(vvs)); 494 vvs->tx_cnt += ret; 495 vvs->bytes_unsent += ret; 496 spin_unlock_bh(&vvs->tx_lock); 497 498 return ret; 499 } 500 EXPORT_SYMBOL_GPL(virtio_transport_get_credit); 501 502 void 
virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) 503 { 504 if (!credit) 505 return; 506 507 spin_lock_bh(&vvs->tx_lock); 508 vvs->tx_cnt -= credit; 509 vvs->bytes_unsent -= credit; 510 spin_unlock_bh(&vvs->tx_lock); 511 } 512 EXPORT_SYMBOL_GPL(virtio_transport_put_credit); 513 514 static int virtio_transport_send_credit_update(struct vsock_sock *vsk) 515 { 516 struct virtio_vsock_pkt_info info = { 517 .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, 518 .vsk = vsk, 519 .net = sock_net(sk_vsock(vsk)), 520 }; 521 522 return virtio_transport_send_pkt_info(vsk, &info); 523 } 524 525 static ssize_t 526 virtio_transport_stream_do_peek(struct vsock_sock *vsk, 527 struct msghdr *msg, 528 size_t len) 529 { 530 struct virtio_vsock_sock *vvs = vsk->trans; 531 struct sk_buff *skb; 532 size_t total = 0; 533 int err; 534 535 spin_lock_bh(&vvs->rx_lock); 536 537 skb_queue_walk(&vvs->rx_queue, skb) { 538 size_t bytes; 539 540 bytes = min_t(size_t, len - total, 541 skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset); 542 543 spin_unlock_bh(&vvs->rx_lock); 544 545 /* sk_lock is held by caller so no one else can dequeue. 546 * Unlock rx_lock since skb_copy_datagram_iter() may sleep. 
547 */ 548 err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, 549 &msg->msg_iter, bytes); 550 if (err) 551 goto out; 552 553 total += bytes; 554 555 spin_lock_bh(&vvs->rx_lock); 556 557 if (total == len) 558 break; 559 } 560 561 spin_unlock_bh(&vvs->rx_lock); 562 563 return total; 564 565 out: 566 if (total) 567 err = total; 568 return err; 569 } 570 571 static ssize_t 572 virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, 573 struct msghdr *msg, 574 size_t len) 575 { 576 struct virtio_vsock_sock *vvs = vsk->trans; 577 struct sk_buff *skb; 578 u32 fwd_cnt_delta; 579 bool low_rx_bytes; 580 int err = -EFAULT; 581 size_t total = 0; 582 u32 free_space; 583 584 spin_lock_bh(&vvs->rx_lock); 585 586 if (WARN_ONCE(skb_queue_empty(&vvs->rx_queue) && vvs->rx_bytes, 587 "rx_queue is empty, but rx_bytes is non-zero\n")) { 588 spin_unlock_bh(&vvs->rx_lock); 589 return err; 590 } 591 592 while (total < len && !skb_queue_empty(&vvs->rx_queue)) { 593 size_t bytes, dequeued = 0; 594 595 skb = skb_peek(&vvs->rx_queue); 596 597 bytes = min_t(size_t, len - total, 598 skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset); 599 600 /* sk_lock is held by caller so no one else can dequeue. 601 * Unlock rx_lock since skb_copy_datagram_iter() may sleep. 
602 */ 603 spin_unlock_bh(&vvs->rx_lock); 604 605 err = skb_copy_datagram_iter(skb, 606 VIRTIO_VSOCK_SKB_CB(skb)->offset, 607 &msg->msg_iter, bytes); 608 if (err) 609 goto out; 610 611 spin_lock_bh(&vvs->rx_lock); 612 613 total += bytes; 614 615 VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes; 616 617 if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) { 618 dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len); 619 __skb_unlink(skb, &vvs->rx_queue); 620 consume_skb(skb); 621 } 622 623 virtio_transport_dec_rx_pkt(vvs, bytes, dequeued); 624 } 625 626 fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt; 627 free_space = vvs->buf_alloc - fwd_cnt_delta; 628 low_rx_bytes = (vvs->rx_bytes < 629 sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX)); 630 631 spin_unlock_bh(&vvs->rx_lock); 632 633 /* To reduce the number of credit update messages, 634 * don't update credits as long as lots of space is available. 635 * Note: the limit chosen here is arbitrary. Setting the limit 636 * too high causes extra messages. Too low causes transmitter 637 * stalls. As stalls are in theory more expensive than extra 638 * messages, we set the limit to a high value. TODO: experiment 639 * with different values. Also send credit update message when 640 * number of bytes in rx queue is not enough to wake up reader. 
641 */ 642 if (fwd_cnt_delta && 643 (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes)) 644 virtio_transport_send_credit_update(vsk); 645 646 return total; 647 648 out: 649 if (total) 650 err = total; 651 return err; 652 } 653 654 static ssize_t 655 virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk, 656 struct msghdr *msg) 657 { 658 struct virtio_vsock_sock *vvs = vsk->trans; 659 struct sk_buff *skb; 660 size_t total, len; 661 662 spin_lock_bh(&vvs->rx_lock); 663 664 if (!vvs->msg_count) { 665 spin_unlock_bh(&vvs->rx_lock); 666 return 0; 667 } 668 669 total = 0; 670 len = msg_data_left(msg); 671 672 skb_queue_walk(&vvs->rx_queue, skb) { 673 struct virtio_vsock_hdr *hdr; 674 675 if (total < len) { 676 size_t bytes; 677 int err; 678 679 bytes = len - total; 680 if (bytes > skb->len) 681 bytes = skb->len; 682 683 spin_unlock_bh(&vvs->rx_lock); 684 685 /* sk_lock is held by caller so no one else can dequeue. 686 * Unlock rx_lock since skb_copy_datagram_iter() may sleep. 687 */ 688 err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, 689 &msg->msg_iter, bytes); 690 if (err) 691 return err; 692 693 spin_lock_bh(&vvs->rx_lock); 694 } 695 696 total += skb->len; 697 hdr = virtio_vsock_hdr(skb); 698 699 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) { 700 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) 701 msg->msg_flags |= MSG_EOR; 702 703 break; 704 } 705 } 706 707 spin_unlock_bh(&vvs->rx_lock); 708 709 return total; 710 } 711 712 static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, 713 struct msghdr *msg, 714 int flags) 715 { 716 struct virtio_vsock_sock *vvs = vsk->trans; 717 int dequeued_len = 0; 718 size_t user_buf_len = msg_data_left(msg); 719 bool msg_ready = false; 720 struct sk_buff *skb; 721 722 spin_lock_bh(&vvs->rx_lock); 723 724 if (vvs->msg_count == 0) { 725 spin_unlock_bh(&vvs->rx_lock); 726 return 0; 727 } 728 729 while (!msg_ready) { 730 struct virtio_vsock_hdr *hdr; 731 size_t pkt_len; 732 733 
skb = __skb_dequeue(&vvs->rx_queue); 734 if (!skb) 735 break; 736 hdr = virtio_vsock_hdr(skb); 737 pkt_len = (size_t)le32_to_cpu(hdr->len); 738 739 if (dequeued_len >= 0) { 740 size_t bytes_to_copy; 741 742 bytes_to_copy = min(user_buf_len, pkt_len); 743 744 if (bytes_to_copy) { 745 int err; 746 747 /* sk_lock is held by caller so no one else can dequeue. 748 * Unlock rx_lock since skb_copy_datagram_iter() may sleep. 749 */ 750 spin_unlock_bh(&vvs->rx_lock); 751 752 err = skb_copy_datagram_iter(skb, 0, 753 &msg->msg_iter, 754 bytes_to_copy); 755 if (err) { 756 /* Copy of message failed. Rest of 757 * fragments will be freed without copy. 758 */ 759 dequeued_len = err; 760 } else { 761 user_buf_len -= bytes_to_copy; 762 } 763 764 spin_lock_bh(&vvs->rx_lock); 765 } 766 767 if (dequeued_len >= 0) 768 dequeued_len += pkt_len; 769 } 770 771 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) { 772 msg_ready = true; 773 vvs->msg_count--; 774 775 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) 776 msg->msg_flags |= MSG_EOR; 777 } 778 779 virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len); 780 kfree_skb(skb); 781 } 782 783 spin_unlock_bh(&vvs->rx_lock); 784 785 virtio_transport_send_credit_update(vsk); 786 787 return dequeued_len; 788 } 789 790 ssize_t 791 virtio_transport_stream_dequeue(struct vsock_sock *vsk, 792 struct msghdr *msg, 793 size_t len, int flags) 794 { 795 if (flags & MSG_PEEK) 796 return virtio_transport_stream_do_peek(vsk, msg, len); 797 else 798 return virtio_transport_stream_do_dequeue(vsk, msg, len); 799 } 800 EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); 801 802 ssize_t 803 virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, 804 struct msghdr *msg, 805 int flags) 806 { 807 if (flags & MSG_PEEK) 808 return virtio_transport_seqpacket_do_peek(vsk, msg); 809 else 810 return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags); 811 } 812 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); 813 814 static u32 
virtio_transport_tx_buf_size(struct virtio_vsock_sock *vvs) 815 { 816 /* The peer advertises its receive buffer via peer_buf_alloc, but we 817 * cap it to our local buf_alloc so a remote peer cannot force us to 818 * queue more data than our own buffer configuration allows. 819 */ 820 return min(vvs->peer_buf_alloc, vvs->buf_alloc); 821 } 822 823 int 824 virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, 825 struct msghdr *msg, 826 size_t len) 827 { 828 struct virtio_vsock_sock *vvs = vsk->trans; 829 830 spin_lock_bh(&vvs->tx_lock); 831 832 if (len > virtio_transport_tx_buf_size(vvs)) { 833 spin_unlock_bh(&vvs->tx_lock); 834 return -EMSGSIZE; 835 } 836 837 spin_unlock_bh(&vvs->tx_lock); 838 839 return virtio_transport_stream_enqueue(vsk, msg, len); 840 } 841 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue); 842 843 int 844 virtio_transport_dgram_dequeue(struct vsock_sock *vsk, 845 struct msghdr *msg, 846 size_t len, int flags) 847 { 848 return -EOPNOTSUPP; 849 } 850 EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); 851 852 s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) 853 { 854 struct virtio_vsock_sock *vvs = vsk->trans; 855 s64 bytes; 856 857 spin_lock_bh(&vvs->rx_lock); 858 bytes = vvs->rx_bytes; 859 spin_unlock_bh(&vvs->rx_lock); 860 861 return bytes; 862 } 863 EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); 864 865 u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk) 866 { 867 struct virtio_vsock_sock *vvs = vsk->trans; 868 u32 msg_count; 869 870 spin_lock_bh(&vvs->rx_lock); 871 msg_count = vvs->msg_count; 872 spin_unlock_bh(&vvs->rx_lock); 873 874 return msg_count; 875 } 876 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data); 877 878 static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs) 879 { 880 s64 bytes; 881 882 /* Use s64 arithmetic so if the peer shrinks peer_buf_alloc while 883 * we have bytes in flight (tx_cnt - peer_fwd_cnt), the subtraction 884 * does not underflow. 
885 */ 886 bytes = (s64)virtio_transport_tx_buf_size(vvs) - 887 (vvs->tx_cnt - vvs->peer_fwd_cnt); 888 if (bytes < 0) 889 bytes = 0; 890 891 return bytes; 892 } 893 894 s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) 895 { 896 struct virtio_vsock_sock *vvs = vsk->trans; 897 s64 bytes; 898 899 spin_lock_bh(&vvs->tx_lock); 900 bytes = virtio_transport_has_space(vvs); 901 spin_unlock_bh(&vvs->tx_lock); 902 903 return bytes; 904 } 905 EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); 906 907 int virtio_transport_do_socket_init(struct vsock_sock *vsk, 908 struct vsock_sock *psk) 909 { 910 struct virtio_vsock_sock *vvs; 911 912 vvs = kzalloc_obj(*vvs); 913 if (!vvs) 914 return -ENOMEM; 915 916 vsk->trans = vvs; 917 vvs->vsk = vsk; 918 if (psk && psk->trans) { 919 struct virtio_vsock_sock *ptrans = psk->trans; 920 921 vvs->peer_buf_alloc = ptrans->peer_buf_alloc; 922 } 923 924 if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE) 925 vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE; 926 927 vvs->buf_alloc = vsk->buffer_size; 928 929 spin_lock_init(&vvs->rx_lock); 930 spin_lock_init(&vvs->tx_lock); 931 skb_queue_head_init(&vvs->rx_queue); 932 933 return 0; 934 } 935 EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); 936 937 /* sk_lock held by the caller */ 938 void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val) 939 { 940 struct virtio_vsock_sock *vvs = vsk->trans; 941 942 if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE) 943 *val = VIRTIO_VSOCK_MAX_BUF_SIZE; 944 945 vvs->buf_alloc = *val; 946 947 virtio_transport_send_credit_update(vsk); 948 } 949 EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size); 950 951 int 952 virtio_transport_notify_poll_in(struct vsock_sock *vsk, 953 size_t target, 954 bool *data_ready_now) 955 { 956 *data_ready_now = vsock_stream_has_data(vsk) >= target; 957 958 return 0; 959 } 960 EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); 961 962 int 963 virtio_transport_notify_poll_out(struct vsock_sock *vsk, 964 size_t target, 
965 bool *space_avail_now) 966 { 967 s64 free_space; 968 969 free_space = vsock_stream_has_space(vsk); 970 if (free_space > 0) 971 *space_avail_now = true; 972 else if (free_space == 0) 973 *space_avail_now = false; 974 975 return 0; 976 } 977 EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); 978 979 int virtio_transport_notify_recv_init(struct vsock_sock *vsk, 980 size_t target, struct vsock_transport_recv_notify_data *data) 981 { 982 return 0; 983 } 984 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); 985 986 int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, 987 size_t target, struct vsock_transport_recv_notify_data *data) 988 { 989 return 0; 990 } 991 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); 992 993 int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, 994 size_t target, struct vsock_transport_recv_notify_data *data) 995 { 996 return 0; 997 } 998 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); 999 1000 int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, 1001 size_t target, ssize_t copied, bool data_read, 1002 struct vsock_transport_recv_notify_data *data) 1003 { 1004 return 0; 1005 } 1006 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); 1007 1008 int virtio_transport_notify_send_init(struct vsock_sock *vsk, 1009 struct vsock_transport_send_notify_data *data) 1010 { 1011 return 0; 1012 } 1013 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); 1014 1015 int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, 1016 struct vsock_transport_send_notify_data *data) 1017 { 1018 return 0; 1019 } 1020 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); 1021 1022 int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, 1023 struct vsock_transport_send_notify_data *data) 1024 { 1025 return 0; 1026 } 1027 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); 1028 1029 int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, 
1030 ssize_t written, struct vsock_transport_send_notify_data *data) 1031 { 1032 return 0; 1033 } 1034 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); 1035 1036 u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) 1037 { 1038 return vsk->buffer_size; 1039 } 1040 EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); 1041 1042 bool virtio_transport_stream_is_active(struct vsock_sock *vsk) 1043 { 1044 return true; 1045 } 1046 EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); 1047 1048 int virtio_transport_dgram_bind(struct vsock_sock *vsk, 1049 struct sockaddr_vm *addr) 1050 { 1051 return -EOPNOTSUPP; 1052 } 1053 EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); 1054 1055 bool virtio_transport_dgram_allow(struct vsock_sock *vsk, u32 cid, u32 port) 1056 { 1057 return false; 1058 } 1059 EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); 1060 1061 int virtio_transport_connect(struct vsock_sock *vsk) 1062 { 1063 struct virtio_vsock_pkt_info info = { 1064 .op = VIRTIO_VSOCK_OP_REQUEST, 1065 .vsk = vsk, 1066 .net = sock_net(sk_vsock(vsk)), 1067 }; 1068 1069 return virtio_transport_send_pkt_info(vsk, &info); 1070 } 1071 EXPORT_SYMBOL_GPL(virtio_transport_connect); 1072 1073 int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) 1074 { 1075 struct virtio_vsock_pkt_info info = { 1076 .op = VIRTIO_VSOCK_OP_SHUTDOWN, 1077 .flags = (mode & RCV_SHUTDOWN ? 1078 VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | 1079 (mode & SEND_SHUTDOWN ? 
1080 VIRTIO_VSOCK_SHUTDOWN_SEND : 0), 1081 .vsk = vsk, 1082 .net = sock_net(sk_vsock(vsk)), 1083 }; 1084 1085 return virtio_transport_send_pkt_info(vsk, &info); 1086 } 1087 EXPORT_SYMBOL_GPL(virtio_transport_shutdown); 1088 1089 int 1090 virtio_transport_dgram_enqueue(struct vsock_sock *vsk, 1091 struct sockaddr_vm *remote_addr, 1092 struct msghdr *msg, 1093 size_t dgram_len) 1094 { 1095 return -EOPNOTSUPP; 1096 } 1097 EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); 1098 1099 ssize_t 1100 virtio_transport_stream_enqueue(struct vsock_sock *vsk, 1101 struct msghdr *msg, 1102 size_t len) 1103 { 1104 struct virtio_vsock_pkt_info info = { 1105 .op = VIRTIO_VSOCK_OP_RW, 1106 .msg = msg, 1107 .pkt_len = len, 1108 .vsk = vsk, 1109 .net = sock_net(sk_vsock(vsk)), 1110 }; 1111 1112 return virtio_transport_send_pkt_info(vsk, &info); 1113 } 1114 EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); 1115 1116 void virtio_transport_destruct(struct vsock_sock *vsk) 1117 { 1118 struct virtio_vsock_sock *vvs = vsk->trans; 1119 1120 virtio_transport_cancel_close_work(vsk, true); 1121 1122 kfree(vvs); 1123 vsk->trans = NULL; 1124 } 1125 EXPORT_SYMBOL_GPL(virtio_transport_destruct); 1126 1127 ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk) 1128 { 1129 struct virtio_vsock_sock *vvs = vsk->trans; 1130 size_t ret; 1131 1132 spin_lock_bh(&vvs->tx_lock); 1133 ret = vvs->bytes_unsent; 1134 spin_unlock_bh(&vvs->tx_lock); 1135 1136 return ret; 1137 } 1138 EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes); 1139 1140 static int virtio_transport_reset(struct vsock_sock *vsk, 1141 struct sk_buff *skb) 1142 { 1143 struct virtio_vsock_pkt_info info = { 1144 .op = VIRTIO_VSOCK_OP_RST, 1145 .reply = !!skb, 1146 .vsk = vsk, 1147 .net = sock_net(sk_vsock(vsk)), 1148 }; 1149 1150 /* Send RST only if the original pkt is not a RST pkt */ 1151 if (skb && le16_to_cpu(virtio_vsock_hdr(skb)->op) == VIRTIO_VSOCK_OP_RST) 1152 return 0; 1153 1154 return virtio_transport_send_pkt_info(vsk, &info); 
}

/* Normally packets are associated with a socket. There may be no socket if an
 * attempt was made to connect to a socket that does not exist.
 *
 * net refers to the namespace of whoever sent the invalid message. For
 * loopback, this is the namespace of the socket. For vhost, this is the
 * namespace of the VM (i.e., vhost_vsock).
 */
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
					  struct sk_buff *skb, struct net *net)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.type = le16_to_cpu(hdr->type),
		.reply = true,

		/* Set sk owner to socket we are replying to (may be NULL for
		 * non-loopback). This keeps a reference to the sock and
		 * sock_net(sk) until the reply skb is freed.
		 */
		.vsk = vsock_sk(skb->sk),

		/* net is not defined here because we pass it directly to
		 * t->send_pkt(), instead of relying on
		 * virtio_transport_send_pkt_info() to pass it. It is not needed
		 * by virtio_transport_alloc_skb().
		 */
	};
	struct sk_buff *reply;

	/* Send RST only if the original pkt is not a RST pkt */
	if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	if (!t)
		return -ENOTCONN;

	/* Reply with src/dst swapped relative to the offending packet. */
	reply = virtio_transport_alloc_skb(&info, 0, false,
					   le64_to_cpu(hdr->dst_cid),
					   le32_to_cpu(hdr->dst_port),
					   le64_to_cpu(hdr->src_cid),
					   le32_to_cpu(hdr->src_port));
	if (!reply)
		return -ENOMEM;

	return t->send_pkt(reply, net);
}

/* This function should be called with sk_lock held and SOCK_DONE set */
static void virtio_transport_remove_sock(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	/* We don't need to take rx_lock, as the socket is closing and we are
	 * removing it.
	 */
	__skb_queue_purge(&vvs->rx_queue);
	vsock_remove_sock(vsk);
}

/* If the close timeout was scheduled — and, when @cancel_timeout is
 * true, the delayed work could still be cancelled — remove the socket
 * and drop the reference taken when the work was scheduled.
 */
static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
					       bool cancel_timeout)
{
	struct sock *sk = sk_vsock(vsk);

	if (vsk->close_work_scheduled &&
	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
		vsk->close_work_scheduled = false;

		virtio_transport_remove_sock(vsk);

		/* Release refcnt obtained when we scheduled the timeout */
		sock_put(sk);
	}
}

/* Mark the socket done, record full peer shutdown, move to TCP_CLOSING
 * when no data is left to read, then tear down any pending close work.
 */
static void virtio_transport_do_close(struct vsock_sock *vsk,
				      bool cancel_timeout)
{
	struct sock *sk = sk_vsock(vsk);

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	if (vsock_stream_has_data(vsk) <= 0)
		sk->sk_state = TCP_CLOSING;
	sk->sk_state_change(sk);

	virtio_transport_cancel_close_work(vsk, cancel_timeout);
}

/* Delayed work: force-close the connection (RST + do_close) if the
 * peer did not complete a graceful shutdown within VSOCK_CLOSE_TIMEOUT.
 */
static void virtio_transport_close_timeout(struct work_struct *work)
{
	struct vsock_sock *vsk =
		container_of(work, struct vsock_sock, close_work.work);
	struct sock *sk = sk_vsock(vsk);

	sock_hold(sk);
	lock_sock(sk);

	if (!sock_flag(sk, SOCK_DONE)) {
		(void)virtio_transport_reset(vsk, NULL);

		/* Pass cancel_timeout=false: we are the timeout itself. */
		virtio_transport_do_close(vsk, false);
	}

	vsk->close_work_scheduled = false;

	release_sock(sk);
	sock_put(sk);
}

/* User context, vsk->sk is locked */
static bool virtio_transport_close(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;

	if (!(sk->sk_state == TCP_ESTABLISHED ||
	      sk->sk_state == TCP_CLOSING))
		return true;

	/* Already received SHUTDOWN from peer, reply with RST */
	if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
		(void)virtio_transport_reset(vsk, NULL);
		return true;
	}

	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
		(void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);

	/* NOTE(review): vsock_linger() presumably handles SO_LINGER-style
	 * waiting; skipped for exiting tasks — confirm against af_vsock.
	 */
	if (!(current->flags & PF_EXITING))
		vsock_linger(sk);

	if (sock_flag(sk, SOCK_DONE)) {
		return true;
	}

	/* Arm the close timeout. The reference taken here is released by
	 * the work itself or by virtio_transport_cancel_close_work().
	 */
	sock_hold(sk);
	INIT_DELAYED_WORK(&vsk->close_work,
			  virtio_transport_close_timeout);
	vsk->close_work_scheduled = true;
	schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
	return false;
}

/* Release callback: connection-oriented sockets go through the graceful
 * close path; the socket is removed immediately when that path says so.
 */
void virtio_transport_release(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;
	bool remove_sock = true;

	if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
		remove_sock = virtio_transport_close(vsk);

	if (remove_sock) {
		sock_set_flag(sk, SOCK_DONE);
		virtio_transport_remove_sock(vsk);
	}
}
EXPORT_SYMBOL_GPL(virtio_transport_release);

/* Handle a packet received while the socket is in TCP_SYN_SENT.
 * OP_RESPONSE completes the connection; OP_RST resets it with
 * ECONNRESET; anything else (except OP_INVALID) is a protocol error.
 */
static int
virtio_transport_recv_connecting(struct sock *sk,
				 struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	int skerr;
	int err;

	switch (le16_to_cpu(hdr->op)) {
	case VIRTIO_VSOCK_OP_RESPONSE:
		/* Connection accepted by the peer. */
		sk->sk_state = TCP_ESTABLISHED;
		sk->sk_socket->state = SS_CONNECTED;
		vsock_insert_connected(vsk);
		sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_INVALID:
		break;
	case VIRTIO_VSOCK_OP_RST:
		skerr = ECONNRESET;
		err = 0;
		goto destroy;
	default:
		skerr = EPROTO;
		err = -EINVAL;
		goto destroy;
	}
	return 0;

destroy:
	virtio_transport_reset(vsk, skb);
	sk->sk_state = TCP_CLOSE;
	sk->sk_err = skerr;
	sk_error_report(sk);
	return err;
}

/* Queue a received OP_RW packet on the socket's rx_queue, merging small
 * packets into the tail skb's buffer when possible. Frees the skb when
 * it is dropped (no rx space) or merged.
 */
static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
			      struct sk_buff *skb)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool can_enqueue, free_pkt = false;
	struct virtio_vsock_hdr *hdr;
	u32 len;

	hdr =
	      virtio_vsock_hdr(skb);
	len = le32_to_cpu(hdr->len);

	spin_lock_bh(&vvs->rx_lock);

	can_enqueue = virtio_transport_inc_rx_pkt(vvs, len);
	if (!can_enqueue) {
		/* No receive buffer space left: drop the packet. */
		free_pkt = true;
		goto out;
	}

	if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
		vvs->msg_count++;

	/* Try to copy small packets into the buffer of last packet queued,
	 * to avoid wasting memory queueing the entire buffer with a small
	 * payload. Skip non-linear (e.g. zerocopy) skbs; these carry payload
	 * in skb_shinfo.
	 */
	if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue) &&
	    !skb_is_nonlinear(skb)) {
		struct virtio_vsock_hdr *last_hdr;
		struct sk_buff *last_skb;

		last_skb = skb_peek_tail(&vvs->rx_queue);
		last_hdr = virtio_vsock_hdr(last_skb);

		/* If there is space in the last packet queued, we copy the
		 * new packet in its buffer. We avoid this if the last packet
		 * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is
		 * delimiter of SEQPACKET message, so 'pkt' is the first packet
		 * of a new message.
		 */
		if (skb->len < skb_tailroom(last_skb) &&
		    !(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) {
			memcpy(skb_put(last_skb, skb->len), skb->data, skb->len);
			free_pkt = true;
			/* Fold the merged packet's flags and length into the
			 * tail packet's header.
			 */
			last_hdr->flags |= hdr->flags;
			le32_add_cpu(&last_hdr->len, len);
			goto out;
		}
	}

	__skb_queue_tail(&vvs->rx_queue, skb);

out:
	spin_unlock_bh(&vvs->rx_lock);
	if (free_pkt)
		kfree_skb(skb);
}

/* Handle a packet received while the socket is TCP_ESTABLISHED.
 * OP_RW packets are handed to recv_enqueue (which takes ownership);
 * every other packet is freed here before returning.
 */
static int
virtio_transport_recv_connected(struct sock *sk,
				struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	int err = 0;

	switch (le16_to_cpu(hdr->op)) {
	case VIRTIO_VSOCK_OP_RW:
		virtio_transport_recv_enqueue(vsk, skb);
		vsock_data_ready(sk);
		return err;
	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
		virtio_transport_send_credit_update(vsk);
		break;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
		sk->sk_write_space(sk);
		break;
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
			vsk->peer_shutdown |= RCV_SHUTDOWN;
		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
			vsk->peer_shutdown |= SEND_SHUTDOWN;
		if (vsk->peer_shutdown == SHUTDOWN_MASK) {
			if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) {
				(void)virtio_transport_reset(vsk, NULL);
				virtio_transport_do_close(vsk, true);
			}
			/* Remove this socket anyway because the remote peer sent
			 * the shutdown. This way a new connection will succeed
			 * if the remote peer uses the same source port,
			 * even if the old socket is still unreleased, but now disconnected.
			 */
			vsock_remove_sock(vsk);
		}
		if (le32_to_cpu(virtio_vsock_hdr(skb)->flags))
			sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_RST:
		virtio_transport_do_close(vsk, true);
		break;
	default:
		err = -EINVAL;
		break;
	}

	kfree_skb(skb);
	return err;
}

/* In TCP_CLOSING, only an RST from the peer completes the close. */
static void
virtio_transport_recv_disconnecting(struct sock *sk,
				    struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);

	if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
		virtio_transport_do_close(vsk, true);
}

/* Reply to the connection request in @skb with OP_RESPONSE, addressed
 * back to the requester (header src becomes our remote).
 */
static int
virtio_transport_send_response(struct vsock_sock *vsk,
			       struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RESPONSE,
		.remote_cid = le64_to_cpu(hdr->src_cid),
		.remote_port = le32_to_cpu(hdr->src_port),
		.reply = true,
		.vsk = vsk,
		.net = sock_net(sk_vsock(vsk)),
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}

/* Refresh our view of the peer's credit (buf_alloc/fwd_cnt) from the
 * packet header and report whether the peer currently has space.
 */
static bool virtio_transport_space_update(struct sock *sk,
					  struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool space_available;

	/* Listener sockets are not associated with any transport, so we are
	 * not able to take the state to see if there is space available in the
	 * remote peer, but since they are only used to receive requests, we
	 * can assume that there is always space available in the other peer.
	 */
	if (!vvs)
		return true;

	/* buf_alloc and fwd_cnt is always included in the hdr */
	spin_lock_bh(&vvs->tx_lock);
	vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
	vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
	space_available = virtio_transport_has_space(vvs);
	spin_unlock_bh(&vvs->tx_lock);
	return space_available;
}

/* Handle server socket */
static int
virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
			     struct virtio_transport *t)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	struct vsock_sock *vchild;
	struct sock *child;
	int ret;

	/* Only OP_REQUEST is valid on a listening socket. */
	if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
		virtio_transport_reset_no_sock(t, skb, sock_net(sk));
		return -EINVAL;
	}

	if (sk_acceptq_is_full(sk)) {
		virtio_transport_reset_no_sock(t, skb, sock_net(sk));
		return -ENOMEM;
	}

	/* __vsock_release() might have already flushed accept_queue.
	 * Subsequent enqueues would lead to a memory leak.
	 */
	if (sk->sk_shutdown == SHUTDOWN_MASK) {
		virtio_transport_reset_no_sock(t, skb, sock_net(sk));
		return -ESHUTDOWN;
	}

	child = vsock_create_connected(sk);
	if (!child) {
		virtio_transport_reset_no_sock(t, skb, sock_net(sk));
		return -ENOMEM;
	}

	lock_sock_nested(child, SINGLE_DEPTH_NESTING);

	child->sk_state = TCP_ESTABLISHED;

	vchild = vsock_sk(child);
	/* Addresses come from the request: its dst is our local address,
	 * its src is the child's remote address.
	 */
	vsock_addr_init(&vchild->local_addr, le64_to_cpu(hdr->dst_cid),
			le32_to_cpu(hdr->dst_port));
	vsock_addr_init(&vchild->remote_addr, le64_to_cpu(hdr->src_cid),
			le32_to_cpu(hdr->src_port));

	ret = vsock_assign_transport(vchild, vsk);
	/* Transport assigned (looking at remote_addr) must be the same
	 * where we received the request.
	 */
	if (ret || vchild->transport != &t->transport) {
		release_sock(child);
		virtio_transport_reset_no_sock(t, skb, sock_net(sk));
		sock_put(child);
		return ret;
	}

	sk_acceptq_added(sk);
	if (virtio_transport_space_update(child, skb))
		child->sk_write_space(child);

	vsock_insert_connected(vchild);
	vsock_enqueue_accept(sk, child);
	virtio_transport_send_response(vchild, skb);

	release_sock(child);

	sk->sk_data_ready(sk);
	return 0;
}

/* Only stream and seqpacket types are handled by this transport. */
static bool virtio_transport_valid_type(u16 type)
{
	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
}

/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
 * lock.
 */
void virtio_transport_recv_pkt(struct virtio_transport *t,
			       struct sk_buff *skb, struct net *net)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct sockaddr_vm src, dst;
	struct vsock_sock *vsk;
	struct sock *sk;
	bool space_available;

	vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
			le32_to_cpu(hdr->src_port));
	vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
			le32_to_cpu(hdr->dst_port));

	trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
					dst.svm_cid, dst.svm_port,
					le32_to_cpu(hdr->len),
					le16_to_cpu(hdr->type),
					le16_to_cpu(hdr->op),
					le32_to_cpu(hdr->flags),
					le32_to_cpu(hdr->buf_alloc),
					le32_to_cpu(hdr->fwd_cnt));

	if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
		(void)virtio_transport_reset_no_sock(t, skb, net);
		goto free_pkt;
	}

	/* The socket must be in connected or bound table
	 * otherwise send reset back
	 */
	sk = vsock_find_connected_socket_net(&src, &dst, net);
	if (!sk) {
		sk = vsock_find_bound_socket_net(&dst, net);
		if (!sk) {
			(void)virtio_transport_reset_no_sock(t, skb,
							     net);
			goto free_pkt;
		}
	}

	if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
		(void)virtio_transport_reset_no_sock(t, skb, net);
		sock_put(sk);
		goto free_pkt;
	}

	if (!skb_set_owner_sk_safe(skb, sk)) {
		WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n");
		goto free_pkt;
	}

	vsk = vsock_sk(sk);

	lock_sock(sk);

	/* Check if sk has been closed or assigned to another transport before
	 * lock_sock (note: listener sockets are not assigned to any transport)
	 */
	if (sock_flag(sk, SOCK_DONE) ||
	    (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
		(void)virtio_transport_reset_no_sock(t, skb, net);
		release_sock(sk);
		sock_put(sk);
		goto free_pkt;
	}

	space_available = virtio_transport_space_update(sk, skb);

	/* Update CID in case it has changed after a transport reset event */
	if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
		vsk->local_addr.svm_cid = dst.svm_cid;

	if (space_available)
		sk->sk_write_space(sk);

	/* Dispatch on socket state; every path below owns (frees or
	 * queues) the skb.
	 */
	switch (sk->sk_state) {
	case TCP_LISTEN:
		virtio_transport_recv_listen(sk, skb, t);
		kfree_skb(skb);
		break;
	case TCP_SYN_SENT:
		virtio_transport_recv_connecting(sk, skb);
		kfree_skb(skb);
		break;
	case TCP_ESTABLISHED:
		virtio_transport_recv_connected(sk, skb);
		break;
	case TCP_CLOSING:
		virtio_transport_recv_disconnecting(sk, skb);
		kfree_skb(skb);
		break;
	default:
		(void)virtio_transport_reset_no_sock(t, skb, net);
		kfree_skb(skb);
		break;
	}

	release_sock(sk);

	/* Release refcnt obtained when we fetched this socket out of the
	 * bound or connected list.
	 */
	sock_put(sk);
	return;

free_pkt:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);

/* Remove skbs found in a queue that have a vsk that matches.
 *
 * Each skb is freed.
 *
 * Returns the count of skbs that were reply packets.
 */
int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue)
{
	struct sk_buff_head freeme;
	struct sk_buff *skb, *tmp;
	int cnt = 0;

	skb_queue_head_init(&freeme);

	spin_lock_bh(&queue->lock);
	skb_queue_walk_safe(queue, skb, tmp) {
		if (vsock_sk(skb->sk) != vsk)
			continue;

		__skb_unlink(skb, queue);
		__skb_queue_tail(&freeme, skb);

		if (virtio_vsock_skb_reply(skb))
			cnt++;
	}
	spin_unlock_bh(&queue->lock);

	/* Free the collected skbs outside the queue lock. */
	__skb_queue_purge(&freeme);

	return cnt;
}
EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs);

/* Dequeue one skb from the rx_queue under rx_lock, update receive
 * credit accounting, then hand the skb to @recv_actor. Returns the
 * actor's result, or the error from __skb_recv_datagram().
 */
int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct sock *sk = sk_vsock(vsk);
	struct virtio_vsock_hdr *hdr;
	struct sk_buff *skb;
	u32 pkt_len;
	int off = 0;
	int err;

	spin_lock_bh(&vvs->rx_lock);
	/* Use __skb_recv_datagram() for race-free handling of the receive. It
	 * works for types other than dgrams.
	 */
	skb = __skb_recv_datagram(sk, &vvs->rx_queue, MSG_DONTWAIT, &off, &err);
	if (!skb) {
		spin_unlock_bh(&vvs->rx_lock);
		return err;
	}

	hdr = virtio_vsock_hdr(skb);
	if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
		vvs->msg_count--;

	pkt_len = le32_to_cpu(hdr->len);
	virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
	spin_unlock_bh(&vvs->rx_lock);

	/* Tell the peer about the freed receive space. */
	virtio_transport_send_credit_update(vsk);

	return recv_actor(sk, skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);

/* SO_RCVLOWAT notification: kick the sender with a credit update when
 * the new low-water mark exceeds the data currently available.
 */
int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool send_update;

	spin_lock_bh(&vvs->rx_lock);

	/* If number of available bytes is less than new SO_RCVLOWAT value,
	 * kick sender to send more data, because sender may sleep in its
	 * 'send()' syscall waiting for enough space at our side. Also
	 * don't send credit update when peer already knows actual value -
	 * such transmission will be useless.
	 */
	send_update = (vvs->rx_bytes < val) &&
		      (vvs->fwd_cnt != vvs->last_fwd_cnt);

	spin_unlock_bh(&vvs->rx_lock);

	if (send_update) {
		int err;

		err = virtio_transport_send_credit_update(vsk);
		if (err < 0)
			return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_set_rcvlowat);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");