1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * common code for virtio vsock 4 * 5 * Copyright (C) 2013-2015 Red Hat, Inc. 6 * Author: Asias He <asias@redhat.com> 7 * Stefan Hajnoczi <stefanha@redhat.com> 8 */ 9 #include <linux/spinlock.h> 10 #include <linux/module.h> 11 #include <linux/sched/signal.h> 12 #include <linux/ctype.h> 13 #include <linux/list.h> 14 #include <linux/virtio_vsock.h> 15 #include <uapi/linux/vsockmon.h> 16 17 #include <net/sock.h> 18 #include <net/af_vsock.h> 19 20 #define CREATE_TRACE_POINTS 21 #include <trace/events/vsock_virtio_transport_common.h> 22 23 /* How long to wait for graceful shutdown of a connection */ 24 #define VSOCK_CLOSE_TIMEOUT (8 * HZ) 25 26 /* Threshold for detecting small packets to copy */ 27 #define GOOD_COPY_LEN 128 28 29 static void virtio_transport_cancel_close_work(struct vsock_sock *vsk, 30 bool cancel_timeout); 31 static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs); 32 33 static const struct virtio_transport * 34 virtio_transport_get_ops(struct vsock_sock *vsk) 35 { 36 const struct vsock_transport *t = vsock_core_get_transport(vsk); 37 38 if (WARN_ON(!t)) 39 return NULL; 40 41 return container_of(t, struct virtio_transport, transport); 42 } 43 44 static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, 45 struct virtio_vsock_pkt_info *info, 46 size_t pkt_len) 47 { 48 struct iov_iter *iov_iter; 49 50 if (!info->msg) 51 return false; 52 53 iov_iter = &info->msg->msg_iter; 54 55 if (iov_iter->iov_offset) 56 return false; 57 58 /* We can't send whole iov. */ 59 if (iov_iter->count > pkt_len) 60 return false; 61 62 /* Check that transport can send data in zerocopy mode. */ 63 if (t_ops->can_msgzerocopy) { 64 int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); 65 66 /* +1 is for packet header. 
*/ 67 return t_ops->can_msgzerocopy(pages_to_send + 1); 68 } 69 70 return true; 71 } 72 73 static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, 74 struct sk_buff *skb, 75 struct msghdr *msg, 76 bool zerocopy) 77 { 78 struct ubuf_info *uarg; 79 80 if (msg->msg_ubuf) { 81 uarg = msg->msg_ubuf; 82 net_zcopy_get(uarg); 83 } else { 84 struct iov_iter *iter = &msg->msg_iter; 85 struct ubuf_info_msgzc *uarg_zc; 86 87 uarg = msg_zerocopy_realloc(sk_vsock(vsk), 88 iter->count, 89 NULL, false); 90 if (!uarg) 91 return -1; 92 93 uarg_zc = uarg_to_msgzc(uarg); 94 uarg_zc->zerocopy = zerocopy ? 1 : 0; 95 } 96 97 skb_zcopy_init(skb, uarg); 98 99 return 0; 100 } 101 102 static int virtio_transport_fill_skb(struct sk_buff *skb, 103 struct virtio_vsock_pkt_info *info, 104 size_t len, 105 bool zcopy) 106 { 107 struct msghdr *msg = info->msg; 108 109 if (zcopy) 110 return __zerocopy_sg_from_iter(msg, NULL, skb, 111 &msg->msg_iter, len, NULL); 112 113 virtio_vsock_skb_put(skb, len); 114 return skb_copy_datagram_from_iter_full(skb, 0, &msg->msg_iter, len); 115 } 116 117 static void virtio_transport_init_hdr(struct sk_buff *skb, 118 struct virtio_vsock_pkt_info *info, 119 size_t payload_len, 120 u32 src_cid, 121 u32 src_port, 122 u32 dst_cid, 123 u32 dst_port) 124 { 125 struct virtio_vsock_hdr *hdr; 126 127 hdr = virtio_vsock_hdr(skb); 128 hdr->type = cpu_to_le16(info->type); 129 hdr->op = cpu_to_le16(info->op); 130 hdr->src_cid = cpu_to_le64(src_cid); 131 hdr->dst_cid = cpu_to_le64(dst_cid); 132 hdr->src_port = cpu_to_le32(src_port); 133 hdr->dst_port = cpu_to_le32(dst_port); 134 hdr->flags = cpu_to_le32(info->flags); 135 hdr->len = cpu_to_le32(payload_len); 136 hdr->buf_alloc = cpu_to_le32(0); 137 hdr->fwd_cnt = cpu_to_le32(0); 138 } 139 140 static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, 141 void *dst, 142 size_t len) 143 { 144 struct iov_iter iov_iter = { 0 }; 145 struct kvec kvec; 146 size_t to_copy; 147 148 kvec.iov_base = dst; 149 
kvec.iov_len = len; 150 151 iov_iter.iter_type = ITER_KVEC; 152 iov_iter.kvec = &kvec; 153 iov_iter.nr_segs = 1; 154 155 to_copy = min_t(size_t, len, skb->len); 156 157 skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, 158 &iov_iter, to_copy); 159 } 160 161 /* Packet capture */ 162 static struct sk_buff *virtio_transport_build_skb(void *opaque) 163 { 164 struct virtio_vsock_hdr *pkt_hdr; 165 struct sk_buff *pkt = opaque; 166 struct af_vsockmon_hdr *hdr; 167 struct sk_buff *skb; 168 size_t payload_len; 169 170 /* A packet could be split to fit the RX buffer, so we can retrieve 171 * the payload length from the header and the buffer pointer taking 172 * care of the offset in the original packet. 173 */ 174 pkt_hdr = virtio_vsock_hdr(pkt); 175 payload_len = pkt->len; 176 177 skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len, 178 GFP_ATOMIC); 179 if (!skb) 180 return NULL; 181 182 hdr = skb_put(skb, sizeof(*hdr)); 183 184 /* pkt->hdr is little-endian so no need to byteswap here */ 185 hdr->src_cid = pkt_hdr->src_cid; 186 hdr->src_port = pkt_hdr->src_port; 187 hdr->dst_cid = pkt_hdr->dst_cid; 188 hdr->dst_port = pkt_hdr->dst_port; 189 190 hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO); 191 hdr->len = cpu_to_le16(sizeof(*pkt_hdr)); 192 memset(hdr->reserved, 0, sizeof(hdr->reserved)); 193 194 switch (le16_to_cpu(pkt_hdr->op)) { 195 case VIRTIO_VSOCK_OP_REQUEST: 196 case VIRTIO_VSOCK_OP_RESPONSE: 197 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT); 198 break; 199 case VIRTIO_VSOCK_OP_RST: 200 case VIRTIO_VSOCK_OP_SHUTDOWN: 201 hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT); 202 break; 203 case VIRTIO_VSOCK_OP_RW: 204 hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD); 205 break; 206 case VIRTIO_VSOCK_OP_CREDIT_UPDATE: 207 case VIRTIO_VSOCK_OP_CREDIT_REQUEST: 208 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL); 209 break; 210 default: 211 hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN); 212 break; 213 } 214 215 skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr)); 216 217 
if (payload_len) { 218 if (skb_is_nonlinear(pkt)) { 219 void *data = skb_put(skb, payload_len); 220 221 virtio_transport_copy_nonlinear_skb(pkt, data, payload_len); 222 } else { 223 skb_put_data(skb, pkt->data, payload_len); 224 } 225 } 226 227 return skb; 228 } 229 230 void virtio_transport_deliver_tap_pkt(struct sk_buff *skb) 231 { 232 if (virtio_vsock_skb_tap_delivered(skb)) 233 return; 234 235 vsock_deliver_tap(virtio_transport_build_skb, skb); 236 virtio_vsock_skb_set_tap_delivered(skb); 237 } 238 EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); 239 240 static u16 virtio_transport_get_type(struct sock *sk) 241 { 242 if (sk->sk_type == SOCK_STREAM) 243 return VIRTIO_VSOCK_TYPE_STREAM; 244 else 245 return VIRTIO_VSOCK_TYPE_SEQPACKET; 246 } 247 248 /* Returns new sk_buff on success, otherwise returns NULL. */ 249 static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, 250 size_t payload_len, 251 bool zcopy, 252 u32 src_cid, 253 u32 src_port, 254 u32 dst_cid, 255 u32 dst_port) 256 { 257 struct vsock_sock *vsk; 258 struct sk_buff *skb; 259 size_t skb_len; 260 261 skb_len = VIRTIO_VSOCK_SKB_HEADROOM; 262 263 if (!zcopy) 264 skb_len += payload_len; 265 266 skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); 267 if (!skb) 268 return NULL; 269 270 virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port, 271 dst_cid, dst_port); 272 273 vsk = info->vsk; 274 275 /* If 'vsk' != NULL then payload is always present, so we 276 * will never call '__zerocopy_sg_from_iter()' below without 277 * setting skb owner in 'skb_set_owner_w()'. The only case 278 * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message 279 * without payload. 280 */ 281 WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy); 282 283 /* Set owner here, because '__zerocopy_sg_from_iter()' uses 284 * owner of skb without check to update 'sk_wmem_alloc'. 
285 */ 286 if (vsk) 287 skb_set_owner_w(skb, sk_vsock(vsk)); 288 289 if (info->msg && payload_len > 0) { 290 int err; 291 292 err = virtio_transport_fill_skb(skb, info, payload_len, zcopy); 293 if (err) 294 goto out; 295 296 if (msg_data_left(info->msg) == 0 && 297 info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { 298 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); 299 300 hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); 301 302 if (info->msg->msg_flags & MSG_EOR) 303 hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); 304 } 305 } 306 307 if (info->reply) 308 virtio_vsock_skb_set_reply(skb); 309 310 trace_virtio_transport_alloc_pkt(src_cid, src_port, 311 dst_cid, dst_port, 312 payload_len, 313 info->type, 314 info->op, 315 info->flags, 316 zcopy); 317 318 return skb; 319 out: 320 kfree_skb(skb); 321 return NULL; 322 } 323 324 /* This function can only be used on connecting/connected sockets, 325 * since a socket assigned to a transport is required. 326 * 327 * Do not use on listener sockets! 
328 */ 329 static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, 330 struct virtio_vsock_pkt_info *info) 331 { 332 u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; 333 u32 src_cid, src_port, dst_cid, dst_port; 334 const struct virtio_transport *t_ops; 335 struct virtio_vsock_sock *vvs; 336 u32 pkt_len = info->pkt_len; 337 bool can_zcopy = false; 338 u32 rest_len; 339 int ret; 340 341 info->type = virtio_transport_get_type(sk_vsock(vsk)); 342 343 t_ops = virtio_transport_get_ops(vsk); 344 if (unlikely(!t_ops)) 345 return -EFAULT; 346 347 src_cid = t_ops->transport.get_local_cid(); 348 src_port = vsk->local_addr.svm_port; 349 if (!info->remote_cid) { 350 dst_cid = vsk->remote_addr.svm_cid; 351 dst_port = vsk->remote_addr.svm_port; 352 } else { 353 dst_cid = info->remote_cid; 354 dst_port = info->remote_port; 355 } 356 357 vvs = vsk->trans; 358 359 /* virtio_transport_get_credit might return less than pkt_len credit */ 360 pkt_len = virtio_transport_get_credit(vvs, pkt_len); 361 362 /* Do not send zero length OP_RW pkt */ 363 if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) 364 return pkt_len; 365 366 if (info->msg) { 367 /* If zerocopy is not enabled by 'setsockopt()', we behave as 368 * there is no MSG_ZEROCOPY flag set. 369 */ 370 if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) 371 info->msg->msg_flags &= ~MSG_ZEROCOPY; 372 373 if (info->msg->msg_flags & MSG_ZEROCOPY) 374 can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len); 375 376 if (can_zcopy) 377 max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, 378 (MAX_SKB_FRAGS * PAGE_SIZE)); 379 } 380 381 rest_len = pkt_len; 382 383 do { 384 struct sk_buff *skb; 385 size_t skb_len; 386 387 skb_len = min(max_skb_len, rest_len); 388 389 skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy, 390 src_cid, src_port, 391 dst_cid, dst_port); 392 if (!skb) { 393 ret = -ENOMEM; 394 break; 395 } 396 397 /* We process buffer part by part, allocating skb on 398 * each iteration. 
If this is last skb for this buffer 399 * and MSG_ZEROCOPY mode is in use - we must allocate 400 * completion for the current syscall. 401 */ 402 if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && 403 skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { 404 if (virtio_transport_init_zcopy_skb(vsk, skb, 405 info->msg, 406 can_zcopy)) { 407 kfree_skb(skb); 408 ret = -ENOMEM; 409 break; 410 } 411 } 412 413 virtio_transport_inc_tx_pkt(vvs, skb); 414 415 ret = t_ops->send_pkt(skb, info->net); 416 if (ret < 0) 417 break; 418 419 /* Both virtio and vhost 'send_pkt()' returns 'skb_len', 420 * but for reliability use 'ret' instead of 'skb_len'. 421 * Also if partial send happens (e.g. 'ret' != 'skb_len') 422 * somehow, we break this loop, but account such returned 423 * value in 'virtio_transport_put_credit()'. 424 */ 425 rest_len -= ret; 426 427 if (WARN_ONCE(ret != skb_len, 428 "'send_pkt()' returns %i, but %zu expected\n", 429 ret, skb_len)) 430 break; 431 } while (rest_len); 432 433 virtio_transport_put_credit(vvs, rest_len); 434 435 /* Return number of bytes, if any data has been sent. 
*/ 436 if (rest_len != pkt_len) 437 ret = pkt_len - rest_len; 438 439 return ret; 440 } 441 442 static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, 443 u32 len) 444 { 445 if (vvs->buf_used + len > vvs->buf_alloc) 446 return false; 447 448 vvs->rx_bytes += len; 449 vvs->buf_used += len; 450 return true; 451 } 452 453 static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, 454 u32 bytes_read, u32 bytes_dequeued) 455 { 456 vvs->rx_bytes -= bytes_read; 457 vvs->buf_used -= bytes_dequeued; 458 vvs->fwd_cnt += bytes_dequeued; 459 } 460 461 void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb) 462 { 463 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); 464 465 spin_lock_bh(&vvs->rx_lock); 466 vvs->last_fwd_cnt = vvs->fwd_cnt; 467 hdr->fwd_cnt = cpu_to_le32(vvs->fwd_cnt); 468 hdr->buf_alloc = cpu_to_le32(vvs->buf_alloc); 469 spin_unlock_bh(&vvs->rx_lock); 470 } 471 EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); 472 473 void virtio_transport_consume_skb_sent(struct sk_buff *skb, bool consume) 474 { 475 struct sock *s = skb->sk; 476 477 if (s && skb->len) { 478 struct vsock_sock *vs = vsock_sk(s); 479 struct virtio_vsock_sock *vvs; 480 481 vvs = vs->trans; 482 483 spin_lock_bh(&vvs->tx_lock); 484 vvs->bytes_unsent -= skb->len; 485 spin_unlock_bh(&vvs->tx_lock); 486 } 487 488 if (consume) 489 consume_skb(skb); 490 } 491 EXPORT_SYMBOL_GPL(virtio_transport_consume_skb_sent); 492 493 u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) 494 { 495 u32 ret; 496 497 if (!credit) 498 return 0; 499 500 spin_lock_bh(&vvs->tx_lock); 501 ret = min_t(u32, credit, virtio_transport_has_space(vvs)); 502 vvs->tx_cnt += ret; 503 vvs->bytes_unsent += ret; 504 spin_unlock_bh(&vvs->tx_lock); 505 506 return ret; 507 } 508 EXPORT_SYMBOL_GPL(virtio_transport_get_credit); 509 510 void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) 511 { 512 if (!credit) 513 return; 514 515 
spin_lock_bh(&vvs->tx_lock); 516 vvs->tx_cnt -= credit; 517 vvs->bytes_unsent -= credit; 518 spin_unlock_bh(&vvs->tx_lock); 519 } 520 EXPORT_SYMBOL_GPL(virtio_transport_put_credit); 521 522 static int virtio_transport_send_credit_update(struct vsock_sock *vsk) 523 { 524 struct virtio_vsock_pkt_info info = { 525 .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, 526 .vsk = vsk, 527 .net = sock_net(sk_vsock(vsk)), 528 }; 529 530 return virtio_transport_send_pkt_info(vsk, &info); 531 } 532 533 static ssize_t 534 virtio_transport_stream_do_peek(struct vsock_sock *vsk, 535 struct msghdr *msg, 536 size_t len) 537 { 538 struct virtio_vsock_sock *vvs = vsk->trans; 539 struct sk_buff *skb; 540 size_t total = 0; 541 int err; 542 543 spin_lock_bh(&vvs->rx_lock); 544 545 skb_queue_walk(&vvs->rx_queue, skb) { 546 size_t bytes; 547 548 bytes = len - total; 549 if (bytes > skb->len) 550 bytes = skb->len; 551 552 spin_unlock_bh(&vvs->rx_lock); 553 554 /* sk_lock is held by caller so no one else can dequeue. 555 * Unlock rx_lock since skb_copy_datagram_iter() may sleep. 
556 */ 557 err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, 558 &msg->msg_iter, bytes); 559 if (err) 560 goto out; 561 562 total += bytes; 563 564 spin_lock_bh(&vvs->rx_lock); 565 566 if (total == len) 567 break; 568 } 569 570 spin_unlock_bh(&vvs->rx_lock); 571 572 return total; 573 574 out: 575 if (total) 576 err = total; 577 return err; 578 } 579 580 static ssize_t 581 virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, 582 struct msghdr *msg, 583 size_t len) 584 { 585 struct virtio_vsock_sock *vvs = vsk->trans; 586 struct sk_buff *skb; 587 u32 fwd_cnt_delta; 588 bool low_rx_bytes; 589 int err = -EFAULT; 590 size_t total = 0; 591 u32 free_space; 592 593 spin_lock_bh(&vvs->rx_lock); 594 595 if (WARN_ONCE(skb_queue_empty(&vvs->rx_queue) && vvs->rx_bytes, 596 "rx_queue is empty, but rx_bytes is non-zero\n")) { 597 spin_unlock_bh(&vvs->rx_lock); 598 return err; 599 } 600 601 while (total < len && !skb_queue_empty(&vvs->rx_queue)) { 602 size_t bytes, dequeued = 0; 603 604 skb = skb_peek(&vvs->rx_queue); 605 606 bytes = min_t(size_t, len - total, 607 skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset); 608 609 /* sk_lock is held by caller so no one else can dequeue. 610 * Unlock rx_lock since skb_copy_datagram_iter() may sleep. 
611 */ 612 spin_unlock_bh(&vvs->rx_lock); 613 614 err = skb_copy_datagram_iter(skb, 615 VIRTIO_VSOCK_SKB_CB(skb)->offset, 616 &msg->msg_iter, bytes); 617 if (err) 618 goto out; 619 620 spin_lock_bh(&vvs->rx_lock); 621 622 total += bytes; 623 624 VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes; 625 626 if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) { 627 dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len); 628 __skb_unlink(skb, &vvs->rx_queue); 629 consume_skb(skb); 630 } 631 632 virtio_transport_dec_rx_pkt(vvs, bytes, dequeued); 633 } 634 635 fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt; 636 free_space = vvs->buf_alloc - fwd_cnt_delta; 637 low_rx_bytes = (vvs->rx_bytes < 638 sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX)); 639 640 spin_unlock_bh(&vvs->rx_lock); 641 642 /* To reduce the number of credit update messages, 643 * don't update credits as long as lots of space is available. 644 * Note: the limit chosen here is arbitrary. Setting the limit 645 * too high causes extra messages. Too low causes transmitter 646 * stalls. As stalls are in theory more expensive than extra 647 * messages, we set the limit to a high value. TODO: experiment 648 * with different values. Also send credit update message when 649 * number of bytes in rx queue is not enough to wake up reader. 
650 */ 651 if (fwd_cnt_delta && 652 (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes)) 653 virtio_transport_send_credit_update(vsk); 654 655 return total; 656 657 out: 658 if (total) 659 err = total; 660 return err; 661 } 662 663 static ssize_t 664 virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk, 665 struct msghdr *msg) 666 { 667 struct virtio_vsock_sock *vvs = vsk->trans; 668 struct sk_buff *skb; 669 size_t total, len; 670 671 spin_lock_bh(&vvs->rx_lock); 672 673 if (!vvs->msg_count) { 674 spin_unlock_bh(&vvs->rx_lock); 675 return 0; 676 } 677 678 total = 0; 679 len = msg_data_left(msg); 680 681 skb_queue_walk(&vvs->rx_queue, skb) { 682 struct virtio_vsock_hdr *hdr; 683 684 if (total < len) { 685 size_t bytes; 686 int err; 687 688 bytes = len - total; 689 if (bytes > skb->len) 690 bytes = skb->len; 691 692 spin_unlock_bh(&vvs->rx_lock); 693 694 /* sk_lock is held by caller so no one else can dequeue. 695 * Unlock rx_lock since skb_copy_datagram_iter() may sleep. 696 */ 697 err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, 698 &msg->msg_iter, bytes); 699 if (err) 700 return err; 701 702 spin_lock_bh(&vvs->rx_lock); 703 } 704 705 total += skb->len; 706 hdr = virtio_vsock_hdr(skb); 707 708 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) { 709 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) 710 msg->msg_flags |= MSG_EOR; 711 712 break; 713 } 714 } 715 716 spin_unlock_bh(&vvs->rx_lock); 717 718 return total; 719 } 720 721 static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, 722 struct msghdr *msg, 723 int flags) 724 { 725 struct virtio_vsock_sock *vvs = vsk->trans; 726 int dequeued_len = 0; 727 size_t user_buf_len = msg_data_left(msg); 728 bool msg_ready = false; 729 struct sk_buff *skb; 730 731 spin_lock_bh(&vvs->rx_lock); 732 733 if (vvs->msg_count == 0) { 734 spin_unlock_bh(&vvs->rx_lock); 735 return 0; 736 } 737 738 while (!msg_ready) { 739 struct virtio_vsock_hdr *hdr; 740 size_t pkt_len; 741 742 
skb = __skb_dequeue(&vvs->rx_queue); 743 if (!skb) 744 break; 745 hdr = virtio_vsock_hdr(skb); 746 pkt_len = (size_t)le32_to_cpu(hdr->len); 747 748 if (dequeued_len >= 0) { 749 size_t bytes_to_copy; 750 751 bytes_to_copy = min(user_buf_len, pkt_len); 752 753 if (bytes_to_copy) { 754 int err; 755 756 /* sk_lock is held by caller so no one else can dequeue. 757 * Unlock rx_lock since skb_copy_datagram_iter() may sleep. 758 */ 759 spin_unlock_bh(&vvs->rx_lock); 760 761 err = skb_copy_datagram_iter(skb, 0, 762 &msg->msg_iter, 763 bytes_to_copy); 764 if (err) { 765 /* Copy of message failed. Rest of 766 * fragments will be freed without copy. 767 */ 768 dequeued_len = err; 769 } else { 770 user_buf_len -= bytes_to_copy; 771 } 772 773 spin_lock_bh(&vvs->rx_lock); 774 } 775 776 if (dequeued_len >= 0) 777 dequeued_len += pkt_len; 778 } 779 780 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) { 781 msg_ready = true; 782 vvs->msg_count--; 783 784 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) 785 msg->msg_flags |= MSG_EOR; 786 } 787 788 virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len); 789 kfree_skb(skb); 790 } 791 792 spin_unlock_bh(&vvs->rx_lock); 793 794 virtio_transport_send_credit_update(vsk); 795 796 return dequeued_len; 797 } 798 799 ssize_t 800 virtio_transport_stream_dequeue(struct vsock_sock *vsk, 801 struct msghdr *msg, 802 size_t len, int flags) 803 { 804 if (flags & MSG_PEEK) 805 return virtio_transport_stream_do_peek(vsk, msg, len); 806 else 807 return virtio_transport_stream_do_dequeue(vsk, msg, len); 808 } 809 EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); 810 811 ssize_t 812 virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, 813 struct msghdr *msg, 814 int flags) 815 { 816 if (flags & MSG_PEEK) 817 return virtio_transport_seqpacket_do_peek(vsk, msg); 818 else 819 return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags); 820 } 821 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); 822 823 static u32 
virtio_transport_tx_buf_size(struct virtio_vsock_sock *vvs) 824 { 825 /* The peer advertises its receive buffer via peer_buf_alloc, but we 826 * cap it to our local buf_alloc so a remote peer cannot force us to 827 * queue more data than our own buffer configuration allows. 828 */ 829 return min(vvs->peer_buf_alloc, vvs->buf_alloc); 830 } 831 832 int 833 virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, 834 struct msghdr *msg, 835 size_t len) 836 { 837 struct virtio_vsock_sock *vvs = vsk->trans; 838 839 spin_lock_bh(&vvs->tx_lock); 840 841 if (len > virtio_transport_tx_buf_size(vvs)) { 842 spin_unlock_bh(&vvs->tx_lock); 843 return -EMSGSIZE; 844 } 845 846 spin_unlock_bh(&vvs->tx_lock); 847 848 return virtio_transport_stream_enqueue(vsk, msg, len); 849 } 850 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue); 851 852 int 853 virtio_transport_dgram_dequeue(struct vsock_sock *vsk, 854 struct msghdr *msg, 855 size_t len, int flags) 856 { 857 return -EOPNOTSUPP; 858 } 859 EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); 860 861 s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) 862 { 863 struct virtio_vsock_sock *vvs = vsk->trans; 864 s64 bytes; 865 866 spin_lock_bh(&vvs->rx_lock); 867 bytes = vvs->rx_bytes; 868 spin_unlock_bh(&vvs->rx_lock); 869 870 return bytes; 871 } 872 EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); 873 874 u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk) 875 { 876 struct virtio_vsock_sock *vvs = vsk->trans; 877 u32 msg_count; 878 879 spin_lock_bh(&vvs->rx_lock); 880 msg_count = vvs->msg_count; 881 spin_unlock_bh(&vvs->rx_lock); 882 883 return msg_count; 884 } 885 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data); 886 887 static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs) 888 { 889 s64 bytes; 890 891 /* Use s64 arithmetic so if the peer shrinks peer_buf_alloc while 892 * we have bytes in flight (tx_cnt - peer_fwd_cnt), the subtraction 893 * does not underflow. 
894 */ 895 bytes = (s64)virtio_transport_tx_buf_size(vvs) - 896 (vvs->tx_cnt - vvs->peer_fwd_cnt); 897 if (bytes < 0) 898 bytes = 0; 899 900 return bytes; 901 } 902 903 s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) 904 { 905 struct virtio_vsock_sock *vvs = vsk->trans; 906 s64 bytes; 907 908 spin_lock_bh(&vvs->tx_lock); 909 bytes = virtio_transport_has_space(vvs); 910 spin_unlock_bh(&vvs->tx_lock); 911 912 return bytes; 913 } 914 EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); 915 916 int virtio_transport_do_socket_init(struct vsock_sock *vsk, 917 struct vsock_sock *psk) 918 { 919 struct virtio_vsock_sock *vvs; 920 921 vvs = kzalloc_obj(*vvs); 922 if (!vvs) 923 return -ENOMEM; 924 925 vsk->trans = vvs; 926 vvs->vsk = vsk; 927 if (psk && psk->trans) { 928 struct virtio_vsock_sock *ptrans = psk->trans; 929 930 vvs->peer_buf_alloc = ptrans->peer_buf_alloc; 931 } 932 933 if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE) 934 vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE; 935 936 vvs->buf_alloc = vsk->buffer_size; 937 938 spin_lock_init(&vvs->rx_lock); 939 spin_lock_init(&vvs->tx_lock); 940 skb_queue_head_init(&vvs->rx_queue); 941 942 return 0; 943 } 944 EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); 945 946 /* sk_lock held by the caller */ 947 void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val) 948 { 949 struct virtio_vsock_sock *vvs = vsk->trans; 950 951 if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE) 952 *val = VIRTIO_VSOCK_MAX_BUF_SIZE; 953 954 vvs->buf_alloc = *val; 955 956 virtio_transport_send_credit_update(vsk); 957 } 958 EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size); 959 960 int 961 virtio_transport_notify_poll_in(struct vsock_sock *vsk, 962 size_t target, 963 bool *data_ready_now) 964 { 965 *data_ready_now = vsock_stream_has_data(vsk) >= target; 966 967 return 0; 968 } 969 EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); 970 971 int 972 virtio_transport_notify_poll_out(struct vsock_sock *vsk, 973 size_t target, 
974 bool *space_avail_now) 975 { 976 s64 free_space; 977 978 free_space = vsock_stream_has_space(vsk); 979 if (free_space > 0) 980 *space_avail_now = true; 981 else if (free_space == 0) 982 *space_avail_now = false; 983 984 return 0; 985 } 986 EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); 987 988 int virtio_transport_notify_recv_init(struct vsock_sock *vsk, 989 size_t target, struct vsock_transport_recv_notify_data *data) 990 { 991 return 0; 992 } 993 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); 994 995 int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, 996 size_t target, struct vsock_transport_recv_notify_data *data) 997 { 998 return 0; 999 } 1000 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); 1001 1002 int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, 1003 size_t target, struct vsock_transport_recv_notify_data *data) 1004 { 1005 return 0; 1006 } 1007 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); 1008 1009 int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, 1010 size_t target, ssize_t copied, bool data_read, 1011 struct vsock_transport_recv_notify_data *data) 1012 { 1013 return 0; 1014 } 1015 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); 1016 1017 int virtio_transport_notify_send_init(struct vsock_sock *vsk, 1018 struct vsock_transport_send_notify_data *data) 1019 { 1020 return 0; 1021 } 1022 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); 1023 1024 int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, 1025 struct vsock_transport_send_notify_data *data) 1026 { 1027 return 0; 1028 } 1029 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); 1030 1031 int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, 1032 struct vsock_transport_send_notify_data *data) 1033 { 1034 return 0; 1035 } 1036 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); 1037 1038 int virtio_transport_notify_send_post_enqueue(struct 
vsock_sock *vsk, 1039 ssize_t written, struct vsock_transport_send_notify_data *data) 1040 { 1041 return 0; 1042 } 1043 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); 1044 1045 u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) 1046 { 1047 return vsk->buffer_size; 1048 } 1049 EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); 1050 1051 bool virtio_transport_stream_is_active(struct vsock_sock *vsk) 1052 { 1053 return true; 1054 } 1055 EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); 1056 1057 int virtio_transport_dgram_bind(struct vsock_sock *vsk, 1058 struct sockaddr_vm *addr) 1059 { 1060 return -EOPNOTSUPP; 1061 } 1062 EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); 1063 1064 bool virtio_transport_dgram_allow(struct vsock_sock *vsk, u32 cid, u32 port) 1065 { 1066 return false; 1067 } 1068 EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); 1069 1070 int virtio_transport_connect(struct vsock_sock *vsk) 1071 { 1072 struct virtio_vsock_pkt_info info = { 1073 .op = VIRTIO_VSOCK_OP_REQUEST, 1074 .vsk = vsk, 1075 .net = sock_net(sk_vsock(vsk)), 1076 }; 1077 1078 return virtio_transport_send_pkt_info(vsk, &info); 1079 } 1080 EXPORT_SYMBOL_GPL(virtio_transport_connect); 1081 1082 int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) 1083 { 1084 struct virtio_vsock_pkt_info info = { 1085 .op = VIRTIO_VSOCK_OP_SHUTDOWN, 1086 .flags = (mode & RCV_SHUTDOWN ? 1087 VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | 1088 (mode & SEND_SHUTDOWN ? 
			     VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
		.vsk = vsk,
		.net = sock_net(sk_vsock(vsk)),
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_shutdown);

/* Datagram sockets are not supported by the virtio transport. */
int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
			       struct sockaddr_vm *remote_addr,
			       struct msghdr *msg,
			       size_t dgram_len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);

/* Queue up to @len bytes from @msg for transmission as an OP_RW packet.
 * Returns the number of bytes queued or a negative error, as reported by
 * virtio_transport_send_pkt_info().
 */
ssize_t
virtio_transport_stream_enqueue(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RW,
		.msg = msg,
		.pkt_len = len,
		.vsk = vsk,
		.net = sock_net(sk_vsock(vsk)),
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);

/* Free the transport-private state attached to @vsk. Any pending close
 * work (and its timeout) is cancelled first so it cannot run after
 * vsk->trans has been freed.
 */
void virtio_transport_destruct(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	virtio_transport_cancel_close_work(vsk, true);

	kfree(vvs);
	vsk->trans = NULL;
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);

/* Return a snapshot of the number of bytes queued but not yet sent,
 * read under tx_lock.
 */
ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	size_t ret;

	spin_lock_bh(&vvs->tx_lock);
	ret = vvs->bytes_unsent;
	spin_unlock_bh(&vvs->tx_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);

/* Send an OP_RST for @vsk. @skb, if non-NULL, is the packet we are
 * reacting to; in that case the RST is marked as a reply.
 */
static int virtio_transport_reset(struct vsock_sock *vsk,
				  struct sk_buff *skb)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.reply = !!skb,
		.vsk = vsk,
		.net = sock_net(sk_vsock(vsk)),
	};

	/* Send RST only if the original pkt is not a RST pkt */
	if (skb && le16_to_cpu(virtio_vsock_hdr(skb)->op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	return virtio_transport_send_pkt_info(vsk, &info);
}

/* Normally packets are associated with a socket. There may be no socket if an
 * attempt was made to connect to a socket that does not exist.
 *
 * net refers to the namespace of whoever sent the invalid message. For
 * loopback, this is the namespace of the socket. For vhost, this is the
 * namespace of the VM (i.e., vhost_vsock).
 */
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
					  struct sk_buff *skb, struct net *net)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.type = le16_to_cpu(hdr->type),
		.reply = true,

		/* Set sk owner to socket we are replying to (may be NULL for
		 * non-loopback). This keeps a reference to the sock and
		 * sock_net(sk) until the reply skb is freed.
		 */
		.vsk = vsock_sk(skb->sk),

		/* net is not defined here because we pass it directly to
		 * t->send_pkt(), instead of relying on
		 * virtio_transport_send_pkt_info() to pass it. It is not needed
		 * by virtio_transport_alloc_skb().
		 */
	};
	struct sk_buff *reply;

	/* Send RST only if the original pkt is not a RST pkt */
	if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	if (!t)
		return -ENOTCONN;

	/* The reply swaps src and dst of the offending packet. */
	reply = virtio_transport_alloc_skb(&info, 0, false,
					   le64_to_cpu(hdr->dst_cid),
					   le32_to_cpu(hdr->dst_port),
					   le64_to_cpu(hdr->src_cid),
					   le32_to_cpu(hdr->src_port));
	if (!reply)
		return -ENOMEM;

	return t->send_pkt(reply, net);
}

/* This function should be called with sk_lock held and SOCK_DONE set */
static void virtio_transport_remove_sock(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	/* We don't need to take rx_lock, as the socket is closing and we are
	 * removing it.
	 */
	__skb_queue_purge(&vvs->rx_queue);
	vsock_remove_sock(vsk);
}

/* Tear down a previously scheduled close timeout. If @cancel_timeout is
 * false, only proceed when the delayed work no longer needs cancelling
 * (i.e. we are being called from the timeout itself). On success, removes
 * the socket and drops the refcount taken when the work was scheduled.
 */
static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
					       bool cancel_timeout)
{
	struct sock *sk = sk_vsock(vsk);

	if (vsk->close_work_scheduled &&
	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
		vsk->close_work_scheduled = false;

		virtio_transport_remove_sock(vsk);

		/* Release refcnt obtained when we scheduled the timeout */
		sock_put(sk);
	}
}

/* Mark the socket done and fully shut down by the peer, move it to
 * TCP_CLOSING if no data remains to be read, notify waiters, and cancel
 * any pending close timeout.
 */
static void virtio_transport_do_close(struct vsock_sock *vsk,
				      bool cancel_timeout)
{
	struct sock *sk = sk_vsock(vsk);

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	if (vsock_stream_has_data(vsk) <= 0)
		sk->sk_state = TCP_CLOSING;
	sk->sk_state_change(sk);

	virtio_transport_cancel_close_work(vsk, cancel_timeout);
}

/* Delayed-work handler: the peer did not complete a graceful shutdown
 * within VSOCK_CLOSE_TIMEOUT, so force the connection closed with an RST.
 */
static void virtio_transport_close_timeout(struct work_struct *work)
{
	struct vsock_sock *vsk =
		container_of(work, struct vsock_sock, close_work.work);
	struct sock *sk = sk_vsock(vsk);

	sock_hold(sk);
	lock_sock(sk);

	if (!sock_flag(sk, SOCK_DONE)) {
		(void)virtio_transport_reset(vsk, NULL);

		/* cancel_timeout == false: we are the timeout, don't try to
		 * cancel ourselves.
		 */
		virtio_transport_do_close(vsk, false);
	}

	vsk->close_work_scheduled = false;

	release_sock(sk);
	sock_put(sk);
}

/* User context, vsk->sk is locked.
 *
 * Begin closing the connection. Returns true if the socket can be removed
 * immediately, false if a graceful-shutdown timeout was scheduled and
 * removal is deferred to virtio_transport_close_timeout().
 */
static bool virtio_transport_close(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;

	if (!(sk->sk_state == TCP_ESTABLISHED ||
	      sk->sk_state == TCP_CLOSING))
		return true;

	/* Already received SHUTDOWN from peer, reply with RST */
	if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
		(void)virtio_transport_reset(vsk, NULL);
		return true;
	}

	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
		(void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);

	/* Skip lingering for exiting tasks to avoid blocking process exit. */
	if (!(current->flags & PF_EXITING))
		vsock_linger(sk);

	if (sock_flag(sk, SOCK_DONE)) {
		return true;
	}

	/* Refcnt dropped either by the timeout handler or by
	 * virtio_transport_cancel_close_work().
	 */
	sock_hold(sk);
	INIT_DELAYED_WORK(&vsk->close_work,
			  virtio_transport_close_timeout);
	vsk->close_work_scheduled = true;
	schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
	return false;
}

/* Release the transport's hold on @vsk. Connection-oriented sockets go
 * through the graceful-close path; if close completed (or the socket is
 * not connection-oriented) the socket is removed immediately.
 */
void virtio_transport_release(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;
	bool remove_sock = true;

	if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
		remove_sock = virtio_transport_close(vsk);

	if (remove_sock) {
		sock_set_flag(sk, SOCK_DONE);
		virtio_transport_remove_sock(vsk);
	}
}
EXPORT_SYMBOL_GPL(virtio_transport_release);

/* Handle a packet received while the socket is in TCP_SYN_SENT:
 * OP_RESPONSE completes the connection, OP_RST resets it (ECONNRESET),
 * anything else is a protocol error (EPROTO) and triggers a reset.
 */
static int
virtio_transport_recv_connecting(struct sock *sk,
				 struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	int skerr;
	int err;

	switch (le16_to_cpu(hdr->op)) {
	case VIRTIO_VSOCK_OP_RESPONSE:
		sk->sk_state = TCP_ESTABLISHED;
		sk->sk_socket->state = SS_CONNECTED;
		vsock_insert_connected(vsk);
		sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_INVALID:
		break;
	case VIRTIO_VSOCK_OP_RST:
		skerr = ECONNRESET;
		err = 0;
		goto destroy;
	default:
		skerr = EPROTO;
		err = -EINVAL;
		goto destroy;
	}
	return 0;

destroy:
	virtio_transport_reset(vsk, skb);
	sk->sk_state = TCP_CLOSE;
	sk->sk_err = skerr;
	sk_error_report(sk);
	return err;
}

/* Append an OP_RW packet to the socket's rx_queue (taking ownership of
 * @skb), or drop it if receive-buffer credit is exhausted. Small linear
 * packets may be merged into the tail skb instead of being queued.
 */
static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
			      struct sk_buff *skb)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool can_enqueue, free_pkt = false;
	struct virtio_vsock_hdr *hdr;
	u32 len;

	hdr = virtio_vsock_hdr(skb);
	len = le32_to_cpu(hdr->len);

	spin_lock_bh(&vvs->rx_lock);

	can_enqueue = virtio_transport_inc_rx_pkt(vvs, len);
	if (!can_enqueue) {
		free_pkt = true;
		goto out;
	}

	if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
		vvs->msg_count++;

	/* Try to copy small packets into the buffer of last packet queued,
	 * to avoid wasting memory queueing the entire buffer with a small
	 * payload. Skip non-linear (e.g. zerocopy) skbs; these carry payload
	 * in skb_shinfo.
	 */
	if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue) &&
	    !skb_is_nonlinear(skb)) {
		struct virtio_vsock_hdr *last_hdr;
		struct sk_buff *last_skb;

		last_skb = skb_peek_tail(&vvs->rx_queue);
		last_hdr = virtio_vsock_hdr(last_skb);

		/* If there is space in the last packet queued, we copy the
		 * new packet in its buffer. We avoid this if the last packet
		 * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is
		 * delimiter of SEQPACKET message, so 'pkt' is the first packet
		 * of a new message.
		 */
		if (skb->len < skb_tailroom(last_skb) &&
		    !(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) {
			memcpy(skb_put(last_skb, skb->len), skb->data, skb->len);
			free_pkt = true;
			/* Merged packet inherits flags and grows hdr->len. */
			last_hdr->flags |= hdr->flags;
			le32_add_cpu(&last_hdr->len, len);
			goto out;
		}
	}

	__skb_queue_tail(&vvs->rx_queue, skb);

out:
	spin_unlock_bh(&vvs->rx_lock);
	if (free_pkt)
		kfree_skb(skb);
}

/* Handle a packet received while the socket is TCP_ESTABLISHED.
 * Consumes @skb in all cases: OP_RW hands it to recv_enqueue (which takes
 * ownership), every other branch falls through to kfree_skb().
 */
static int
virtio_transport_recv_connected(struct sock *sk,
				struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	int err = 0;

	switch (le16_to_cpu(hdr->op)) {
	case VIRTIO_VSOCK_OP_RW:
		virtio_transport_recv_enqueue(vsk, skb);
		vsock_data_ready(sk);
		/* Early return: skb is now owned by the rx_queue (or freed
		 * by recv_enqueue), must not reach kfree_skb() below.
		 */
		return err;
	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
		virtio_transport_send_credit_update(vsk);
		break;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
		sk->sk_write_space(sk);
		break;
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
			vsk->peer_shutdown |= RCV_SHUTDOWN;
		if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
			vsk->peer_shutdown |= SEND_SHUTDOWN;
		if (vsk->peer_shutdown == SHUTDOWN_MASK) {
			if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) {
				(void)virtio_transport_reset(vsk, NULL);
				virtio_transport_do_close(vsk, true);
			}
			/* Remove this socket anyway because the remote peer sent
			 * the shutdown. This way a new connection will succeed
			 * if the remote peer uses the same source port,
			 * even if the old socket is still unreleased, but now disconnected.
			 */
			vsock_remove_sock(vsk);
		}
		/* Only wake state waiters if the shutdown carried any flag. */
		if (le32_to_cpu(virtio_vsock_hdr(skb)->flags))
			sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_RST:
		virtio_transport_do_close(vsk, true);
		break;
	default:
		err = -EINVAL;
		break;
	}

	kfree_skb(skb);
	return err;
}

/* Handle a packet received while the socket is TCP_CLOSING: only an RST
 * is meaningful, completing the close. The caller frees @skb.
 */
static void
virtio_transport_recv_disconnecting(struct sock *sk,
				    struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);

	if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
		virtio_transport_do_close(vsk, true);
}

/* Send an OP_RESPONSE answering the connection request carried by @skb. */
static int
virtio_transport_send_response(struct vsock_sock *vsk,
			       struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RESPONSE,
		.remote_cid = le64_to_cpu(hdr->src_cid),
		.remote_port = le32_to_cpu(hdr->src_port),
		.reply = true,
		.vsk = vsk,
		.net = sock_net(sk_vsock(vsk)),
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}

/* Refresh our view of the peer's credit (buf_alloc/fwd_cnt) from the
 * incoming header; returns true if the peer now has space for more data.
 */
static bool virtio_transport_space_update(struct sock *sk,
					  struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool space_available;

	/* Listener sockets are not associated with any transport, so we are
	 * not able to take the state to see if there is space available in the
	 * remote peer, but since they are only used to receive requests, we
	 * can assume that there is always space available in the other peer.
	 */
	if (!vvs)
		return true;

	/* buf_alloc and fwd_cnt is always included in the hdr */
	spin_lock_bh(&vvs->tx_lock);
	vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
	vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
	space_available = virtio_transport_has_space(vvs);
	spin_unlock_bh(&vvs->tx_lock);
	return space_available;
}

/* Handle server socket */
static int
virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
			     struct virtio_transport *t)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vsock_sock *vsk = vsock_sk(sk);
	struct vsock_sock *vchild;
	struct sock *child;
	int ret;

	/* A listener only accepts OP_REQUEST; anything else is reset. */
	if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
		virtio_transport_reset_no_sock(t, skb, sock_net(sk));
		return -EINVAL;
	}

	if (sk_acceptq_is_full(sk)) {
		virtio_transport_reset_no_sock(t, skb, sock_net(sk));
		return -ENOMEM;
	}

	/* __vsock_release() might have already flushed accept_queue.
	 * Subsequent enqueues would lead to a memory leak.
	 */
	if (sk->sk_shutdown == SHUTDOWN_MASK) {
		virtio_transport_reset_no_sock(t, skb, sock_net(sk));
		return -ESHUTDOWN;
	}

	child = vsock_create_connected(sk);
	if (!child) {
		virtio_transport_reset_no_sock(t, skb, sock_net(sk));
		return -ENOMEM;
	}

	sk_acceptq_added(sk);

	lock_sock_nested(child, SINGLE_DEPTH_NESTING);

	child->sk_state = TCP_ESTABLISHED;

	/* The child's addresses come from the request header: our dst is
	 * its local address, the sender's src is its remote address.
	 */
	vchild = vsock_sk(child);
	vsock_addr_init(&vchild->local_addr, le64_to_cpu(hdr->dst_cid),
			le32_to_cpu(hdr->dst_port));
	vsock_addr_init(&vchild->remote_addr, le64_to_cpu(hdr->src_cid),
			le32_to_cpu(hdr->src_port));

	ret = vsock_assign_transport(vchild, vsk);
	/* Transport assigned (looking at remote_addr) must be the same
	 * where we received the request.
	 */
	if (ret || vchild->transport != &t->transport) {
		release_sock(child);
		virtio_transport_reset_no_sock(t, skb, sock_net(sk));
		sock_put(child);
		return ret;
	}

	if (virtio_transport_space_update(child, skb))
		child->sk_write_space(child);

	vsock_insert_connected(vchild);
	vsock_enqueue_accept(sk, child);
	virtio_transport_send_response(vchild, skb);

	release_sock(child);

	sk->sk_data_ready(sk);
	return 0;
}

/* Only stream and seqpacket virtio-vsock types are supported. */
static bool virtio_transport_valid_type(u16 type)
{
	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
}

/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
 * lock.
 *
 * Main receive entry point: look up the destination socket, validate it,
 * and dispatch on its TCP state. Always consumes @skb (either handed off
 * to a state handler that owns it, or freed here).
 */
void virtio_transport_recv_pkt(struct virtio_transport *t,
			       struct sk_buff *skb, struct net *net)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct sockaddr_vm src, dst;
	struct vsock_sock *vsk;
	struct sock *sk;
	bool space_available;

	vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
			le32_to_cpu(hdr->src_port));
	vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
			le32_to_cpu(hdr->dst_port));

	trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
					dst.svm_cid, dst.svm_port,
					le32_to_cpu(hdr->len),
					le16_to_cpu(hdr->type),
					le16_to_cpu(hdr->op),
					le32_to_cpu(hdr->flags),
					le32_to_cpu(hdr->buf_alloc),
					le32_to_cpu(hdr->fwd_cnt));

	if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
		(void)virtio_transport_reset_no_sock(t, skb, net);
		goto free_pkt;
	}

	/* The socket must be in connected or bound table
	 * otherwise send reset back
	 */
	sk = vsock_find_connected_socket_net(&src, &dst, net);
	if (!sk) {
		sk = vsock_find_bound_socket_net(&dst, net);
		if (!sk) {
			(void)virtio_transport_reset_no_sock(t, skb, net);
			goto free_pkt;
		}
	}

	if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
		(void)virtio_transport_reset_no_sock(t, skb, net);
		sock_put(sk);
		goto free_pkt;
	}

	/* NOTE(review): failure means sk_refcnt was observed as 0, which
	 * should be impossible after a successful lookup above — hence the
	 * WARN_ONCE and no sock_put() on this path. Confirm against
	 * skb_set_owner_sk_safe() semantics.
	 */
	if (!skb_set_owner_sk_safe(skb, sk)) {
		WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n");
		goto free_pkt;
	}

	vsk = vsock_sk(sk);

	lock_sock(sk);

	/* Check if sk has been closed or assigned to another transport before
	 * lock_sock (note: listener sockets are not assigned to any transport)
	 */
	if (sock_flag(sk, SOCK_DONE) ||
	    (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
		(void)virtio_transport_reset_no_sock(t, skb, net);
		release_sock(sk);
		sock_put(sk);
		goto free_pkt;
	}

	space_available = virtio_transport_space_update(sk, skb);

	/* Update CID in case it has changed after a transport reset event */
	if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
		vsk->local_addr.svm_cid = dst.svm_cid;

	if (space_available)
		sk->sk_write_space(sk);

	switch (sk->sk_state) {
	case TCP_LISTEN:
		virtio_transport_recv_listen(sk, skb, t);
		kfree_skb(skb);
		break;
	case TCP_SYN_SENT:
		virtio_transport_recv_connecting(sk, skb);
		kfree_skb(skb);
		break;
	case TCP_ESTABLISHED:
		/* recv_connected() takes ownership of skb (queues or frees). */
		virtio_transport_recv_connected(sk, skb);
		break;
	case TCP_CLOSING:
		virtio_transport_recv_disconnecting(sk, skb);
		kfree_skb(skb);
		break;
	default:
		(void)virtio_transport_reset_no_sock(t, skb, net);
		kfree_skb(skb);
		break;
	}

	release_sock(sk);

	/* Release refcnt obtained when we fetched this socket out of the
	 * bound or connected list.
	 */
	sock_put(sk);
	return;

free_pkt:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);

/* Remove skbs found in a queue that have a vsk that matches.
 *
 * Each skb is freed.
 *
 * Returns the count of skbs that were reply packets.
 */
int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue)
{
	struct sk_buff_head freeme;
	struct sk_buff *skb, *tmp;
	int cnt = 0;

	skb_queue_head_init(&freeme);

	/* Unlink under the queue lock, free outside it. */
	spin_lock_bh(&queue->lock);
	skb_queue_walk_safe(queue, skb, tmp) {
		if (vsock_sk(skb->sk) != vsk)
			continue;

		__skb_unlink(skb, queue);
		__skb_queue_tail(&freeme, skb);

		if (virtio_vsock_skb_reply(skb))
			cnt++;
	}
	spin_unlock_bh(&queue->lock);

	__skb_queue_purge(&freeme);

	return cnt;
}
EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs);

/* Dequeue one skb from the rx_queue and hand it to @recv_actor, adjusting
 * credit accounting and notifying the peer of the freed space.
 */
int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct sock *sk = sk_vsock(vsk);
	struct virtio_vsock_hdr *hdr;
	struct sk_buff *skb;
	u32 pkt_len;
	int off = 0;
	int err;

	spin_lock_bh(&vvs->rx_lock);
	/* Use __skb_recv_datagram() for race-free handling of the receive. It
	 * works for types other than dgrams.
	 */
	skb = __skb_recv_datagram(sk, &vvs->rx_queue, MSG_DONTWAIT, &off, &err);
	if (!skb) {
		spin_unlock_bh(&vvs->rx_lock);
		return err;
	}

	hdr = virtio_vsock_hdr(skb);
	if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
		vvs->msg_count--;

	pkt_len = le32_to_cpu(hdr->len);
	virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
	spin_unlock_bh(&vvs->rx_lock);

	virtio_transport_send_credit_update(vsk);

	return recv_actor(sk, skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);

/* SO_RCVLOWAT hook: possibly nudge the sender with a credit update so it
 * can provide enough data to satisfy the new low-water mark.
 */
int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool send_update;

	spin_lock_bh(&vvs->rx_lock);

	/* If number of available bytes is less than new SO_RCVLOWAT value,
	 * kick sender to send more data, because sender may sleep in its
	 * 'send()' syscall waiting for enough space at our side. Also
	 * don't send credit update when peer already knows actual value -
	 * such transmission will be useless.
	 */
	send_update = (vvs->rx_bytes < val) &&
		      (vvs->fwd_cnt != vvs->last_fwd_cnt);

	spin_unlock_bh(&vvs->rx_lock);

	if (send_update) {
		int err;

		err = virtio_transport_send_credit_update(vsk);
		if (err < 0)
			return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_set_rcvlowat);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");