// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware vSockets Driver
 *
 * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
 */

#include <linux/types.h>
#include <linux/socket.h>
#include <linux/stddef.h>
#include <net/sock.h>

#include "vmci_transport_notify.h"

/* Shorthand for the packet-based notification state kept in the
 * transport-private portion of a vsock socket.
 */
#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name)

/* Decide whether a peer that announced it is waiting to write should now be
 * sent a READ notification (i.e. whether enough consume-queue space has been
 * freed).  Returns false when no peer is waiting; always true when the
 * waiting-notify optimization is compiled out.
 */
static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	bool retval;
	u64 notify_limit;

	if (!PKT_FIELD(vsk, peer_waiting_write))
		return false;

#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	/* When the sender blocks, we take that as a sign that the sender is
	 * faster than the receiver. To reduce the transmit rate of the sender,
	 * we delay the sending of the read notification by decreasing the
	 * write_notify_window. The notification is delayed until the number of
	 * bytes used in the queue drops below the write_notify_window.
	 */

	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
			PKT_FIELD(vsk, write_notify_window) =
			    PKT_FIELD(vsk, write_notify_min_window);
		} else {
			/* Shrink the window by one page, clamped to the
			 * minimum window size.
			 */
			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
			if (PKT_FIELD(vsk, write_notify_window) <
			    PKT_FIELD(vsk, write_notify_min_window))
				PKT_FIELD(vsk, write_notify_window) =
				    PKT_FIELD(vsk, write_notify_min_window);

		}
	}
	notify_limit = vmci_trans(vsk)->consume_size -
		PKT_FIELD(vsk, write_notify_window);
#else
	notify_limit = 0;
#endif

	/* For now we ignore the wait information and just see if the free
	 * space exceeds the notify limit. Note that improving this function
	 * to be more intelligent will not require a protocol change and will
	 * retain compatibility between endpoints with mixed versions of this
	 * function.
	 *
	 * The notify_limit is used to delay notifications in the case where
	 * flow control is enabled. Below the test is expressed in terms of
	 * free space in the queue: if free_space > ConsumeSize -
	 * write_notify_window then notify. An alternate way of expressing this
	 * is to rewrite the expression to use the data ready in the receive
	 * queue: if write_notify_window > bufferReady then notify as
	 * free_space == ConsumeSize - bufferReady.
	 */
	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
		notify_limit;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	if (retval) {
		/*
		 * Once we notify the peer, we reset the detected flag so the
		 * next wait will again cause a decrease in the window size.
		 */

		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
	}
#endif
	return retval;
#else
	return true;
#endif
}

/* Decide whether a peer that announced it is waiting to read should now be
 * sent a WROTE notification (i.e. whether there is any data in the produce
 * queue for it to consume).
 */
static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	if (!PKT_FIELD(vsk, peer_waiting_read))
		return false;

	/* For now we ignore the wait information and just see if there is any
	 * data for our peer to read. Note that improving this function to be
	 * more intelligent will not require a protocol change and will retain
	 * compatibility between endpoints with mixed versions of this
	 * function.
	 */
	return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0;
#else
	return true;
#endif
}

/* Handle an incoming WAITING_READ control packet: record the peer's wait
 * info and, if data is already available, immediately send a WROTE
 * notification (using the bottom-half-safe sender when called from
 * interrupt context).
 */
static void
vmci_transport_handle_waiting_read(struct sock *sk,
				   struct vmci_transport_packet *pkt,
				   bool bottom_half,
				   struct sockaddr_vm *dst,
				   struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);

	PKT_FIELD(vsk, peer_waiting_read) = true;
	memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait,
	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));

	if (vmci_transport_notify_waiting_read(vsk)) {
		bool sent;

		if (bottom_half)
			sent = vmci_transport_send_wrote_bh(dst, src) > 0;
		else
			sent = vmci_transport_send_wrote(sk) > 0;

		/* Only clear the flag once the notification actually went
		 * out; otherwise we will retry on a later event.
		 */
		if (sent)
			PKT_FIELD(vsk, peer_waiting_read) = false;
	}
#endif
}

/* Handle an incoming WAITING_WRITE control packet: record the peer's wait
 * info and, if enough queue space is already free, immediately send a READ
 * notification (bottom-half-safe variant when in interrupt context).
 */
static void
vmci_transport_handle_waiting_write(struct sock *sk,
				    struct vmci_transport_packet *pkt,
				    bool bottom_half,
				    struct sockaddr_vm *dst,
				    struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);

	PKT_FIELD(vsk, peer_waiting_write) = true;
	memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait,
	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));

	if (vmci_transport_notify_waiting_write(vsk)) {
		bool sent;

		if (bottom_half)
			sent = vmci_transport_send_read_bh(dst, src) > 0;
		else
			sent = vmci_transport_send_read(sk) > 0;

		/* Only clear the flag once the notification actually went
		 * out; otherwise we will retry on a later event.
		 */
		if (sent)
			PKT_FIELD(vsk, peer_waiting_write) = false;
	}
#endif
}

/* Handle an incoming READ control packet: the peer consumed data, so our
 * outstanding WAITING_WRITE (if any) is satisfied and writers can be woken.
 */
static void
vmci_transport_handle_read(struct sock *sk,
			   struct vmci_transport_packet *pkt,
			   bool bottom_half,
			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);
	PKT_FIELD(vsk, sent_waiting_write) = false;
#endif

	sk->sk_write_space(sk);
}

/* Tell the peer we are waiting for at least room_needed bytes to read,
 * encoding the consume-queue offset (and generation, accounting for
 * wrap-around) at which that much data will be available.  Sent at most
 * once until a WROTE arrives.  Returns true on success or when the
 * optimization is compiled out.
 */
static bool send_waiting_read(struct sock *sk, u64 room_needed)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;
	struct vmci_transport_waiting_info waiting_info;
	u64 tail;
	u64 head;
	u64 room_left;
	bool ret;

	vsk = vsock_sk(sk);

	if (PKT_FIELD(vsk, sent_waiting_read))
		return true;

	/* Grow the window back by one page (up to consume_size) since we are
	 * now the one waiting; this re-opens flow control.
	 */
	if (PKT_FIELD(vsk, write_notify_window) <
	    vmci_trans(vsk)->consume_size)
		PKT_FIELD(vsk, write_notify_window) =
		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
			vmci_trans(vsk)->consume_size);

	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head);
	room_left = vmci_trans(vsk)->consume_size - head;
	if (room_needed >= room_left) {
		/* Target offset wraps past the end of the queue into the
		 * next generation.
		 */
		waiting_info.offset = room_needed - room_left;
		waiting_info.generation =
		    PKT_FIELD(vsk, consume_q_generation) + 1;
	} else {
		waiting_info.offset = head + room_needed;
		waiting_info.generation = PKT_FIELD(vsk, consume_q_generation);
	}

	ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0;
	if (ret)
		PKT_FIELD(vsk, sent_waiting_read) = true;

	return ret;
#else
	return true;
#endif
}

/* Tell the peer we are waiting for at least room_needed bytes of space to
 * write, encoding the produce-queue offset/generation at which that space
 * will exist.  Sent at most once until a READ arrives.  Returns true on
 * success or when the optimization is compiled out.
 */
static bool send_waiting_write(struct sock *sk, u64 room_needed)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;
	struct vmci_transport_waiting_info waiting_info;
	u64 tail;
	u64 head;
	u64 room_left;
	bool ret;

	vsk = vsock_sk(sk);

	if (PKT_FIELD(vsk, sent_waiting_write))
		return true;

	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head);
	room_left = vmci_trans(vsk)->produce_size - tail;
	if (room_needed + 1 >= room_left) {
		/* Wraps around to current generation. */
		waiting_info.offset = room_needed + 1 - room_left;
		waiting_info.generation = PKT_FIELD(vsk, produce_q_generation);
	} else {
		waiting_info.offset = tail + room_needed + 1;
		waiting_info.generation =
		    PKT_FIELD(vsk, produce_q_generation) - 1;
	}

	ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0;
	if (ret)
		PKT_FIELD(vsk, sent_waiting_write) = true;

	return ret;
#else
	return true;
#endif
}

/* Send a READ notification to the peer if it is waiting to write, retrying
 * up to VMCI_TRANSPORT_MAX_DGRAM_RESENDS times.  Returns 0 or the last send
 * error.
 */
static int vmci_transport_send_read_notification(struct sock *sk)
{
	struct vsock_sock *vsk;
	bool sent_read;
	unsigned int retries;
	int err;

	vsk = vsock_sk(sk);
	sent_read = false;
	retries = 0;
	err = 0;

	if (vmci_transport_notify_waiting_write(vsk)) {
		/* Notify the peer that we have read, retrying the send on
		 * failure up to our maximum value.  XXX For now we just log
		 * the failure, but later we should schedule a work item to
		 * handle the resend until it succeeds. That would require
		 * keeping track of work items in the vsk and cleaning them up
		 * upon socket close.
		 */
		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
		       !sent_read &&
		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			err = vmci_transport_send_read(sk);
			if (err >= 0)
				sent_read = true;

			retries++;
		}

		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS)
			pr_err("%p unable to send read notify to peer\n", sk);
		else
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
			PKT_FIELD(vsk, peer_waiting_write) = false;
#endif

	}
	return err;
}

/* Handle an incoming WROTE control packet: the peer produced data, so our
 * outstanding WAITING_READ (if any) is satisfied and readers can be woken.
 */
static void
vmci_transport_handle_wrote(struct sock *sk,
			    struct vmci_transport_packet *pkt,
			    bool bottom_half,
			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk = vsock_sk(sk);
	PKT_FIELD(vsk, sent_waiting_read) = false;
#endif
	sk->sk_data_ready(sk);
}

/* Initialize per-socket notification state to its defaults (one-page
 * windows, no outstanding waits, generation counters at zero).
 */
static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
	PKT_FIELD(vsk, peer_waiting_read) = false;
	PKT_FIELD(vsk, peer_waiting_write) = false;
	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
	PKT_FIELD(vsk, sent_waiting_read) = false;
	PKT_FIELD(vsk, sent_waiting_write) = false;
	PKT_FIELD(vsk, produce_q_generation) = 0;
	PKT_FIELD(vsk, consume_q_generation) = 0;

	memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0,
	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
	memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0,
	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
}

/* No per-socket notification resources to release. */
static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
{
}

/* poll() helper for readability: report whether at least target bytes are
 * ready; if not (and the connection is established), ask the peer to notify
 * us when data arrives.  Returns 0, or -1 if the waiting-read send failed.
 */
static int
vmci_transport_notify_pkt_poll_in(struct sock *sk,
				  size_t target, bool *data_ready_now)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (vsock_stream_has_data(vsk) >= target) {
		*data_ready_now = true;
	} else {
		/* We can't read right now because there is not enough data
		 * in the queue. Ask for notifications when there is something
		 * to read.
		 */
		if (sk->sk_state == TCP_ESTABLISHED) {
			if (!send_waiting_read(sk, 1))
				return -1;

		}
		*data_ready_now = false;
	}

	return 0;
}

/* poll() helper for writability: report whether any space is free; if the
 * queue is completely full, ask the peer to notify us when it reads.
 * Returns 0, or -1 if the waiting-write send failed.
 */
static int
vmci_transport_notify_pkt_poll_out(struct sock *sk,
				   size_t target, bool *space_avail_now)
{
	s64 produce_q_free_space;
	struct vsock_sock *vsk = vsock_sk(sk);

	produce_q_free_space = vsock_stream_has_space(vsk);
	if (produce_q_free_space > 0) {
		*space_avail_now = true;
		return 0;
	} else if (produce_q_free_space == 0) {
		/* This is a connected socket but we can't currently send data.
		 * Notify the peer that we are waiting if the queue is full. We
		 * only send a waiting write if the queue is full because
		 * otherwise we end up in an infinite WAITING_WRITE, READ,
		 * WAITING_WRITE, READ, etc. loop. Treat failing to send the
		 * notification as a socket error, passing that back through
		 * the mask.
		 */
		if (!send_waiting_write(sk, 1))
			return -1;

		*space_avail_now = false;
	}

	return 0;
}

/* Prepare per-receive notification state before a recvmsg(); may raise the
 * minimum write-notify window to cover the caller's target and flag that a
 * notification must be sent before blocking.
 */
static int
vmci_transport_notify_pkt_recv_init(
			struct sock *sk,
			size_t target,
			struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
	data->consume_head = 0;
	data->produce_tail = 0;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	data->notify_on_block = false;

	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
		if (PKT_FIELD(vsk, write_notify_window) <
		    PKT_FIELD(vsk, write_notify_min_window)) {
			/* If the current window is smaller than the new
			 * minimal window size, we need to reevaluate whether
			 * we need to notify the sender. If the number of ready
			 * bytes are smaller than the new window, we need to
			 * send a notification to the sender before we block.
			 */

			PKT_FIELD(vsk, write_notify_window) =
			    PKT_FIELD(vsk, write_notify_min_window);
			data->notify_on_block = true;
		}
	}
#endif
#endif

	return 0;
}

/* About to block in recvmsg(): tell the peer we are waiting for target
 * bytes, and send any read notification that recv_init deferred.  Returns
 * 0, -EHOSTUNREACH if the waiting-read send failed, or a send error.
 */
static int
vmci_transport_notify_pkt_recv_pre_block(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	int err = 0;

	/* Notify our peer that we are waiting for data to read. */
	if (!send_waiting_read(sk, target)) {
		err = -EHOSTUNREACH;
		return err;
	}
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	if (data->notify_on_block) {
		err = vmci_transport_send_read_notification(sk);
		if (err < 0)
			return err;

		data->notify_on_block = false;
	}
#endif

	return err;
}

/* Snapshot the consume-queue indexes just before dequeueing so that
 * recv_post_dequeue can detect a wrap-around.
 */
static int
vmci_transport_notify_pkt_recv_pre_dequeue(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	/* Now consume up to len bytes from the queue. Note that since we have
	 * the socket locked we should copy at least ready bytes.
	 */
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair,
				       &data->produce_tail,
				       &data->consume_head);
#endif

	return 0;
}

/* After dequeueing: bump the consume-queue generation on wrap-around and
 * send a READ notification to the peer if it is waiting to write.
 */
static int
vmci_transport_notify_pkt_recv_post_dequeue(
				struct sock *sk,
				size_t target,
				ssize_t copied,
				bool data_read,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk;
	int err;

	vsk = vsock_sk(sk);
	err = 0;

	if (data_read) {
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
		/* Detect a wrap-around to maintain queue generation. Note
		 * that this is safe since we hold the socket lock across the
		 * two queue pair operations.
		 */
		if (copied >=
		    vmci_trans(vsk)->consume_size - data->consume_head)
			PKT_FIELD(vsk, consume_q_generation)++;
#endif

		err = vmci_transport_send_read_notification(sk);
		if (err < 0)
			return err;

	}
	return err;
}

/* Reset per-send notification state before a sendmsg(). */
static int
vmci_transport_notify_pkt_send_init(
			struct sock *sk,
			struct vmci_transport_send_notify_data *data)
{
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
	data->consume_head = 0;
	data->produce_tail = 0;
#endif

	return 0;
}

/* About to block in sendmsg(): tell the peer we are waiting for room to
 * write.  Returns 0 or -EHOSTUNREACH on send failure.
 */
static int
vmci_transport_notify_pkt_send_pre_block(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	/* Notify our peer that we are waiting for room to write. */
	if (!send_waiting_write(sk, 1))
		return -EHOSTUNREACH;

	return 0;
}

/* Snapshot the produce-queue indexes just before enqueueing so that
 * send_post_enqueue can detect a wrap-around.
 */
static int
vmci_transport_notify_pkt_send_pre_enqueue(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair,
				       &data->produce_tail,
				       &data->consume_head);
#endif

	return 0;
}

/* After enqueueing: bump the produce-queue generation on wrap-around and,
 * if the peer is waiting to read, send a WROTE notification, retrying up to
 * VMCI_TRANSPORT_MAX_DGRAM_RESENDS times.
 */
static int
vmci_transport_notify_pkt_send_post_enqueue(
				struct sock *sk,
				ssize_t written,
				struct vmci_transport_send_notify_data *data)
{
	int err = 0;
	struct vsock_sock *vsk;
	bool sent_wrote = false;
	int retries = 0;

	vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	/* Detect a wrap-around to maintain queue generation. Note that this
	 * is safe since we hold the socket lock across the two queue pair
	 * operations.
	 */
	if (written >= vmci_trans(vsk)->produce_size - data->produce_tail)
		PKT_FIELD(vsk, produce_q_generation)++;

#endif

	if (vmci_transport_notify_waiting_read(vsk)) {
		/* Notify the peer that we have written, retrying the send on
		 * failure up to our maximum value. See the XXX comment for the
		 * corresponding piece of code in StreamRecvmsg() for potential
		 * improvements.
		 */
		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
		       !sent_wrote &&
		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			err = vmci_transport_send_wrote(sk);
			if (err >= 0)
				sent_wrote = true;

			retries++;
		}

		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			pr_err("%p unable to send wrote notify to peer\n", sk);
			return err;
		} else {
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
			PKT_FIELD(vsk, peer_waiting_read) = false;
#endif
		}
	}
	return err;
}

/* Dispatch a notification control packet to its handler.  *pkt_processed
 * (if non-NULL) reports whether the packet type was recognized.
 */
static void
vmci_transport_notify_pkt_handle_pkt(
			struct sock *sk,
			struct vmci_transport_packet *pkt,
			bool bottom_half,
			struct sockaddr_vm *dst,
			struct sockaddr_vm *src, bool *pkt_processed)
{
	bool processed = false;

	switch (pkt->type) {
	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
		processed = true;
		break;
	case VMCI_TRANSPORT_PACKET_TYPE_READ:
		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
		processed = true;
		break;
	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
		vmci_transport_handle_waiting_write(sk, pkt, bottom_half,
						    dst, src);
		processed = true;
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
		vmci_transport_handle_waiting_read(sk, pkt, bottom_half,
						   dst, src);
		processed = true;
		break;
	}

	if (pkt_processed)
		*pkt_processed = processed;
}

/* Queue pair size is known once the connection request is made: open the
 * write-notify window to the full consume size (and clamp the minimum).
 */
static void vmci_transport_notify_pkt_process_request(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
	if (vmci_trans(vsk)->consume_size <
	    PKT_FIELD(vsk, write_notify_min_window))
		PKT_FIELD(vsk, write_notify_min_window) =
		    vmci_trans(vsk)->consume_size;
}

/* Same window setup as process_request, applied on the negotiate path. */
static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
	if (vmci_trans(vsk)->consume_size <
	    PKT_FIELD(vsk, write_notify_min_window))
		PKT_FIELD(vsk, write_notify_min_window) =
		    vmci_trans(vsk)->consume_size;
}

/* Socket control packet based operations. */
const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
	.socket_init = vmci_transport_notify_pkt_socket_init,
	.socket_destruct = vmci_transport_notify_pkt_socket_destruct,
	.poll_in = vmci_transport_notify_pkt_poll_in,
	.poll_out = vmci_transport_notify_pkt_poll_out,
	.handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
	.recv_init = vmci_transport_notify_pkt_recv_init,
	.recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
	.recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
	.recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
	.send_init = vmci_transport_notify_pkt_send_init,
	.send_pre_block = vmci_transport_notify_pkt_send_pre_block,
	.send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
	.send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
	.process_request = vmci_transport_notify_pkt_process_request,
	.process_negotiate = vmci_transport_notify_pkt_process_negotiate,
};