// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware vSockets Driver
 *
 * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
 */

#include <linux/types.h>
#include <linux/socket.h>
#include <linux/stddef.h>
#include <net/sock.h>

#include "vmci_transport_notify.h"

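/* Shorthand for the per-socket notification state that this control-packet
 * protocol keeps in the transport (the notify.pkt struct reached through
 * vmci_trans(vsk)).
 */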
#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name)

static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	bool retval;
	u64 notify_limit;

	if (!PKT_FIELD(vsk, peer_waiting_write))
		return false;

#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	/* When the sender blocks, we take that as a sign that the sender is
	 * faster than the receiver. To reduce the transmit rate of the sender,
	 * we delay the sending of the read notification by decreasing the
	 * write_notify_window. The notification is delayed until the number of
	 * bytes used in the queue drops below the write_notify_window.
	 */
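	/* For example, assuming 4 KiB pages: a window of 16384 bytes shrinks
	 * to 12288 on the first blocked write that is detected, then to 8192
	 * on the next, and so on, but never below write_notify_min_window.
	 */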

	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
			PKT_FIELD(vsk, write_notify_window) =
				PKT_FIELD(vsk, write_notify_min_window);
		} else {
			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
			if (PKT_FIELD(vsk, write_notify_window) <
			    PKT_FIELD(vsk, write_notify_min_window))
				PKT_FIELD(vsk, write_notify_window) =
				    PKT_FIELD(vsk, write_notify_min_window);
		}
	}
	notify_limit = vmci_trans(vsk)->consume_size -
		PKT_FIELD(vsk, write_notify_window);
#else
	notify_limit = 0;
#endif

	/* For now we ignore the wait information and just see if the free
	 * space exceeds the notify limit. Note that improving this function
	 * to be more intelligent will not require a protocol change and will
	 * retain compatibility between endpoints with mixed versions of this
	 * function.
	 *
	 * The notify_limit is used to delay notifications in the case where
	 * flow control is enabled. Below the test is expressed in terms of
	 * free space in the queue: if free_space > ConsumeSize -
	 * write_notify_window then notify. An alternate way of expressing
	 * this is to rewrite the expression to use the data ready in the
	 * receive queue: if write_notify_window > bufferReady then notify,
	 * as free_space == ConsumeSize - bufferReady.
	 */
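	/* For example, with a consume_size of 65536 bytes and a
	 * write_notify_window of 8192, notify_limit is 57344: we only notify
	 * once more than 57344 bytes are free, i.e. once fewer than 8192
	 * unread bytes remain in the queue.
	 */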
	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
		notify_limit;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	if (retval) {
		/*
		 * Once we notify the peer, we reset the detected flag so the
		 * next wait will again cause a decrease in the window size.
		 */

		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
	}
#endif
	return retval;
#else
	return true;
#endif
}

static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	if (!PKT_FIELD(vsk, peer_waiting_read))
		return false;

	/* For now we ignore the wait information and just see if there is any
	 * data for our peer to read. Note that improving this function to be
	 * more intelligent will not require a protocol change and will retain
	 * compatibility between endpoints with mixed versions of this
	 * function.
	 */
	return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0;
#else
	return true;
#endif
}

static void
vmci_transport_handle_waiting_read(struct sock *sk,
				   struct vmci_transport_packet *pkt,
				   bool bottom_half,
				   struct sockaddr_vm *dst,
				   struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);

	PKT_FIELD(vsk, peer_waiting_read) = true;
	memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait,
	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));

	if (vmci_transport_notify_waiting_read(vsk)) {
		bool sent;

		if (bottom_half)
			sent = vmci_transport_send_wrote_bh(dst, src) > 0;
		else
			sent = vmci_transport_send_wrote(sk) > 0;

		if (sent)
			PKT_FIELD(vsk, peer_waiting_read) = false;
	}
#endif
}

static void
vmci_transport_handle_waiting_write(struct sock *sk,
				    struct vmci_transport_packet *pkt,
				    bool bottom_half,
				    struct sockaddr_vm *dst,
				    struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);

	PKT_FIELD(vsk, peer_waiting_write) = true;
	memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait,
	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));

	if (vmci_transport_notify_waiting_write(vsk)) {
		bool sent;

		if (bottom_half)
			sent = vmci_transport_send_read_bh(dst, src) > 0;
		else
			sent = vmci_transport_send_read(sk) > 0;

		if (sent)
			PKT_FIELD(vsk, peer_waiting_write) = false;
	}
#endif
}

static void
vmci_transport_handle_read(struct sock *sk,
			   struct vmci_transport_packet *pkt,
			   bool bottom_half,
			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);
	PKT_FIELD(vsk, sent_waiting_write) = false;
#endif

	sk->sk_write_space(sk);
}

static bool send_waiting_read(struct sock *sk, u64 room_needed)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;
	struct vmci_transport_waiting_info waiting_info;
	u64 tail;
	u64 head;
	u64 room_left;
	bool ret;

	vsk = vsock_sk(sk);

	if (PKT_FIELD(vsk, sent_waiting_read))
		return true;

	if (PKT_FIELD(vsk, write_notify_window) <
	    vmci_trans(vsk)->consume_size)
		PKT_FIELD(vsk, write_notify_window) =
		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
			vmci_trans(vsk)->consume_size);

	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head);
	room_left = vmci_trans(vsk)->consume_size - head;
	if (room_needed >= room_left) {
		waiting_info.offset = room_needed - room_left;
		waiting_info.generation =
		    PKT_FIELD(vsk, consume_q_generation) + 1;
	} else {
		waiting_info.offset = head + room_needed;
		waiting_info.generation = PKT_FIELD(vsk, consume_q_generation);
	}
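	/* As a concrete illustration, assuming a consume_size of 65536 with
	 * the head at 65000: waiting for 1000 bytes wraps past the end of
	 * the queue, so we ask to be notified at offset 1000 - 536 = 464 of
	 * the next generation; waiting for 100 bytes stays within this lap,
	 * giving offset 65100 of the current generation.
	 */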

	ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0;
	if (ret)
		PKT_FIELD(vsk, sent_waiting_read) = true;

	return ret;
#else
	return true;
#endif
}

static bool send_waiting_write(struct sock *sk, u64 room_needed)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;
	struct vmci_transport_waiting_info waiting_info;
	u64 tail;
	u64 head;
	u64 room_left;
	bool ret;

	vsk = vsock_sk(sk);

	if (PKT_FIELD(vsk, sent_waiting_write))
		return true;

	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head);
	room_left = vmci_trans(vsk)->produce_size - tail;
	if (room_needed + 1 >= room_left) {
		/* Wraps around to current generation. */
		waiting_info.offset = room_needed + 1 - room_left;
		waiting_info.generation = PKT_FIELD(vsk, produce_q_generation);
	} else {
		waiting_info.offset = tail + room_needed + 1;
		waiting_info.generation =
		    PKT_FIELD(vsk, produce_q_generation) - 1;
	}
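	/* This mirrors send_waiting_read(): the offset encodes how far the
	 * peer must consume before at least room_needed bytes of space open
	 * up. E.g., assuming a produce_size of 65536 with the tail at 65000,
	 * waiting to write 1000 bytes wraps past the end, giving offset
	 * 1001 - 536 = 465 in the current produce generation.
	 */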

	ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0;
	if (ret)
		PKT_FIELD(vsk, sent_waiting_write) = true;

	return ret;
#else
	return true;
#endif
}

static int vmci_transport_send_read_notification(struct sock *sk)
{
	struct vsock_sock *vsk;
	bool sent_read;
	unsigned int retries;
	int err;

	vsk = vsock_sk(sk);
	sent_read = false;
	retries = 0;
	err = 0;

	if (vmci_transport_notify_waiting_write(vsk)) {
		/* Notify the peer that we have read, retrying the send on
		 * failure up to our maximum value. XXX For now we just log
		 * the failure, but later we should schedule a work item to
		 * handle the resend until it succeeds. That would require
		 * keeping track of work items in the vsk and cleaning them up
		 * upon socket close.
		 */
		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
		       !sent_read &&
		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			err = vmci_transport_send_read(sk);
			if (err >= 0)
				sent_read = true;

			retries++;
		}

		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS)
			pr_err("%p unable to send read notify to peer\n", sk);
		else
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
			PKT_FIELD(vsk, peer_waiting_write) = false;
#endif

	}
	return err;
}

static void
vmci_transport_handle_wrote(struct sock *sk,
			    struct vmci_transport_packet *pkt,
			    bool bottom_half,
			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, sent_waiting_read) = false;
#endif
	vsock_data_ready(sk);
}

static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
	PKT_FIELD(vsk, peer_waiting_read) = false;
	PKT_FIELD(vsk, peer_waiting_write) = false;
	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
	PKT_FIELD(vsk, sent_waiting_read) = false;
	PKT_FIELD(vsk, sent_waiting_write) = false;
	PKT_FIELD(vsk, produce_q_generation) = 0;
	PKT_FIELD(vsk, consume_q_generation) = 0;

	memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0,
	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
	memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0,
	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
}

static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
{
}

static int
vmci_transport_notify_pkt_poll_in(struct sock *sk,
				  size_t target, bool *data_ready_now)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (vsock_stream_has_data(vsk) >= target) {
		*data_ready_now = true;
	} else {
		/* We can't read right now because there is not enough data
		 * in the queue. Ask for notifications when there is something
		 * to read.
		 */
		if (sk->sk_state == TCP_ESTABLISHED) {
			if (!send_waiting_read(sk, 1))
				return -1;
		}
		*data_ready_now = false;
	}

	return 0;
}

static int
vmci_transport_notify_pkt_poll_out(struct sock *sk,
				   size_t target, bool *space_avail_now)
{
	s64 produce_q_free_space;
	struct vsock_sock *vsk = vsock_sk(sk);

	produce_q_free_space = vsock_stream_has_space(vsk);
	if (produce_q_free_space > 0) {
		*space_avail_now = true;
		return 0;
	} else if (produce_q_free_space == 0) {
		/* This is a connected socket but we can't currently send data.
		 * Notify the peer that we are waiting if the queue is full. We
		 * only send a waiting write if the queue is full because
		 * otherwise we end up in an infinite WAITING_WRITE, READ,
		 * WAITING_WRITE, READ, etc. loop. Treat failing to send the
		 * notification as a socket error, passing that back through
		 * the mask.
		 */
		if (!send_waiting_write(sk, 1))
			return -1;

		*space_avail_now = false;
	}

	return 0;
}

static int
vmci_transport_notify_pkt_recv_init(
			struct sock *sk,
			size_t target,
			struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
	data->consume_head = 0;
	data->produce_tail = 0;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	data->notify_on_block = false;

	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
		if (PKT_FIELD(vsk, write_notify_window) <
		    PKT_FIELD(vsk, write_notify_min_window)) {
			/* If the current window is smaller than the new
			 * minimal window size, we need to reevaluate whether
			 * we need to notify the sender. If the number of
			 * ready bytes is smaller than the new window, we need
			 * to send a notification to the sender before we
			 * block.
			 */
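			/* E.g., assuming 4 KiB pages: a reader blocking on a
			 * 16384-byte message raises the minimum window to
			 * 16385 bytes, and notify_on_block makes sure a READ
			 * notification reaches the sender before we sleep.
			 */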

			PKT_FIELD(vsk, write_notify_window) =
			    PKT_FIELD(vsk, write_notify_min_window);
			data->notify_on_block = true;
		}
	}
#endif
#endif

	return 0;
}

static int
vmci_transport_notify_pkt_recv_pre_block(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	int err = 0;

	/* Notify our peer that we are waiting for data to read. */
	if (!send_waiting_read(sk, target)) {
		err = -EHOSTUNREACH;
		return err;
	}
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	if (data->notify_on_block) {
		err = vmci_transport_send_read_notification(sk);
		if (err < 0)
			return err;

		data->notify_on_block = false;
	}
#endif

	return err;
}

static int
vmci_transport_notify_pkt_recv_pre_dequeue(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	/* Now consume up to len bytes from the queue. Note that since we have
	 * the socket locked we should copy at least ready bytes.
	 */
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair,
				       &data->produce_tail,
				       &data->consume_head);
#endif

	return 0;
}

static int
vmci_transport_notify_pkt_recv_post_dequeue(
				struct sock *sk,
				size_t target,
				ssize_t copied,
				bool data_read,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk;
	int err;

	vsk = vsock_sk(sk);
	err = 0;

	if (data_read) {
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
		/* Detect a wrap-around to maintain queue generation. Note
		 * that this is safe since we hold the socket lock across the
		 * two queue pair operations.
		 */
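		/* E.g., with a consume_size of 65536 and consume_head
		 * sampled at 65000 before the dequeue, copying 536 or more
		 * bytes means the head wrapped past the end of the queue,
		 * so the consume generation is bumped.
		 */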
		if (copied >=
		    vmci_trans(vsk)->consume_size - data->consume_head)
			PKT_FIELD(vsk, consume_q_generation)++;
#endif

		err = vmci_transport_send_read_notification(sk);
		if (err < 0)
			return err;
	}
	return err;
}

static int
vmci_transport_notify_pkt_send_init(
			struct sock *sk,
			struct vmci_transport_send_notify_data *data)
{
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
	data->consume_head = 0;
	data->produce_tail = 0;
#endif

	return 0;
}

static int
vmci_transport_notify_pkt_send_pre_block(
			struct sock *sk,
			struct vmci_transport_send_notify_data *data)
{
	/* Notify our peer that we are waiting for room to write. */
	if (!send_waiting_write(sk, 1))
		return -EHOSTUNREACH;

	return 0;
}

static int
vmci_transport_notify_pkt_send_pre_enqueue(
			struct sock *sk,
			struct vmci_transport_send_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair,
				       &data->produce_tail,
				       &data->consume_head);
#endif

	return 0;
}

static int
vmci_transport_notify_pkt_send_post_enqueue(
			struct sock *sk,
			ssize_t written,
			struct vmci_transport_send_notify_data *data)
{
	int err = 0;
	struct vsock_sock *vsk;
	bool sent_wrote = false;
	int retries = 0;

	vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	/* Detect a wrap-around to maintain queue generation. Note that this
	 * is safe since we hold the socket lock across the two queue pair
	 * operations.
	 */
	if (written >= vmci_trans(vsk)->produce_size - data->produce_tail)
		PKT_FIELD(vsk, produce_q_generation)++;

#endif

	if (vmci_transport_notify_waiting_read(vsk)) {
		/* Notify the peer that we have written, retrying the send on
		 * failure up to our maximum value. See the XXX comment for the
		 * corresponding piece of code in StreamRecvmsg() for potential
		 * improvements.
		 */
		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
		       !sent_wrote &&
		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			err = vmci_transport_send_wrote(sk);
			if (err >= 0)
				sent_wrote = true;

			retries++;
		}

		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			pr_err("%p unable to send wrote notify to peer\n", sk);
			return err;
		} else {
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
			PKT_FIELD(vsk, peer_waiting_read) = false;
#endif
		}
	}
	return err;
}

static void
vmci_transport_notify_pkt_handle_pkt(
			struct sock *sk,
			struct vmci_transport_packet *pkt,
			bool bottom_half,
			struct sockaddr_vm *dst,
			struct sockaddr_vm *src, bool *pkt_processed)
{
	bool processed = false;

	switch (pkt->type) {
	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
		processed = true;
		break;
	case VMCI_TRANSPORT_PACKET_TYPE_READ:
		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
		processed = true;
		break;
	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
		vmci_transport_handle_waiting_write(sk, pkt, bottom_half,
						    dst, src);
		processed = true;
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
		vmci_transport_handle_waiting_read(sk, pkt, bottom_half,
						   dst, src);
		processed = true;
		break;
	}

	if (pkt_processed)
		*pkt_processed = processed;
}

static void vmci_transport_notify_pkt_process_request(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
	if (vmci_trans(vsk)->consume_size <
	    PKT_FIELD(vsk, write_notify_min_window))
		PKT_FIELD(vsk, write_notify_min_window) =
			vmci_trans(vsk)->consume_size;
}

static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
	if (vmci_trans(vsk)->consume_size <
	    PKT_FIELD(vsk, write_notify_min_window))
		PKT_FIELD(vsk, write_notify_min_window) =
			vmci_trans(vsk)->consume_size;
}

/* Socket control packet based operations. */
const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
	.socket_init = vmci_transport_notify_pkt_socket_init,
	.socket_destruct = vmci_transport_notify_pkt_socket_destruct,
	.poll_in = vmci_transport_notify_pkt_poll_in,
	.poll_out = vmci_transport_notify_pkt_poll_out,
	.handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
	.recv_init = vmci_transport_notify_pkt_recv_init,
	.recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
	.recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
	.recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
	.send_init = vmci_transport_notify_pkt_send_init,
	.send_pre_block = vmci_transport_notify_pkt_send_pre_block,
	.send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
	.send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
	.process_request = vmci_transport_notify_pkt_process_request,
	.process_negotiate = vmci_transport_notify_pkt_process_negotiate,
};